├── deeppavlov_agent
│   ├── __init__.py
│   ├── tests
│   │   ├── __init__.py
│   │   └── workflow_manager_test.py
│   ├── core
│   │   ├── transport
│   │   │   ├── __init__.py
│   │   │   ├── gateways
│   │   │   │   ├── __init__.py
│   │   │   │   └── rabbitmq.py
│   │   │   ├── mapping.py
│   │   │   ├── settings.py
│   │   │   ├── messages.py
│   │   │   └── base.py
│   │   ├── __init__.py
│   │   ├── db.py
│   │   ├── telegram_client.py
│   │   ├── service.py
│   │   ├── pipeline.py
│   │   ├── log.py
│   │   ├── workflow_manager.py
│   │   ├── agent.py
│   │   ├── connectors.py
│   │   └── state_manager.py
│   ├── http_api
│   │   ├── templates
│   │   │   ├── __init__.py
│   │   │   ├── base.html
│   │   │   ├── services_ws_highcharts.html
│   │   │   └── chat.html
│   │   ├── __init__.py
│   │   ├── api.py
│   │   └── handlers.py
│   ├── log_config.yml
│   ├── run_tg.py
│   ├── state_formatters
│   │   ├── output_formatters.py
│   │   ├── __init__.py
│   │   └── dp_formatters.py
│   ├── run_http.py
│   ├── run.py
│   ├── cmd_client.py
│   ├── utils
│   │   ├── http_api_stress_test.py
│   │   └── http_api_test.py
│   ├── settings.py
│   ├── setup_agent.py
│   └── parse_config.py
├── dev_requirements.txt
├── docs
│   ├── source
│   │   ├── user_commands
│   │   │   └── commands.rst
│   │   ├── _static
│   │   │   ├── Agent_Pipeline.png
│   │   │   ├── Agent_Pipeline_v2.png
│   │   │   ├── api.html
│   │   │   └── apispec
│   │   │       ├── agent_v0.12.0.yml
│   │   │       └── agent_v0.12.1.yml
│   │   ├── index.rst
│   │   ├── api
│   │   │   ├── user_state_api.rst
│   │   │   └── services_http_api.rst
│   │   ├── state_formatters
│   │   │   └── formatters.rst
│   │   ├── built_in
│   │   │   ├── state_manager_method.rst
│   │   │   └── connectors.rst
│   │   ├── intro
│   │   │   └── overview.rst
│   │   ├── conf.py
│   │   └── config
│   │       └── config.rst
│   └── Makefile
├── .flake8
├── MANIFEST.in
├── README.md
├── requirements.txt
├── dockerfile_agent
├── setup.py
├── .gitignore
└── LICENSE
/deeppavlov_agent/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/deeppavlov_agent/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/transport/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/deeppavlov_agent/http_api/templates/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/transport/gateways/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dev_requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx==2.1.2
2 | recommonmark==0.5.0
3 | sphinx_rtd_theme
4 | Pygments==2.4.2
5 | flake8
--------------------------------------------------------------------------------
/deeppavlov_agent/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .log import init_logger
2 |
3 | STATE_API_VERSION = "0.13.0"
4 |
5 | init_logger()
6 |
--------------------------------------------------------------------------------
/docs/source/user_commands/commands.rst:
--------------------------------------------------------------------------------
1 | /start
2 | ======
3 |
4 | To start a new dialog, send the **"/start"** utterance to the bot.
--------------------------------------------------------------------------------
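
For illustration, the same ``/start`` utterance can also be sent over the HTTP channel. The sketch below reuses the request shape from utils/http_api_test.py (``{'user_id': ..., 'payload': ...}``); the URL and port are assumptions based on the default ``port`` setting, and whether ``/start`` is honoured may depend on the channel.

import asyncio

import aiohttp


async def start_new_dialog(url='http://localhost:4242/', user_id='illustrative_user_id'):
    # Same request shape as in utils/http_api_test.py: {'user_id': ..., 'payload': ...}
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json={'user_id': user_id, 'payload': '/start'}) as resp:
            return await resp.json()


if __name__ == '__main__':
    print(asyncio.run(start_new_dialog()))
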
/docs/source/_static/Agent_Pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp-agent/HEAD/docs/source/_static/Agent_Pipeline.png
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length=120
3 | ignore=D100,D101,D102,D103,D107,F403,F405
4 | exclude=.git,__pycache__,build,dist,env
5 |
--------------------------------------------------------------------------------
/docs/source/_static/Agent_Pipeline_v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp-agent/HEAD/docs/source/_static/Agent_Pipeline_v2.png
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
2 | include deeppavlov_agent/http_api/templates/*.html
3 | include deeppavlov_agent/log_config.yml
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepPavlov Agent
2 |
3 | **DeepPavlov Agent** is a platform for creating multi-skill chatbots.
4 |
5 | Please refer to our [readthedocs documentation](https://deeppavlov-agent.readthedocs.io).
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aioconsole==0.1.15
2 | aiogram==2.3
3 | aiohttp==3.6.1
4 | aiohttp-socks==0.2.2
5 | aiohttp-jinja2==1.2.0
6 | aiohttp-swagger==1.0.9
7 | pyyaml==5.1
8 | aio-pika==5.6.0
9 | motor==2.0.0
10 | tqdm==4.36.1
--------------------------------------------------------------------------------
/deeppavlov_agent/http_api/templates/base.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | {% block head %}
5 | {% block title %}{% endblock %} - DeepPavlov Agent
6 | {% endblock %}
7 |
8 |
9 | {% block content %}
10 | {% endblock %}
11 |
15 |
16 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/transport/mapping.py:
--------------------------------------------------------------------------------
1 | from .gateways.rabbitmq import RabbitMQAgentGateway, RabbitMQServiceGateway, RabbitMQChannelGateway
2 | from ..connectors import ServiceGatewayHTTPConnector
3 |
4 | GATEWAYS_MAP = {
5 | 'AMQP': {
6 | 'agent': RabbitMQAgentGateway,
7 | 'service': RabbitMQServiceGateway,
8 | 'channel': RabbitMQChannelGateway
9 | }
10 | }
11 |
12 | CONNECTORS_MAP = {
13 | 'AMQP': ServiceGatewayHTTPConnector
14 | }
15 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/transport/settings.py:
--------------------------------------------------------------------------------
1 | TRANSPORT_SETTINGS = {
2 | 'agent_namespace': 'deeppavlov_agent',
3 | 'agent_name': 'dp_agent',
4 | 'utterance_lifetime_sec': 120,
5 | 'channels': {},
6 | 'transport': {
7 | 'type': 'AMQP',
8 | 'AMQP': {
9 | 'host': '127.0.0.1',
10 | 'port': 5672,
11 | 'login': 'guest',
12 | 'password': 'guest',
13 | 'virtualhost': '/'
14 | }
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/dockerfile_agent:
--------------------------------------------------------------------------------
1 | FROM python:3.7-slim-stretch
2 |
3 | EXPOSE 4242
4 |
5 | ENV DEBIAN_FRONTEND noninteractive
6 | ENV PYTHONPATH "${PYTHONPATH}:/dp-agent"
7 | ENV DPA_LAUNCHING_ENV "docker"
8 |
9 | COPY requirements.txt /
10 |
11 | RUN apt-get update -y --fix-missing && \
12 | apt-get install -y -q \
13 | build-essential \
14 | openssl \
15 | git \
16 | libssl-dev \
17 | libffi-dev && \
18 | echo "stty iutf8" >> ~/.bashrc && \
19 | pip install -r requirements.txt && \
20 | rm -rf /var/lib/apt/lists/*
21 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/db.py:
--------------------------------------------------------------------------------
1 | import motor.motor_asyncio
2 |
3 |
4 | class DataBase:
5 | _inst = None
6 |
7 | def __new__(cls, *args, **kwargs):
8 | if not cls._inst:
9 | cls._inst = super(DataBase, cls).__new__(cls)
10 | return cls._inst
11 |
12 | def __init__(self, host, port, name):
13 | if isinstance(port, str):
14 | port = int(port)
15 | self.client = motor.motor_asyncio.AsyncIOMotorClient(host, port)
16 | self.db = self.client[name]
17 |
18 | def get_db(self):
19 | return self.db
20 |
--------------------------------------------------------------------------------
/docs/source/_static/api.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | DeepPavlov Skills API
5 |
6 |
7 |
8 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = source
8 | BUILDDIR = build
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/deeppavlov_agent/log_config.yml:
--------------------------------------------------------------------------------
1 | version: 1
2 | disable_existing_loggers: false
3 | loggers:
4 | core:
5 | level: 'INFO'
6 | propagate: true
7 | handlers: ['stderr']
8 | formatters:
9 | default:
10 | format: "%(asctime)s.%(msecs)d %(levelname)s in '%(name)s'['%(module)s'] at line %(lineno)d: %(message)s"
11 | datefmt: "%Y-%m-%d %H:%M:%S"
12 | handlers:
13 | file:
14 | class: 'logging.FileHandler'
15 | level: 'DEBUG'
16 | formatter: 'default'
17 | filename: '~/dp_agent.log'
18 | stdout:
19 | class: 'logging.StreamHandler'
20 | level: 'DEBUG'
21 | formatter: 'default'
22 | stream: 'ext://sys.stdout'
23 | stderr:
24 | class: 'logging.StreamHandler'
25 | level: 'DEBUG'
26 | formatter: 'default'
27 | stream: 'ext://sys.stderr'
28 |
--------------------------------------------------------------------------------
/deeppavlov_agent/run_tg.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from .settings import TELEGRAM_TOKEN, TELEGRAM_PROXY
4 | from .core.telegram_client import run_tg
5 | from .setup_agent import setup_agent
6 |
7 |
8 | def run_telegram(pipeline_configs=None):
9 | agent, session, workers = setup_agent(pipeline_configs)
10 | try:
11 | run_tg(TELEGRAM_TOKEN, TELEGRAM_PROXY, agent)
12 | finally:
13 | session.close()
14 | for i in workers:
15 | i.cancel()
16 |
17 |
18 | if __name__ == '__main__':
19 | parser = argparse.ArgumentParser()
20 | parser.add_argument('-pl', '--pipeline_config', help='Pipeline config (overwrite value, defined in settings)',
21 | type=str, action='append')
22 | args = parser.parse_args()
23 |
24 | run_telegram(args.pipeline_config)
25 |
--------------------------------------------------------------------------------
/deeppavlov_agent/state_formatters/output_formatters.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 |
3 |
4 | def http_api_output_formatter(payload: Dict):
5 | return {
6 | 'dialog_id': payload['dialog_id'],
7 | 'utt_id': payload['utterances'][-1]['utt_id'],
8 | 'user_id': payload['human']['user_external_id'],
9 | 'response': payload['utterances'][-1]['text'],
10 | }
11 |
12 |
13 | def http_debug_output_formatter(payload: Dict):
14 | return {
15 | 'dialog_id': payload['dialog_id'],
16 | 'utt_id': payload['utterances'][-1]['utt_id'],
17 | 'user_id': payload['human']['user_external_id'],
18 | 'response': payload['utterances'][-1]['text'],
19 | 'active_skill': payload['utterances'][-1]['active_skill'],
20 | 'debug_output': payload['utterances'][-2]['hypotheses']
21 | }
22 |
--------------------------------------------------------------------------------
/deeppavlov_agent/state_formatters/__init__.py:
--------------------------------------------------------------------------------
1 | from .dp_formatters import *
2 | from .output_formatters import *
3 |
4 | all_formatters = {
5 | 'base_last_utterances_formatter_in': base_last_utterances_formatter_in,
6 | 'chitchat_formatter_in': chitchat_formatter_in,
7 | 'odqa_formatter_in': odqa_formatter_in,
8 | 'chitchat_example_formatter_in': chitchat_example_formatter_in,
9 | 'ner_formatter_out': ner_formatter_out,
10 | 'sentiment_formatter_out': sentiment_formatter_out,
11 | 'chitchat_odqa_formatter_out': chitchat_odqa_formatter_out,
12 | 'add_confidence_formatter_out': add_confidence_formatter_out,
13 | 'chitchat_example_formatter_out': chitchat_example_formatter_out,
14 | 'base_hypotheses_formatter_in': base_hypotheses_formatter_in,
15 | 'http_debug_output_formatter': http_debug_output_formatter,
16 | 'http_api_output_formatter': http_api_output_formatter,
17 | 'all_hypotheses_formatter_in': all_hypotheses_formatter_in
18 | }
19 |
--------------------------------------------------------------------------------
/deeppavlov_agent/http_api/__init__.py:
--------------------------------------------------------------------------------
1 | # This module implements the HTTP API, which allows communication with the Agent
2 |
3 | from .api import init_app
4 |
5 | from ..settings import (
6 | TIME_LIMIT, OUTPUT_FORMATTER, DEBUG_OUTPUT_FORMATTER, DEBUG, RESPONSE_LOGGER, CORS
7 | )
8 |
9 | from ..setup_agent import setup_agent
10 | from ..core.log import LocalResponseLogger
11 |
12 |
13 | def app_factory(pipeline_configs=None, debug=None, response_time_limit=None, cors=None):
14 | agent, session, workers = setup_agent(pipeline_configs)
15 | response_logger = LocalResponseLogger(RESPONSE_LOGGER)
16 | if DEBUG:
17 | output_formatter = DEBUG_OUTPUT_FORMATTER
18 | else:
19 | output_formatter = OUTPUT_FORMATTER
20 |
21 | app = init_app(
22 | agent=agent, session=session, consumers=workers,
23 | logger_stats=response_logger, output_formatter=output_formatter,
24 | debug=debug or DEBUG, response_time_limit=response_time_limit or TIME_LIMIT,
25 | cors=CORS if cors is None else cors
26 | )
27 |
28 | return app
29 |
--------------------------------------------------------------------------------
/deeppavlov_agent/run_http.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from aiohttp import web
4 |
5 | from .http_api import app_factory
6 | from .settings import PORT
7 |
8 |
9 | def run_http(port, pipeline_configs=None, debug=None, time_limit=None, cors=None):
10 | app = app_factory(pipeline_configs=pipeline_configs, debug=debug, response_time_limit=time_limit, cors=cors)
11 | web.run_app(app, port=port)
12 |
13 |
14 | if __name__ == '__main__':
15 | parser = argparse.ArgumentParser()
16 | parser.add_argument('-p', '--port', help=f'port for http client, default {PORT}', type=int)
17 | parser.add_argument('-pl', '--pipeline_configs', help='Pipeline config (overwrite value, defined in settings)',
18 | type=str, action='append')
19 | parser.add_argument('-d', '--debug', help='run in debug mode', action='store_true')
20 | parser.add_argument('-tl', '--time_limit', help='response time limit, 0 = no limit', type=int)
21 | args = parser.parse_args()
22 |
23 | port = args.port or PORT
24 | run_http(port, args.pipeline_configs, args.debug, args.time_limit)
25 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/telegram_client.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from aiogram import Bot
4 | from aiogram.dispatcher import Dispatcher
5 | from aiogram.utils import executor
6 |
7 |
8 | class TelegramMessageProcessor:
9 | def __init__(self, register_msg):
10 | self.register_msg = register_msg
11 |
12 | async def handle_message(self, message):
13 | response = await self.register_msg(
14 | utterance=message.text,
15 | user_external_id=str(message.from_user.id),
16 | user_device_type='telegram',
17 | date_time=message.date, location='', channel_type='telegram',
18 | require_response=True
19 | )
20 | await message.answer(response['dialog'].utterances[-1].text)
21 |
22 |
23 | def run_tg(token, proxy, agent):
24 | loop = asyncio.get_event_loop()
25 | bot = Bot(token=token, loop=loop, proxy=proxy)
26 | dp = Dispatcher(bot)
27 | tg_msg_processor = TelegramMessageProcessor(agent.register_msg)
28 |
29 | dp.message_handler()(tg_msg_processor.handle_message)
30 |
31 | executor.start_polling(dp, skip_updates=True)
32 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to DeepPavlov Agent documentation!
2 | ==========================================
3 |
4 | **DeepPavlov Agent** is a framework for developing scalable, production-ready multi-skill virtual assistants, complex dialogue systems, and chatbots.
5 |
6 | .. toctree::
7 | :glob:
8 | :maxdepth: 2
9 | :caption: Overview
10 |
11 | intro/overview
12 |
13 |
14 | .. toctree::
15 | :maxdepth: 2
16 | :caption: Services HTTP API
17 |
18 | api/services_http_api
19 |
20 |
21 | .. toctree::
22 | :maxdepth: 2
23 | :caption: User State API
24 |
25 | api/user_state_api
26 |
27 | .. toctree::
28 | :maxdepth: 2
29 | :caption: User Commands
30 |
31 | user_commands/commands
32 |
33 | .. toctree::
34 | :maxdepth: 2
35 | :caption: State Formatters
36 |
37 | state_formatters/formatters
38 |
39 | .. toctree::
40 | :maxdepth: 2
41 | :caption: Configuration files
42 |
43 | config/config
44 |
45 | .. toctree::
46 | :maxdepth: 2
47 | :caption: State manager
48 |
49 | built_in/state_manager_method
50 |
51 | .. toctree::
52 | :maxdepth: 2
53 | :caption: Connectors
54 |
55 | built_in/connectors
--------------------------------------------------------------------------------
/docs/source/api/user_state_api.rst:
--------------------------------------------------------------------------------
1 | User State API
2 | ==============
3 |
4 | Each utterance in a **Dialog state** is generated by some **User**, either a **Human** or a **Bot**.
5 | The ``user.user_type`` field stores a reference to the source of the utterance:
6 |
7 | .. code:: javascript
8 |
9 | {"utterances": [{"user": {"user_type": "human"}}]}
10 |
11 | A skill can update any fields in **User** (**Human** or **Bot**) objects. If a **Skill** updates a **Human**,
12 | the **Human** fields will be changed in this utterance accordingly. If a **Skill** updates a **Bot**, the **Bot** fields will be
13 | changed in the *next* (generated by the bot) utterance.
14 |
15 | Each new dialog starts with a new **Bot** whose fields all have default values. The **Human** object, however, is stored permanently:
16 | when a **Human** starts a new dialog, the object is retrieved from the database with all previously updated fields.
17 |
18 | The history of all changes made by skills to users can be looked up in the list of possible responses stored in the
19 | ``hypotheses`` field of a human utterance:
20 |
21 | .. code:: javascript
22 |
23 | {"utterances": [{"user": {"user_type": "human"}, "hypotheses": []}]}
24 |
--------------------------------------------------------------------------------
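
A minimal sketch of reading these fields from a dialog dict in the state format shown above; the helper names are illustrative, and the field paths follow the snippets above and docs/source/state_formatters/formatters.rst.

from typing import Dict, List


def last_utterance_author(dialog: Dict) -> str:
    # Returns "human" or "bot", according to the user.user_type field shown above.
    return dialog['utterances'][-1]['user']['user_type']


def last_human_hypotheses(dialog: Dict) -> List[Dict]:
    # Hypotheses (possible responses, including any user updates made by skills)
    # are stored on human utterances only.
    last_utt = dialog['utterances'][-1]
    if last_utt['user']['user_type'] == 'human':
        return last_utt.get('hypotheses', [])
    return []
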
/deeppavlov_agent/run.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from .cmd_client import run_cmd
4 | from .run_http import run_http
5 | from .run_tg import run_telegram
6 |
7 |
8 | def main():
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument('-pl', '--pipeline_configs', help='Pipeline config (overwrite value, defined in settings)',
11 | type=str, action='append')
12 | parser.add_argument("-ch", "--channel", help="run agent in telegram, cmd_client or http_client", type=str,
13 | choices=['cmd_client', 'http_client', 'telegram'], default='cmd_client')
14 |     parser.add_argument('-p', '--port', help='port for http client, default 4242', type=int, default=4242)
15 | parser.add_argument('-c', '--cors', help='whether to add CORS middleware to http_client',
16 | action='store_true', default=None)
17 | parser.add_argument('-d', '--debug', help='run in debug mode', action='store_true')
18 | parser.add_argument('-tl', '--time_limit', help='response time limit, 0 = no limit', type=int, default=0)
19 | args = parser.parse_args()
20 |
21 | if args.channel == 'cmd_client':
22 | run_cmd(args.pipeline_configs, args.debug)
23 | elif args.channel == 'http_client':
24 | run_http(args.port, args.pipeline_configs, args.debug, args.time_limit, args.cors)
25 | elif args.channel == 'telegram':
26 | run_telegram(args.pipeline_configs)
27 |
28 |
29 | if __name__ == '__main__':
30 | main()
31 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
3 | import os
4 |
5 |
6 | __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
7 |
8 |
9 | def read_requirements():
10 | """parses requirements from requirements.txt"""
11 | reqs_path = os.path.join(__location__, 'requirements.txt')
12 | with open(reqs_path, encoding='utf8') as f:
13 | reqs = [line.strip() for line in f if not line.strip().startswith('#')]
14 |
15 | names = []
16 | links = []
17 | for req in reqs:
18 | if '://' in req:
19 | links.append(req)
20 | else:
21 | names.append(req)
22 | return {'install_requires': names, 'dependency_links': links}
23 |
24 |
25 | setuptools.setup(
26 | name='deeppavlov_agent',
27 | version='2.2.0',
28 | include_package_data=True,
29 |     description='An open source library that allows you to create data processing systems based on a sequence graph, '
30 |                 'while saving sample processing results to a database.',
31 |     long_description='An open source library that allows you to create data processing systems based on a sequence '
32 |                      'graph, while saving sample processing results to a database. '
33 |                      'Possible applications are chatbots and other NLP systems which combine multiple skills.',
34 | keywords=['chatbots', 'microservices', 'dialog systems', 'NLP'],
35 | packages=setuptools.find_packages(exclude=('docs',)),
36 | python_requires='>=3.7',
37 | url="https://github.com/deepmipt/dp-agent",
38 | **read_requirements()
39 | )
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | #IDEA
107 | .idea/
108 |
109 | #GIT
110 | .git/
111 |
112 | #vscode
113 | .vscode/
--------------------------------------------------------------------------------
/deeppavlov_agent/cmd_client.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import asyncio
3 |
4 | from aioconsole import ainput
5 |
6 | from .setup_agent import setup_agent
7 |
8 |
9 | async def message_processor(register_msg):
10 | user_id = await ainput('Provide user id: ')
11 | while True:
12 | msg = await ainput(f'You ({user_id}): ')
13 | msg = msg.strip()
14 | if msg:
15 | response = await register_msg(utterance=msg, user_external_id=user_id, user_device_type='cmd',
16 | location='lab', channel_type='cmd_client',
17 | deadline_timestamp=None, require_response=True)
18 | print('Bot: ', response['dialog'].utterances[-1].text)
19 |
20 |
21 | def run_cmd(pipeline_configs, debug):
22 | agent, session, workers = setup_agent(pipeline_configs=pipeline_configs)
23 | loop = asyncio.get_event_loop()
24 | loop.set_debug(debug)
25 | future = asyncio.ensure_future(message_processor(agent.register_msg))
26 | for i in workers:
27 | loop.create_task(i.call_service(agent.process))
28 | try:
29 | loop.run_until_complete(future)
30 | except KeyboardInterrupt:
31 | pass
32 | except Exception as e:
33 | raise e
34 | finally:
35 | future.cancel()
36 | if session:
37 | loop.run_until_complete(session.close())
38 | loop.stop()
39 | loop.close()
40 |
41 |
42 | if __name__ == '__main__':
43 | parser = argparse.ArgumentParser()
44 | parser.add_argument('-pl', '--pipeline_configs', help='Pipeline config (overwrite value, defined in settings)',
45 | type=str, action='append')
46 | parser.add_argument('-d', '--debug', help='run in debug mode', action='store_true')
47 | args = parser.parse_args()
48 |
49 | run_cmd(args.pipeline_configs, args.debug)
50 |
--------------------------------------------------------------------------------
/docs/source/state_formatters/formatters.rst:
--------------------------------------------------------------------------------
1 | **Formatters** are functions that convert the input and output API of services into the Agent's API.
2 |
3 | Defining the formatters
4 | =======================
5 |
6 | There are two main formatter types: a **dialog formatter**, which extracts data from the dict representation of a
7 | dialog and formats it into a form the service can consume, and an optional **response formatter**, which extracts
8 | data from the service response and formats it before it is added to the state.
9 |
10 | **Dialog formatters**
11 |
12 | These functions should accept a single parameter, a dialog (in dict form), and return a list of tasks for service processing.
13 | Each task should be in the format expected by the associated service.
14 | From the dict form of a dialog you can extract:
15 |
16 | * Human - ``dialog['human']``
17 | * Bot - ``dialog['bot']``
18 | * List of all utterances - ``dialog['utterances']``
19 | * List of only human utterances - ``dialog['human_utterances']``
20 | * List of only bot utterances - ``dialog['bot_utterances']``
21 |
22 | Every utterance (both bot and human) shares a common set of parameters:
23 |
24 | * Text - ``utterance['text']``
25 | * Annotations - ``utterance['annotations']``
26 | * User (human or bot, depending on type of utterance) - ``utterance['user']``
27 |
28 | A human utterance has additional parameters:
29 |
30 | * List of hypotheses - ``utterance['hypotheses']``
31 | * Additional attributes - ``utterance['attributes']``
32 |
33 | A bot utterance also has additional attributes:
34 |
35 | * Active skill name (the skill which provided the actual response) - ``utterance['active_skill']``
36 | * Response confidence - ``utterance['confidence']``
37 | * Original response text (not modified by postprocessors) - ``utterance['orig_text']``
38 |
39 | **Response formatters**
40 |
41 | These functions should accept one sample of a skill response and re-format it so that it can be processed further.
42 | These formatters are optional.
43 |
--------------------------------------------------------------------------------
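
For illustration, a minimal dialog formatter and response formatter written against the conventions described above. The function names, the ``'sentences'`` argument key and the response shape of the hypothetical service are assumptions, not part of the library.

from typing import Dict, List


def my_dialog_formatter_in(dialog: Dict) -> List[Dict]:
    # One task per call: the text of the last utterance plus its annotations.
    last_utt = dialog['utterances'][-1]
    return [{'sentences': [last_utt['text']], 'annotations': [last_utt['annotations']]}]


def my_response_formatter_out(payload: List) -> Dict:
    # Re-format a single sample of the (hypothetical) service response
    # before it is added to the state.
    return {'labels': payload[0], 'probas': payload[1]}
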
/deeppavlov_agent/core/service.py:
--------------------------------------------------------------------------------
1 | class Service:
2 | def __init__(self, name, connector_func, state_processor_method=None,
3 | batch_size=1, tags=None, names_previous_services=None,
4 | names_required_previous_services=None,
5 | workflow_formatter=None, dialog_formatter=None, response_formatter=None,
6 | label=None):
7 | self.name = name
8 | self.batch_size = batch_size
9 | self.state_processor_method = state_processor_method
10 | self.names_previous_services = names_previous_services or set()
11 | self.names_required_previous_services = names_required_previous_services or set()
12 | self.tags = set(tags or [])
13 | self.workflow_formatter = workflow_formatter
14 | self.dialog_formatter = dialog_formatter
15 | self.response_formatter = response_formatter
16 | self.connector_func = connector_func
17 | self.previous_services = set()
18 | self.required_previous_services = set()
19 | self.dependent_services = set()
20 | self.next_services = set()
21 | self.label = label or self.name
22 |
23 | def is_sselector(self):
24 | return 'selector' in self.tags
25 |
26 | def is_responder(self):
27 | return 'responder' in self.tags
28 |
29 | def is_input(self):
30 | return 'input' in self.tags
31 |
32 | def is_last_chance(self):
33 | return 'last_chance' in self.tags
34 |
35 | def is_timeout(self):
36 | return 'timeout' in self.tags
37 |
38 | def apply_workflow_formatter(self, payload):
39 | if not self.workflow_formatter:
40 | return payload
41 | return self.workflow_formatter(payload)
42 |
43 | def apply_dialog_formatter(self, payload):
44 | if not self.dialog_formatter:
45 | return [self.apply_workflow_formatter(payload)]
46 | return self.dialog_formatter(self.apply_workflow_formatter(payload))
47 |
48 | def apply_response_formatter(self, payload):
49 | if not self.response_formatter:
50 | return payload
51 | return self.response_formatter(payload)
52 |
53 |
54 | def simple_workflow_formatter(workflow_record):
55 | return workflow_record['dialog'].to_dict()
56 |
--------------------------------------------------------------------------------
/deeppavlov_agent/utils/http_api_stress_test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import asyncio
3 | import uuid
4 | from statistics import mean, median
5 | from time import time
6 |
7 | import aiohttp
8 |
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument('-u', '--url', type=str)
11 | parser.add_argument('-pf', '--phrasesfile', help='name of the file with phrases for dialog', type=str, default="")
12 | parser.add_argument('-of', '--outputfile', help='name of the output file', type=str, default='output.csv')
13 | parser.add_argument('-mnu', '--minusers', type=int, default=1)
14 | parser.add_argument('-mxu', '--maxusers', type=int, default=10)
15 |
16 | args = parser.parse_args()
17 |
18 | try:
19 | with open(args.phrasesfile, 'r') as file:
20 | payloads = [line.rstrip('\n') for line in file]
21 | except Exception as e:
22 | raise e
23 |
24 |
25 | async def perform_test_dialogue(session, url, uuid, payloads):
26 | times = []
27 | for i in payloads:
28 | request_body = {'user_id': uuid, 'payload': i}
29 | start_time = time()
30 | async with session.post(url, json=request_body) as resp:
31 | response = await resp.json()
32 | end_time = time()
33 | if response['user_id'] != uuid:
34 | print('INFO, request returned wrong uuid')
35 |
36 | times.append(end_time - start_time)
37 |
38 | return times
39 |
40 |
41 | async def run_users(url, payload, mnu, mxu):
42 | payload_len = len(payload)
43 | async with aiohttp.ClientSession() as session:
44 | for i in range(mnu, mxu + 1):
45 | tasks = []
46 | for _ in range(0, i):
47 | user_id = uuid.uuid4().hex
48 | tasks.append(asyncio.ensure_future(perform_test_dialogue(session, url, user_id, payload)))
49 | test_start_time = time()
50 | responses = await asyncio.gather(*tasks)
51 | test_time = time() - test_start_time
52 | times = []
53 | for resp in responses:
54 | times.extend(resp)
55 |
56 | print(f'test No {i} finished: {max(times)} {min(times)} {mean(times)} {median(times)} '
57 | f'total_time {test_time} msgs {i*payload_len} mean_rps {(i*payload_len)/test_time}')
58 |
59 |
60 | if __name__ == '__main__':
61 | loop = asyncio.get_event_loop()
62 | future = asyncio.ensure_future(run_users(args.url, payloads, args.minusers, args.maxusers))
63 | loop.run_until_complete(future)
64 |
--------------------------------------------------------------------------------
/deeppavlov_agent/state_formatters/dp_formatters.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List
2 |
3 |
4 | def base_last_utterances_formatter_in(dialog: Dict, model_args_names=('x',)):
5 | return [{model_args_names[0]: [dialog['utterances'][-1]['text']]}]
6 |
7 |
8 | def base_hypotheses_formatter_in(dialog: Dict, model_args_names=('x',)):
9 | return [{model_args_names[0]: [i['text']]} for i in dialog['utterances'][-1]['hypotheses']]
10 |
11 |
12 | def all_hypotheses_formatter_in(dialog: Dict):
13 |     return [{'hypotheses': dialog['utterances'][-1]['hypotheses']}]
14 |
15 |
16 | def chitchat_formatter_in(dialog: Dict, model_args_names=('q',)):
17 | return [{model_args_names[0]: [dialog['utterances'][-1]['text']]}]
18 |
19 |
20 | def odqa_formatter_in(dialog: Dict, model_args_names=('question_raw',)):
21 | return [{model_args_names[0]: [dialog['utterances'][-1]['text']]}]
22 |
23 |
24 | def chitchat_example_formatter_in(dialog: Dict,
25 | model_args_names=("utterances", 'annotations', 'u_histories', 'dialogs')):
26 | return {
27 | model_args_names[0]: [dialog['utterances'][-1]['text']],
28 | model_args_names[1]: [dialog['utterances'][-1]['annotations']],
29 | model_args_names[2]: [[utt['text'] for utt in dialog['utterances']]],
30 | model_args_names[3]: [dialog]
31 | }
32 |
33 |
34 | def ner_formatter_out(payload: List):
35 | if len(payload) == 2:
36 | return {'tokens': payload[0],
37 | 'tags': payload[1]}
38 | else:
39 | raise ValueError("Payload doesn't contain all required fields")
40 |
41 |
42 | def sentiment_formatter_out(payload: List):
43 | return payload
44 |
45 |
46 | def chitchat_odqa_formatter_out(payload: List):
47 | if payload:
48 | class_name = payload[0]
49 | if class_name in ['speech', 'negative']:
50 | response = ['chitchat']
51 | else:
52 | response = ['odqa']
53 | return response
54 | else:
55 | raise ValueError('Empty payload provided')
56 |
57 |
58 | def add_confidence_formatter_out(payload: List, confidence=0.5):
59 | if payload:
60 |         return [{"text": payload[0], "confidence": confidence}]
61 | else:
62 | raise ValueError('Empty payload provided')
63 |
64 |
65 | def chitchat_example_formatter_out(payload: List):
66 | if len(payload) == 3:
67 | return [{"text": payload[0],
68 | "confidence": payload[1],
69 | "name": payload[2]}]
70 | else:
71 | raise ValueError("Payload doesn't contain all required fields")
72 |
--------------------------------------------------------------------------------
/deeppavlov_agent/settings.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from importlib import import_module
3 |
4 | from .core.db import DataBase
5 | from .core.state_manager import StateManager
6 | from .core.workflow_manager import WorkflowManager
7 | from .state_formatters.output_formatters import (http_api_output_formatter,
8 | http_debug_output_formatter)
9 |
10 | # Default parameters
11 | BASE_PARAMETERS = {
12 | 'debug': True,
13 | 'state_manager_class': StateManager,
14 | 'workflow_manager_class': WorkflowManager,
15 | 'db_class': DataBase,
16 | 'pipeline_config': 'pipeline_conf.json',
17 | 'db_config': 'db_conf.json',
18 | 'overwrite_last_chance': None,
19 | 'overwrite_timeout': None,
20 | 'formatters_module': None,
21 | 'connectors_module': None,
22 | 'response_logger': True,
23 | 'time_limit': 0,
24 | 'output_formatter': http_api_output_formatter,
25 | 'debug_output_formatter': http_debug_output_formatter,
26 | 'port': 4242,
27 | 'cors': False,
28 | 'telegram_token': '',
29 | 'telegram_proxy': ''
30 | }
31 |
32 |
33 | # Replacing constants with ones from user settings
34 | def setup_parameter(name, user_settings):
35 | res = None
36 | if user_settings:
37 | res = getattr(user_settings, name, None)
38 | if res is None:
39 | res = BASE_PARAMETERS[name]
40 | return res
41 |
42 |
43 | user_settings = None
44 | try:
45 | user_settings = import_module('dp_agent_settings')
46 | except ModuleNotFoundError:
47 |     logging.info('dp_agent_settings module was not found. Default settings are used')
48 |
49 | # Set up common parameters
50 | DEBUG = setup_parameter('debug', user_settings)
51 |
52 | # Basic agent configuration parameters (some are currently unavailable)
53 | STATE_MANAGER_CLASS = StateManager
54 | WORKFLOW_MANAGER_CLASS = WorkflowManager
55 | DB_CLASS = DataBase
56 |
57 | PIPELINE_CONFIG = setup_parameter('pipeline_config', user_settings)
58 | DB_CONFIG = setup_parameter('db_config', user_settings)
59 |
60 | OVERWRITE_LAST_CHANCE = setup_parameter('overwrite_last_chance', user_settings)
61 | OVERWRITE_TIMEOUT = setup_parameter('overwrite_timeout', user_settings)
62 |
63 | RESPONSE_LOGGER = setup_parameter('response_logger', user_settings)
64 |
65 | # HTTP app configuration parameters
66 | TIME_LIMIT = setup_parameter('time_limit', user_settings) # Without engaging the timeout by default
67 | CORS = setup_parameter('cors', user_settings)
68 |
69 | OUTPUT_FORMATTER = setup_parameter('output_formatter', user_settings)
70 | DEBUG_OUTPUT_FORMATTER = setup_parameter('debug_output_formatter', user_settings)
71 |
72 | # HTTP api run parameters
73 | PORT = setup_parameter('port', user_settings)
74 |
75 | # Telegram client configuration parameters
76 | TELEGRAM_TOKEN = setup_parameter('telegram_token', user_settings)
77 | TELEGRAM_PROXY = setup_parameter('telegram_proxy', user_settings)
78 |
--------------------------------------------------------------------------------
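
As the module above shows, any key of ``BASE_PARAMETERS`` can be overridden by placing a module named ``dp_agent_settings`` on the import path; ``setup_parameter`` looks up attributes with matching names. A minimal sketch with illustrative values:

# dp_agent_settings.py (placed on the import path, e.g. in the working directory)
# Every attribute name must match a key of BASE_PARAMETERS in deeppavlov_agent/settings.py.

pipeline_config = 'my_pipeline_conf.json'
db_config = 'my_db_conf.json'
port = 8080
time_limit = 20
cors = True
debug = False
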
/deeppavlov_agent/http_api/api.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | import aiohttp_jinja2
4 | import jinja2
5 | from aiohttp import web
6 |
7 | from .handlers import ApiHandler, PagesHandler, WSstatsHandler, WSChatHandler
8 |
9 |
10 | @web.middleware
11 | async def cors_mw(request, handler):
12 | resp = await handler(request)
13 | resp.headers['Access-Control-Allow-Origin'] = '*'
14 | resp.headers['Access-Control-Max-Age'] = '86400'
15 | resp.headers['Access-Control-Allow-Headers'] = 'Content-Type'
16 | resp.headers['access-control-allow-credentials'] = 'true'
17 | return resp
18 |
19 |
20 | async def init_app(agent, session, consumers, logger_stats, output_formatter,
21 | debug=False, response_time_limit=0, cors=None):
22 | middlewares = [cors_mw] if cors else []
23 | app = web.Application(middlewares=middlewares)
24 | handler = ApiHandler(output_formatter, response_time_limit)
25 | pages = PagesHandler(debug)
26 | stats = WSstatsHandler()
27 | chat = WSChatHandler(output_formatter)
28 | consumers = [asyncio.ensure_future(i.call_service(agent.process)) for i in consumers]
29 |
30 | async def on_startup(app):
31 | app['consumers'] = consumers
32 | app['agent'] = agent
33 | app['client_session'] = session
34 | app['websockets'] = []
35 | app['logger_stats'] = logger_stats
36 | asyncio.ensure_future(agent.state_manager.prepare_db())
37 |
38 | async def on_shutdown(app):
39 | for c in app['consumers']:
40 | c.cancel()
41 | if app['client_session']:
42 | await app['client_session'].close()
43 | tasks = asyncio.all_tasks()
44 | for task in tasks:
45 | task.cancel()
46 |
47 | app.router.add_post('', handler.handle_api_request)
48 | app.router.add_options('', handler.options)
49 | app.router.add_get('/api/dialogs/{dialog_id}', handler.dialog)
50 | app.router.add_get('/api/user/{user_external_id}', handler.dialogs_by_user)
51 | app.router.add_get('/ping', pages.ping)
52 | app.router.add_options('/ping', pages.options)
53 | app.router.add_get('/debug/current_load', stats.ws_page)
54 | app.router.add_options('/debug/current_load', stats.options)
55 | app.router.add_get('/debug/current_load/ws', stats.ws_handler)
56 | app.router.add_get('/chat', chat.ws_page)
57 | app.router.add_options('/chat', chat.options)
58 | app.router.add_get('/chat/ws', chat.ws_handler)
59 | app.router.add_post('/rating/dialog', handler.dialog_rating)
60 | app.router.add_options('/rating/dialog', handler.options)
61 | app.router.add_post('/rating/utterance', handler.utterance_rating)
62 | app.router.add_options('/rating/utterance', handler.options)
63 |
64 | app.on_startup.append(on_startup)
65 | app.on_shutdown.append(on_shutdown)
66 | aiohttp_jinja2.setup(app, loader=jinja2.PackageLoader('deeppavlov_agent.http_api', 'templates'))
67 | return app
68 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/transport/messages.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, TypeVar
2 |
3 |
4 | class MessageBase:
5 | agent_name: str
6 | msg_type: str
7 |
8 | def __init__(self, msg_type: str, agent_name: str):
9 | self.msg_type = msg_type
10 | self.agent_name = agent_name
11 |
12 | @classmethod
13 | def from_json(cls, message_json):
14 | return cls(**message_json)
15 |
16 | def to_json(self) -> dict:
17 | return self.__dict__
18 |
19 |
20 | TMessageBase = TypeVar('TMessageBase', bound=MessageBase)
21 |
22 |
23 | class ServiceTaskMessage(MessageBase):
24 | payload: Dict
25 |
26 | def __init__(self, agent_name: str, payload: Dict) -> None:
27 | super().__init__('service_task', agent_name)
28 | self.payload = payload
29 |
30 |
31 | class ServiceResponseMessage(MessageBase):
32 | response: Any
33 | task_id: str
34 |
35 | def __init__(self, task_id: str, agent_name: str, response: Any) -> None:
36 | super().__init__('service_response', agent_name)
37 | self.task_id = task_id
38 | self.response = response
39 |
40 |
41 | class ServiceErrorMessage(MessageBase):
42 | formatted_exc: str
43 |
44 | def __init__(self, task_id: str, agent_name: str, formatted_exc: str) -> None:
45 | super().__init__('error', agent_name)
46 | self.task_id = task_id
47 | self.formatted_exc = formatted_exc
48 |
49 | @property
50 | def exception(self) -> Exception:
51 | return Exception(self.formatted_exc)
52 |
53 |
54 | class ToChannelMessage(MessageBase):
55 | channel_id: str
56 | user_id: str
57 | response: str
58 |
59 | def __init__(self, agent_name: str, channel_id: str, user_id: str, response: str) -> None:
60 | super().__init__('to_channel_message', agent_name)
61 | self.channel_id = channel_id
62 | self.user_id = user_id
63 | self.response = response
64 |
65 |
66 | class FromChannelMessage(MessageBase):
67 | channel_id: str
68 | user_id: str
69 | utterance: str
70 | reset_dialog: bool
71 |
72 | def __init__(self, agent_name: str, channel_id: str, user_id: str, utterance: str, reset_dialog: bool) -> None:
73 | super().__init__('from_channel_message', agent_name)
74 | self.channel_id = channel_id
75 | self.user_id = user_id
76 | self.utterance = utterance
77 | self.reset_dialog = reset_dialog
78 |
79 |
80 | _message_wrappers_map = {
81 | 'service_task': ServiceTaskMessage,
82 | 'service_response': ServiceResponseMessage,
83 | 'to_channel_message': ToChannelMessage,
84 | 'from_channel_message': FromChannelMessage,
85 | 'error': ServiceErrorMessage
86 | }
87 |
88 |
89 | def get_transport_message(message_json: dict) -> TMessageBase:
90 | message_type = message_json.pop('msg_type')
91 |
92 | if message_type not in _message_wrappers_map:
93 | raise ValueError(f'Unknown transport message type: {message_type}')
94 |
95 | message_wrapper_class: TMessageBase = _message_wrappers_map[message_type]
96 |
97 | return message_wrapper_class.from_json(message_json)
98 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/transport/base.py:
--------------------------------------------------------------------------------
1 | from typing import List, Callable, TypeVar, Dict, Any, Optional
2 |
3 |
4 | class AgentGatewayBase:
5 | _on_service_callback: Optional[Callable]
6 | _on_channel_callback: Optional[Callable]
7 |
8 | def __init__(self, on_service_callback: Optional[Callable] = None,
9 | on_channel_callback: Optional[Callable] = None, *args, **kwargs):
10 |
11 | super(AgentGatewayBase, self).__init__(*args, **kwargs)
12 | self._on_service_callback = on_service_callback
13 | self._on_channel_callback = on_channel_callback
14 |
15 | @property
16 | def on_service_callback(self):
17 | return self._on_service_callback
18 |
19 | @on_service_callback.setter
20 | def on_service_callback(self, callback: Callable):
21 | self._on_service_callback = callback
22 |
23 | @property
24 | def on_channel_callback(self):
25 | return self._on_channel_callback
26 |
27 | @on_channel_callback.setter
28 | def on_channel_callback(self, callback: Callable):
29 | self._on_channel_callback = callback
30 |
31 | async def send_to_service(self, service: str, dialog: Dict) -> None:
32 | raise NotImplementedError
33 |
34 | async def send_to_channel(self, channel_id: str, user_id: str, response: str) -> None:
35 | raise NotImplementedError
36 |
37 |
38 | class ServiceGatewayConnectorBase:
39 | _service_config: dict
40 |
41 | def __init__(self, service_config: Dict) -> None:
42 | self._service_config = service_config
43 |
44 | async def send_to_service(self, dialogs: List[Dict]) -> List[Any]:
45 | raise NotImplementedError
46 |
47 |
48 | class ServiceGatewayBase:
49 | _to_service_callback: Callable
50 |
51 | def __init__(self, to_service_callback: Callable, *args, **kwargs) -> None:
52 | super(ServiceGatewayBase, self).__init__(*args, **kwargs)
53 | self._to_service_callback = to_service_callback
54 |
55 |
56 | class ChannelGatewayConnectorBase:
57 | _config: dict
58 | _channel_id: str
59 | _on_channel_callback: Callable
60 |
61 | def __init__(self, config: Dict, on_channel_callback: Callable) -> None:
62 | self._config = config
63 | self._channel_id = self._config['channel']['id']
64 | self._on_channel_callback = on_channel_callback
65 |
66 | async def send_to_channel(self, user_id: str, response: str) -> None:
67 | raise NotImplementedError
68 |
69 |
70 | class ChannelGatewayBase:
71 | _to_channel_callback: Callable
72 |
73 | def __init__(self, to_channel_callback: Callable, *args, **kwargs) -> None:
74 | super(ChannelGatewayBase, self).__init__(*args, **kwargs)
75 | self._to_channel_callback = to_channel_callback
76 |
77 | async def send_to_agent(self, utterance: str, channel_id: str, user_id: str, reset_dialog: bool) -> None:
78 | raise NotImplementedError
79 |
80 |
81 | TAgentGateway = TypeVar('TAgentGateway', bound=AgentGatewayBase)
82 | TServiceGatewayConnectorBase = TypeVar('TServiceGatewayConnectorBase', bound=ServiceGatewayConnectorBase)
83 | TServiceGateway = TypeVar('TServiceGateway', bound=ServiceGatewayBase)
84 | TChannelGatewayConnectorBase = TypeVar('TChannelGatewayConnectorBase', bound=ChannelGatewayConnectorBase)
85 | TChannelGateway = TypeVar('TChannelGateway', bound=ChannelGatewayBase)
86 |
--------------------------------------------------------------------------------
/deeppavlov_agent/utils/http_api_test.py:
--------------------------------------------------------------------------------
1 | import aiohttp
2 | import asyncio
3 | import argparse
4 | import csv
5 | import json
6 | from time import time
7 | from random import randrange
8 | import uuid
9 | from tqdm import tqdm
10 |
11 | '''
12 | structure of the dialog file (-df): a JSON object of the form
13 | {
14 |     <user_id>: [<phrase_1>, <phrase_2>, ...],
15 |     <user_id>: [<phrase_1>, <phrase_2>, ...],
16 |     <user_id>: [<phrase_1>, <phrase_2>, ...],
17 |     ...
18 | }
19 | structure of the phrase file (-pf): a simple text file, one phrase per line
20 | '''
21 |
22 | parser = argparse.ArgumentParser()
23 | parser.add_argument('-u', '--url', type=str)
24 | parser.add_argument('-uc', '--usercount', help='count of test users, which will send the message',
25 | type=int, default=10)
26 | parser.add_argument('-pc', '--phrasecount', help='count of phrases in single dialog',
27 | type=int, default=10)
28 | parser.add_argument('-pf', '--phrasesfile', help='name of the file with phrases for dialog', type=str, default="")
29 | parser.add_argument('-df', '--dialogfile', help='name of the file with predefined dialogs', type=str, default="")
30 | parser.add_argument('-of', '--outputfile', help='name of the output file', type=str, default='output.csv')
31 |
32 | args = parser.parse_args()
33 | payloads = {}
34 |
35 | if args.dialogfile:
36 | try:
37 | with open(args.dialogfile, 'r') as file:
38 | payloads = json.load(file)
39 | except Exception as e:
40 | raise e
41 | elif args.phrasesfile:
42 | try:
43 | with open(args.phrasesfile, 'r') as file:
44 | phrases = [line.rstrip('\n') for line in file]
45 | except Exception as e:
46 | raise e
47 | payloads = {uuid.uuid4().hex: [phrases[randrange(len(phrases))] for j in range(args.phrasecount)] for i in
48 | range(args.usercount)}
49 | else:
50 | raise ValueError('You should provide either predefined dialog (-df) or file with phrases (-pf)')
51 |
52 |
53 | async def perform_test_dialogue(session, url, uuid, payloads):
54 | result = []
55 | for i in tqdm(payloads, desc=uuid):
56 | request_body = {'user_id': uuid, 'payload': i}
57 | start_time = time()
58 | async with session.post(url, json=request_body, timeout=None) as resp:
59 | resp.raise_for_status()
60 |
61 | response = await resp.json()
62 | end_time = time()
63 | if response['user_id'] != uuid:
64 | print('INFO, request returned wrong uuid')
65 | result.append([uuid, start_time, end_time, end_time - start_time, len(i), i])
66 |
67 | return result
68 |
69 |
70 | async def run(url, payloads, out_filename):
71 | tasks = []
72 | async with aiohttp.ClientSession() as session:
73 | for k, v in payloads.items():
74 | task = asyncio.ensure_future(perform_test_dialogue(session, url, k, v))
75 | tasks.append(task)
76 | responses = await asyncio.gather(*tasks)
77 | result = [['uuid', 'send timestamp', 'receive timestamp', 'processing_time', 'phrase length', 'phrase text']]
78 | for i in responses:
79 | result.extend(i)
80 | with open(out_filename, 'w', newline='') as f:
81 | writer = csv.writer(f, delimiter=' ')
82 | for row in result:
83 | writer.writerow(row)
84 |
85 |
86 | if __name__ == '__main__':
87 | loop = asyncio.get_event_loop()
88 | loop.set_debug(True)
89 | future = asyncio.ensure_future(run(args.url, payloads, args.outputfile))
90 | loop.run_until_complete(future)
91 |
--------------------------------------------------------------------------------
/deeppavlov_agent/setup_agent.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | import yaml
5 |
6 | from .settings import (DB_CLASS, DB_CONFIG, OVERWRITE_LAST_CHANCE,
7 | OVERWRITE_TIMEOUT, PIPELINE_CONFIG,
8 | RESPONSE_LOGGER, STATE_MANAGER_CLASS,
9 | WORKFLOW_MANAGER_CLASS)
10 | from .core.agent import Agent
11 | from .core.connectors import EventSetOutputConnector
12 | from .core.log import LocalResponseLogger
13 | from .core.pipeline import Pipeline
14 | from .core.service import Service
15 | from .parse_config import PipelineConfigParser
16 |
17 |
18 | def merge_two_configs(d1, d2):
19 | for k, v in d2.items():
20 | if k in d1:
21 | if isinstance(v, dict) and isinstance(d1[k], dict):
22 | merge_two_configs(d1[k], v)
23 | else:
24 | d1[k] = v
25 | else:
26 | d1[k] = v
27 |
28 |
29 | def setup_agent(pipeline_configs=None):
30 | with open(DB_CONFIG, 'r') as db_config:
31 | if DB_CONFIG.endswith('.json'):
32 | db_data = json.load(db_config)
33 | elif DB_CONFIG.endswith('.yml'):
34 | db_data = yaml.load(db_config, Loader=yaml.FullLoader)
35 | else:
36 | raise ValueError(f'unknown format for db_config file: {DB_CONFIG}')
37 |
38 | if db_data.pop('env', False):
39 | for k, v in db_data.items():
40 | db_data[k] = os.getenv(v)
41 |
42 | db = DB_CLASS(**db_data)
43 |
44 | sm = STATE_MANAGER_CLASS(db.get_db())
45 | if pipeline_configs:
46 | pipeline_data = {}
47 | for name in pipeline_configs:
48 | with open(name, 'r') as pipeline_config:
49 | if name.endswith('.json'):
50 | merge_two_configs(pipeline_data, json.load(pipeline_config))
51 | elif name.endswith('.yml'):
52 | merge_two_configs(pipeline_data, yaml.load(pipeline_config, Loader=yaml.FullLoader))
53 | else:
54 | raise ValueError(f'unknown format for pipeline_config file from command line: {name}')
55 |
56 | else:
57 | with open(PIPELINE_CONFIG, 'r') as pipeline_config:
58 | if PIPELINE_CONFIG.endswith('.json'):
59 | pipeline_data = json.load(pipeline_config)
60 | elif PIPELINE_CONFIG.endswith('.yml'):
61 | pipeline_data = yaml.load(pipeline_config, Loader=yaml.FullLoader)
62 | else:
63 |                 raise ValueError(f'unknown format for pipeline_config file from settings: {PIPELINE_CONFIG}')
64 |
65 | pipeline_config = PipelineConfigParser(sm, pipeline_data)
66 |
67 | input_srv = Service('input', None, sm.add_human_utterance, 1, ['input'])
68 | responder_srv = Service('responder', EventSetOutputConnector('responder').send,
69 | sm.save_dialog, 1, ['responder'])
70 |
71 | last_chance_srv = None
72 | if not OVERWRITE_LAST_CHANCE:
73 | last_chance_srv = pipeline_config.last_chance_service
74 | timeout_srv = None
75 | if not OVERWRITE_TIMEOUT:
76 | timeout_srv = pipeline_config.timeout_service
77 |
78 | pipeline = Pipeline(pipeline_config.services, input_srv, responder_srv, last_chance_srv, timeout_srv)
79 |
80 | response_logger = LocalResponseLogger(RESPONSE_LOGGER)
81 |
82 | agent = Agent(pipeline, sm, WORKFLOW_MANAGER_CLASS(), response_logger=response_logger)
83 | if pipeline_config.gateway:
84 | pipeline_config.gateway.on_channel_callback = agent.register_msg
85 | pipeline_config.gateway.on_service_callback = agent.process
86 |
87 | return agent, pipeline_config.session, pipeline_config.workers
88 |
--------------------------------------------------------------------------------
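
``setup_agent()`` above reads ``DB_CONFIG`` as JSON or YAML and, when the ``env`` flag is set, treats the remaining values as environment variable names. A sketch of generating a ``db_conf.json`` consistent with that logic and with the ``DataBase(host, port, name)`` signature; the concrete values are assumptions (27017 is simply MongoDB's default port):

import json

# Field names follow DataBase(host, port, name); with "env": true the values
# would instead be names of environment variables resolved by setup_agent().
db_conf = {
    'env': False,
    'host': '127.0.0.1',
    'port': 27017,
    'name': 'dp_agent',
}

with open('db_conf.json', 'w') as f:
    json.dump(db_conf, f, indent=2)
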
/deeppavlov_agent/http_api/templates/services_ws_highcharts.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 | {% block title %}Dialogs list{% endblock %}
3 | {% block head %}
4 | {{ super() }}
5 |
6 |
7 |
8 | {% endblock %}
9 | {% block content %}
10 |
11 |
12 |
121 | {% endblock %}
--------------------------------------------------------------------------------
/docs/source/built_in/state_manager_method.rst:
--------------------------------------------------------------------------------
1 | Built-in StateManager
2 | =====================
3 |
4 | The built-in StateManager is responsible for all database read and write operations; it works with a MongoDB database.
5 | You can assign its methods to services in your pipeline in order to properly save their responses to the dialog state.
6 | You can read more on the pipeline configuration in :ref:`services-config`.
7 |
8 | Available methods
9 | =================
10 |
11 | Each of the methods has the following input parameters, which are filled automatically by the agent during message processing.
12 |
13 | * ``dialog`` - dialog object, which will be updated
14 | * ``payload`` - response of the service with output formatter applied
15 | * ``label`` - label of the service
16 | * ``kwargs`` - minor arguments which are also provided by agent
17 |
18 | You can use several state manager methods in your pipeline:
19 |
20 | 1. **add_annotation**
21 | * Adds a record to ``annotations`` section of the last utterance in dialog
22 | * ``label`` is used as a key
23 | * ``payload`` is used as a value
24 |
25 | 2. **add_annotation_prev_bot_utt**
26 | * Adds a record to ``annotations`` section of the second utterance from the end of the dialog
27 |    * Only works if that utterance is a bot utterance
28 | * Suitable for annotating last bot utterance on the next dialog round
29 | * ``label`` is used as a key
30 | * ``payload`` is used as a value
31 |
32 | 3. **add_hypothesis**
33 | * Adds a record to ``hypotheses`` section of the last utterance in dialog
34 |    * Works only for a human utterance, since a bot utterance doesn't have such a section
35 | * Accepts list of hypotheses dicts, provided by service
36 | * Two new keys are added to each hypothesis: ``service_name`` and ``annotations``
37 | * ``label`` is used as a value for ``service_name`` key
38 | * Empty dict is used as a value for ``annotations`` key
39 |
40 | 4. **add_hypothesis_annotation**
41 | * Adds an annotation to a single element of the ``hypotheses`` section of the last utterance in the dialog, under the ``annotations`` key
42 | * In order to identify a certain hypothesis, its index is used; the index is stored by the agent
43 | * ``label`` is used as a key
44 | * ``payload`` is used as a value
45 |
46 | 5. **add_text**
47 | * Adds a value to the ``text`` field of the last utterance in the dialog
48 | * Suitable for modifying a response in a bot utterance (the original text can be found in the ``orig_text`` field)
49 | * ``payload`` is used as a value
50 |
51 | 6. **add_bot_utterance**
52 | * This method is intended to be associated with a response selector service
53 | * Adds a new bot utterance to the dialog
54 | * Modifies the associated user and bot objects
55 | * The payload is expected to be a single hypothesis chosen as the bot response, so it is parsed into different fields of the bot utterance
56 | * The ``text`` and ``orig_text`` fields of the new bot utterance are filled with the ``text`` value from the payload
57 | * The ``active_skill`` field is filled with the ``skill_name`` value from the payload
58 | * The ``confidence`` field is filled with the ``confidence`` value from the payload
59 | * ``annotations`` from the payload are copied to the ``annotations`` field of the bot utterance
60 | * Skills are expected to return at least the ``text`` and ``confidence`` fields; ``skill_name`` and ``annotations`` are created within the ``add_hypothesis`` method
61 |
62 | 7. **add_bot_utterance_last_chance**
63 | * This method is intended to be associated with a failure-processing service, such as a timeout or last chance responder
64 | * It is very similar to ``add_bot_utterance``, but it performs an additional check on the type of the last utterance in the dialog
65 | * If the last utterance is a human utterance, the method acts like ``add_bot_utterance``
66 | * Otherwise, it skips creating a new bot utterance and inserting it at the end of the dialog
67 |
68 |
69 | There are two additional state manager methods, which are assigned automatically during the agent's initialisation.
70 |
71 | 1. **add_human_utterance**
72 | * This method is assigned to the input service, which is created automatically during the agent's initialisation
73 | * Adds a new human utterance to the dialog
74 | * ``payload`` is used for the ``text`` field of the new human utterance
75 |
76 | 2. **save_dialog**
77 | * This method is assigned to the responder service, which is created automatically during the agent's initialisation
78 | * It simply saves the dialog to the database
79 |
--------------------------------------------------------------------------------
/docs/source/api/services_http_api.rst:
--------------------------------------------------------------------------------
1 | The following types of dialog services can be connected to the agent's conversational pipeline:
2 |
3 | * **Annotator**
4 | * **Skill Selector**
5 | * **Skills**
6 | * **Response Selector**
7 | * **Postprocessor**
8 |
9 |
10 | Input Format
11 | ============
12 |
13 | All services should accept an input in the agent ``state`` format. This format is described `here <https://deeppavlov-agent.readthedocs.io/en/latest/_static/api.html>`__.
14 | If the input format of a service differs from the agent state format, then a **formatter** function should be implemented.
15 | This formatter function receives a request in the agent state format and returns a request in the format supported by the service.
16 |
17 | Output Format
18 | =============
19 |
20 | All services should provide an output in the agent ``state`` format. This format is described `here <https://deeppavlov-agent.readthedocs.io/en/latest/_static/api.html>`__.
21 | To use the same formatter for both input and output, set the ``mode=='out'`` flag.
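
As a rough illustration only (the function name, signature and dialog keys below are assumptions, not a fixed API), such a formatter might dispatch on a ``mode`` argument:

.. code:: python

    # Hypothetical formatter sketch; adapt the keys to your service.
    def my_service_formatter(payload, mode='in'):
        if mode == 'in':
            # payload is a dialog in the agent state format -> build the service request
            return {'sentences': [payload['utterances'][-1]['text']]}
        # mode == 'out': payload is the raw service response -> reshape it if needed
        return payload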
22 |
23 | Annotator
24 | =========
25 |
26 | An Annotator service returns a free-form response.
27 |
28 | For example, the NER annotator may return a dictionary with ``tokens`` and ``tags`` keys:
29 |
30 | .. code:: json
31 |
32 | {"tokens": ["Paris"], "tags": ["I-LOC"]}
33 |
34 | A Sentiment annotator can return a list of labels:
35 |
36 | .. code:: json
37 |
38 | ["neutral", "speech"]
39 |
40 | Alternatively, a Sentiment annotator can return just a string:
41 |
42 | .. code:: json
43 |
44 | "neutral"
45 |
46 | Skill Selector
47 | ==============
48 |
49 | A Skill Selector service should return a list of names of the skills selected to generate candidate responses for a dialog.
50 |
51 | For example:
52 |
53 | .. code:: json
54 |
55 | ["chitchat", "hello_skill"]
56 |
57 |
58 | Skill
59 | =====
60 |
61 | A Skill service should return a **list of dicts**, where each dict corresponds to a single candidate response.
62 | Each candidate response entry requires the ``text`` and ``confidence`` keys.
63 | A Skill can also update the **Human** or **Bot** profile.
64 | To do this, it should pack these attributes into the ``human_attributes`` and ``bot_attributes`` keys.
65 |
66 | All attributes in ``human_attributes`` and ``bot_attributes`` will overwrite the current **Human** and **Bot**
67 | attribute values in the agent state. If there are no such attributes yet,
68 | they will be stored under the ``attributes`` key inside **Human** or **Bot**.
69 |
70 | The minimum required response of a skill is a list with a single two-key dictionary:
71 |
72 |
73 | .. code:: json
74 |
75 | [{"text": "hello",
76 | "confidence": 0.33}]
77 |
78 | It can be extended with the ``human_attributes`` and ``bot_attributes`` keys:
79 |
80 | .. code:: json
81 |
82 | [{"text": "hello",
83 | "confidence": 0.33,
84 | "human_attributes":
85 | {"name": "Vasily"},
86 | "bot_attributes":
87 | {"persona": ["I like swimming.", "I have a nice swimming suit."]}}]
88 |
89 | Everything sent to the ``human_attributes`` and ``bot_attributes`` keys will update the `user` field in the same
90 | utterance for the human and in the next utterance for the bot. Please refer to the agent state_ documentation for more information about **User** object updates.
91 |
92 | It is also possible for a skill to send any additional key to the state:
93 |
94 | .. code:: json
95 |
96 | [{"text": "hello",
97 | "confidence": 0.33,
98 | "any_key": "any_value"}]
99 |
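For orientation only, here is a minimal, hypothetical sketch of a skill web service built with ``aiohttp``. How the request is parsed depends entirely on your dialog formatter, and the response is shaped for the built-in single-sample HTTP connector, which takes the first element of the returned list:

.. code:: python

    # Toy skill: always answers "hello" with confidence 0.33.
    from aiohttp import web

    async def respond(request):
        _state = await request.json()  # shape depends on your dialog formatter
        hypotheses = [{"text": "hello", "confidence": 0.33}]
        # One entry per sample in the request batch; a single sample here.
        return web.json_response([hypotheses])

    app = web.Application()
    app.add_routes([web.post('/respond', respond)])

    if __name__ == '__main__':
        web.run_app(app, port=8080)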
100 |
101 | Response Selector
102 | =================
103 |
104 | Unlike the Skill Selector, a Response Selector service should select a *single* skill as the source of the
105 | final version of the response. The service returns the name of the selected skill, the text (which may be
106 | overwritten relative to the original skill response) and the confidence (which may also be overwritten):
107 |
108 | .. code:: json
109 |
110 | {"skill_name": "chitchat",
111 | "text": "Hello, Joe!",
112 | "confidence": 0.3}
113 |
114 | It is also possible for a Response Selector to overwrite any ``human`` or ``bot`` attributes:
115 |
116 | .. code:: json
117 |
118 | {"skill_name": "chitchat",
119 | "text": "Hello, Joe!",
120 | "confidence": 0.3,
121 | "human_attributes": {"name": "Ivan"}}
122 |
123 | Postprocessor
124 | =============
125 |
126 | A Postprocessor service can rewrite an utterance selected by the Response Selector. For example, it can
127 | take the user's name from the state and add it to the final answer.
128 |
129 | If a response was modified by the Postprocessor, the new version goes to the ``text`` field of the final
130 | utterance and is shown to the user, while the utterance selected by the Response Selector goes to the ``orig_text`` field.
131 |
132 | .. code:: json
133 |
134 | "Goodbye, Joe!"
135 |
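As a toy illustration (the helper name and the way the user's name is obtained are assumptions that depend on your formatters), the core of such a personalisation step could be as simple as:

.. code:: python

    # Hypothetical postprocessing helper: appends the user's name if known.
    def personalize(text: str, human_profile: dict) -> str:
        name = human_profile.get("name")
        return f"{text.rstrip('!.')}, {name}!" if name else text

    # personalize("Goodbye.", {"name": "Joe"}) -> "Goodbye, Joe!"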
136 |
137 | .. _state: https://deeppavlov-agent.readthedocs.io/en/latest/_static/api.html
138 |
--------------------------------------------------------------------------------
/deeppavlov_agent/tests/workflow_manager_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from ..core.workflow_manager import WorkflowManager
3 | from uuid import uuid4
4 |
5 |
6 | class TestDialog:
7 | def __init__(self, id):
8 | self.id = id
9 |
10 |
11 | class TestService:
12 | def __init__(self, name):
13 | self.name = name
14 |
15 |
16 | class TestWorkflowManagerDialog(unittest.TestCase):
17 | def setUp(self):
18 | self.workflow = WorkflowManager()
19 | self.dialog_id = uuid4().hex
20 | self.workflow.add_workflow_record(TestDialog(self.dialog_id))
21 |
22 | def test_internal_params(self):
23 | self.assertTrue(self.dialog_id in self.workflow.workflow_records)
24 | self.assertEqual(1, len(self.workflow.workflow_records))
25 |
26 | def test_add_another_dialog(self):
27 | another_dialog_id = uuid4().hex
28 | self.workflow.add_workflow_record(TestDialog(another_dialog_id))
29 | self.assertTrue(self.dialog_id in self.workflow.workflow_records)
30 | self.assertTrue(another_dialog_id in self.workflow.workflow_records)
31 | self.assertEqual(2, len(self.workflow.workflow_records))
32 |
33 | def test_add_duplicate_dialog(self):
34 | with self.assertRaises(ValueError):
35 | self.workflow.add_workflow_record(TestDialog(self.dialog_id))
36 |
37 | def test_flush_record(self):
38 | workflow_record = self.workflow.flush_record(self.dialog_id)
39 | self.assertTrue(isinstance(workflow_record, dict))
40 | self.assertEqual(workflow_record['dialog'].id, self.dialog_id)
41 |
42 | def test_add_task(self):
43 | payload = uuid4().hex
44 | task_service = TestService('testservice')
45 | task_id = self.workflow.add_task(self.dialog_id, task_service, payload, 1)
46 | self.assertTrue(task_id is not None)
47 | self.assertEqual(1, len(self.workflow.tasks))
48 | self.assertTrue(task_id in self.workflow.tasks)
49 |
50 | def test_complete_task(self):
51 | payload = uuid4().hex
52 | response = '123'
53 | task_service = TestService('testservice')
54 | task_id = self.workflow.add_task(self.dialog_id, task_service, payload, 1)
55 | workflow_record, task = self.workflow.complete_task(task_id, response)
56 | self.assertTrue(isinstance(task, dict))
57 | self.assertTrue(isinstance(workflow_record, dict))
58 | self.assertEqual(task['service'].name, task_service.name)
59 | self.assertEqual(task['dialog'], workflow_record['dialog'].id)
60 |
61 | def test_double_complete_task(self):
62 | payload = uuid4().hex
63 | response = '123'
64 | task_service = TestService('testservice')
65 | task_id = self.workflow.add_task(self.dialog_id, task_service, payload, 1)
66 | self.workflow.complete_task(task_id, response)
67 | workflow_record, task = self.workflow.complete_task(task_id, response)
68 | self.assertTrue(workflow_record is None)
69 | self.assertTrue(task is None)
70 |
71 | def test_next_tasks(self):
72 | payload = uuid4().hex
73 | response = '123'
74 | done_service = TestService(uuid4().hex)
75 | waiting_service = TestService(uuid4().hex)
76 | skipped_service = TestService(uuid4().hex)
77 |
78 | self.workflow.skip_service(self.dialog_id, skipped_service)
79 | task_id = self.workflow.add_task(self.dialog_id, done_service, payload, 1)
80 | self.workflow.complete_task(task_id, response)
81 | self.workflow.add_task(self.dialog_id, waiting_service, payload, 1)
82 |
83 | done, waiting, skipped = self.workflow.get_services_status(self.dialog_id)
84 | self.assertTrue(done_service.name in done)
85 | self.assertTrue(waiting_service.name in waiting)
86 | self.assertTrue(skipped_service.name in skipped)
87 |
88 | def test_flush(self):
89 | payload = uuid4().hex
90 | response = '123'
91 | done_service = TestService(uuid4().hex)
92 | waiting_service = TestService(uuid4().hex)
93 | skipped_service = TestService(uuid4().hex)
94 |
95 | self.workflow.skip_service(self.dialog_id, skipped_service)
96 | done_task_id = self.workflow.add_task(self.dialog_id, done_service, payload, 1)
97 | self.workflow.complete_task(done_task_id, response)
98 | waiting_task_id = self.workflow.add_task(self.dialog_id, waiting_service, payload, 1)
99 |
100 | workflow_record = self.workflow.flush_record(self.dialog_id)
101 | self.assertEqual(self.dialog_id, workflow_record['dialog'].id)
102 |
103 | workflow_record, late_task = self.workflow.complete_task(waiting_task_id, response)
104 | self.assertEqual(self.dialog_id, workflow_record['dialog'].id)
105 | self.assertTrue('dialog' in late_task)
106 | self.assertEqual(self.dialog_id, late_task['dialog'])
107 |
108 |
109 | if __name__ == '__main__':
110 | unittest.main()
111 |
--------------------------------------------------------------------------------
/deeppavlov_agent/http_api/templates/chat.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 | {% block title %}Dialogs list{% endblock %}
3 | {% block head %}
4 | {{ super() }}
5 |
6 |
7 |
8 |
9 | {% endblock %}
10 | {% block content %}
11 |
17 |
24 |
25 |
26 |
27 |
28 |
119 | {% endblock %}
120 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/pipeline.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict, Counter
2 |
3 |
4 | class Pipeline:
5 | def __init__(self, services, input_service, responder_service, last_chance_service, timeout_service):
6 | self.last_chance_service = last_chance_service
7 | self.timeout_service = timeout_service
8 | wrong_names = [k for k, v in Counter([i.name for i in services]).items() if v != 1]
9 | if wrong_names:
10 | raise ValueError(f'there are duplicate service names present: {wrong_names}')
11 |
12 | self.services = {i.name: i for i in services}
13 | wrong_links = self.process_service_names()
14 | if wrong_links:
15 | print('wrong links in config were detected: ', dict(wrong_links))
16 |
17 | self.add_input_service(input_service)
18 | self.add_responder_service(responder_service)
19 | self.fill_dependent_service_chains_and_required_services()
20 |
21 | def get_service_by_name(self, service_name):
22 | if not service_name:
23 | return None
24 |
25 | service = self.services.get(service_name, None)
26 | if not service:
27 | raise ValueError(f'service {service_name} does not exist')
28 | return service
29 |
30 | def process_service_names(self):
31 | wrong_names = defaultdict(list)
32 | for service in self.services.values():
33 | for name_prev_service in service.names_previous_services.union(service.names_required_previous_services):
34 | if name_prev_service not in self.services:
35 | wrong_names[service.name].append(name_prev_service)
36 | continue
37 | service.previous_services.add(self.services[name_prev_service])
38 | self.services[name_prev_service].next_services.add(service)
39 | return wrong_names  # wrong names mean that some service names used in previous_services don't exist
40 |
41 | def get_next_services(self, done: set = None, waiting: set = None, skipped: set = None):
42 | done = done or set()
43 | waiting = waiting or set()
44 | skipped = skipped or set()
45 |
46 | if (self.last_chance_service and self.last_chance_service.name in done) or \
47 | (self.timeout_service and self.timeout_service.name in done):
48 | return [service for service in self.services.values() if service.is_responder()]  # a failure handler already ran, so go straight to the responder
49 | completed_names = done | skipped  # services that no longer block their successors
50 | service_names = set(self.services.keys())
51 | next_service_names = set()
52 | while service_names:
53 | sn = service_names.pop()
54 | service = self.services[sn]
55 | if {i.name for i in service.previous_services} <= completed_names:
56 | next_service_names.add(sn)
57 | else:
58 | for i in service.next_services:
59 | service_names.discard(i.name)
60 |
61 | next_service_names = next_service_names - completed_names - waiting
62 |
63 | next_services = []
64 | for sn in next_service_names:
65 | service = self.services[sn]
66 | if not {i.name for i in service.previous_services} <= skipped:  # keep only services that still have at least one non-skipped predecessor
67 | next_services.append(service)
68 |
69 | if not next_services and not waiting:
70 | return [self.last_chance_service]  # nothing runnable and nothing in flight: fall back to the last chance service
71 |
72 | return next_services
73 |
74 | def add_responder_service(self, service):
75 | if not service.is_responder():
76 | raise ValueError('service should be a responder')
77 | endpoints = [s for s in self.services.values() if not s.next_services and 'responder' not in s.tags]
78 | service.previous_services = set(endpoints)
79 | service.names_previous_services = {s.name for s in endpoints}
80 | self.services[service.name] = service
81 |
82 | for s in endpoints:
83 | self.services[s.name].next_services.add(service)
84 |
85 | def add_input_service(self, service):
86 | if not service.is_input():
87 | raise ValueError('service should be an input')
88 | starting_services = [s for s in self.services.values() if not s.previous_services]
89 | service.next_services = set(starting_services)
90 | self.services[service.name] = service
91 |
92 | for s in starting_services:
93 | self.services[s.name].previous_services.add(service)
94 |
95 | def topological_sort(self):
96 | order, nodes, state = [], set(self.services.keys()), {}
97 |
98 | def dfs(node, path):
99 | state[node] = 0  # 0: node is on the current DFS path
100 | for ns in self.services[node].next_services:
101 | ns_status = state.get(ns.name, None)
102 | if ns_status == 0:
103 | raise ValueError(f'Pipeline cycle was found {path}')
104 | elif ns_status == 1:
105 | continue
106 | nodes.discard(ns.name)
107 | dfs(ns.name, path + [ns.name])
108 | order.append(node)
109 | state[node] = 1  # 1: node and its descendants are fully processed
110 |
111 | starting_node = [i for i in self.services.values() if i.is_input()][0]
112 | nodes.discard(starting_node.name)
113 | dfs(starting_node.name, [])
114 | return order
115 |
116 | def fill_dependent_service_chains_and_required_services(self):
117 | for sn in self.topological_sort():
118 | service = self.services[sn]
119 | for i in service.names_required_previous_services:
120 | req = self.services[i]
121 | service.required_previous_services.add(req)
122 | req.dependent_services.add(service)
123 | for ds in service.dependent_services:
124 | req.dependent_services.add(ds)
125 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/log.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import logging.config
3 | from collections import defaultdict
4 | from datetime import datetime, timedelta
5 | from pathlib import Path
6 |
7 | import yaml
8 |
9 | from .service import Service
10 |
11 | agent_path = Path(__file__).resolve().parents[1]
12 |
13 |
14 | def init_logger():
15 | log_config_path = agent_path / 'log_config.yml'
16 |
17 | with log_config_path.open('r') as f:
18 | log_config = yaml.safe_load(f)
19 |
20 | log_dir_path = agent_path / 'logs'
21 | log_dir_path.mkdir(exist_ok=True)
22 |
23 | configured_loggers = [log_config.get('root', {})] + [logger for logger in
24 | log_config.get('loggers', {}).values()]
25 |
26 | used_handlers = {handler for log in configured_loggers for handler in log.get('handlers', [])}
27 |
28 | for handler_id, handler in list(log_config['handlers'].items()):
29 | if handler_id not in used_handlers:
30 | del log_config['handlers'][handler_id]
31 | elif 'filename' in handler.keys():
32 | filename = handler['filename']
33 |
34 | if filename[0] == '~':
35 | logfile_path = Path(filename).expanduser().resolve()
36 | elif filename[0] == '/':
37 | logfile_path = Path(filename).resolve()
38 | else:
39 | logfile_path = agent_path / filename
40 |
41 | handler['filename'] = str(logfile_path)
42 |
43 | logging.config.dictConfig(log_config)
44 |
45 |
46 | class BaseResponseLogger:
47 |
48 | def log_start(self, task_id: str, workflow_record: dict, service: Service) -> None:
49 | raise NotImplementedError
50 |
51 | def log_end(self, task_id: str, workflow_record: dict, service: Service) -> None:
52 | raise NotImplementedError
53 |
54 |
55 | class LocalResponseLogger(BaseResponseLogger):
56 | _enabled: bool
57 | _logger: logging.Logger
58 |
59 | def __init__(self, enabled: bool, cleanup_timedelta: int = 300) -> None:
60 | self._services_load = defaultdict(int)
61 | self._services_response_time = defaultdict(dict)
62 | self._tasks_buffer = dict()
63 | self._enabled = enabled
64 | self._timedelta = timedelta(seconds=cleanup_timedelta)
65 |
66 | if self._enabled:
67 | self._logger = logging.getLogger('service_logger')
68 | self._logger.setLevel(logging.DEBUG)
69 | fh = logging.FileHandler(agent_path / f'logs/{datetime.utcnow().strftime("%Y-%m-%d_%H-%M-%S_%f")}.log')
70 | fh.setLevel(logging.DEBUG)
71 | fh.setFormatter(logging.Formatter('%(message)s'))
72 | self._logger.addHandler(fh)
73 |
74 | def _log(self, time: datetime, task_id: str, workflow_record: dict, service: Service, status: str) -> None:
75 | service_name = service.name
76 | dialog_id = workflow_record['dialog'].id
77 | self._logger.info(f"{time.strftime('%Y-%m-%d %H:%M:%S.%f')}\t{dialog_id}\t{task_id}\t{status}\t{service_name}")
78 |
79 | def _cleanup(self, time):
80 | time_threshold = time - self._timedelta
81 |
82 | for key in list(self._tasks_buffer.keys()):
83 | if self._tasks_buffer[key] < time_threshold:
84 | del self._tasks_buffer[key]
85 | else:
86 | break
87 |
88 | for service_response_time in self._services_response_time.values():
89 | for start_time in list(service_response_time.keys()):
90 | if start_time < time_threshold:
91 | del service_response_time[start_time]
92 | else:
93 | break
94 |
95 | def log_start(self, task_id: str, workflow_record: dict, service: Service) -> None:
96 | start_time = datetime.utcnow()
97 |
98 | if service.is_input():
99 | self._services_load['agent'] += 1
100 | self._tasks_buffer[workflow_record['dialog'].id] = start_time
101 | elif not service.is_responder():
102 | self._tasks_buffer[task_id] = start_time
103 | self._services_load[service.label] += 1
104 |
105 | if self._enabled:
106 | self._log(start_time, task_id, workflow_record, service, 'start')
107 |
108 | def log_end(self, task_id: str, workflow_record: dict, service: Service, cancelled=False) -> None:
109 | end_time = datetime.utcnow()
110 |
111 | if service.is_responder():
112 | self._services_load['agent'] -= 1
113 | start_time = self._tasks_buffer.pop(workflow_record['dialog'].id, None)
114 | if start_time is not None and not cancelled:
115 | self._services_response_time['agent'][start_time] = (end_time - start_time).total_seconds()
116 | elif not service.is_input():
117 | start_time = self._tasks_buffer.pop(task_id, None)
118 | if start_time is not None:
119 | self._services_load[service.label] -= 1
120 | if not cancelled:
121 | self._services_response_time[service.label][start_time] = (end_time - start_time).total_seconds()
122 | self._cleanup(end_time)
123 | if self._enabled:
124 | self._log(end_time, task_id, workflow_record, service, 'end\t')
125 |
126 | def get_current_load(self):
127 | self._cleanup(datetime.utcnow())  # use the same clock (UTC) as log_start/log_end
128 | response_time = {}
129 | for service_name, time_dict in self._services_response_time.items():
130 | sm = sum(time_dict.values())
131 | ct = len(time_dict)
132 | response_time[service_name] = sm / ct if ct else 0
133 | response = {
134 | 'current_load': dict(self._services_load),
135 | 'response_time': response_time
136 | }
137 | return response
138 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/workflow_manager.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from uuid import uuid4
3 | from typing import Optional, Dict, Tuple
4 | from time import time
5 |
6 | from .state_schema import Dialog
7 | from .service import Service
8 |
9 |
10 | class WorkflowManager:
11 | def __init__(self):
12 | self.tasks = defaultdict(dict)
13 | self.workflow_records = defaultdict(dict)
14 |
15 | def add_workflow_record(self, dialog: Dialog, deadline_timestamp: Optional[float] = None, **kwargs) -> None:
16 | if str(dialog.id) in self.workflow_records.keys():
17 | raise ValueError(f'dialog with id {dialog.id} is already in workflow')
18 | workflow_record = {'dialog': dialog, 'services': defaultdict(dict), 'tasks': dict()}
19 | if deadline_timestamp:
20 | workflow_record['deadline_timestamp'] = deadline_timestamp
21 | workflow_record.update(kwargs)
22 | self.workflow_records[str(dialog.id)] = workflow_record
23 |
24 | def get_workflow_record(self, dialog_id):
25 | workflow_record = self.workflow_records.get(dialog_id, None)
26 | if workflow_record:
27 | return workflow_record
28 | return None
29 |
30 | def get_dialog_by_id(self, dialog_id: str) -> Dialog:
31 | workflow_record = self.workflow_records.get(dialog_id, None)
32 | if workflow_record:
33 | return workflow_record['dialog']
34 | return None
35 |
36 | def add_task(self, dialog_id: str, service: Service, payload: Dict, ind: int) -> str:
37 | workflow_record = self.workflow_records.get(dialog_id, None)
38 | if not workflow_record:
39 | return None
40 | task_id = uuid4().hex
41 | task_data = {'service': service, 'payload': payload, 'dialog': dialog_id, 'ind': ind}
42 | if service.name not in workflow_record['services']:
43 | workflow_record['services'][service.name] = {'pending_tasks': set(), 'done': False, 'skipped': False}
44 | workflow_record['services'][service.name][task_id] = {
45 | 'send': True, 'done': False, 'error': False,
46 | 'agent_send_time': time(), 'agent_done_time': None
47 | }
48 |
49 | workflow_record['services'][service.name]['pending_tasks'].add(task_id)
50 | workflow_record['tasks'][task_id] = {'task_data': task_data, 'task_object': None}
51 | self.tasks[task_id] = task_data
52 | return task_id
53 |
54 | def set_task_object(self, dialog_id, task_id, task_object):
55 | workflow_record = self.workflow_records.get(dialog_id, None)
56 | if workflow_record and task_id in workflow_record['tasks']:
57 | workflow_record['tasks'][task_id]['task_object'] = task_object
58 |
59 | def set_timeout_response_task(self, dialog_id, task_object):
60 | workflow_record = self.workflow_records.get(dialog_id, None)
61 | if workflow_record:
62 | workflow_record['timeout_response_task'] = task_object
63 |
64 | def get_pending_tasks(self, dialog_id):
65 | workflow_record = self.workflow_records.get(dialog_id, None)
66 | if workflow_record:
67 | return workflow_record['tasks']
68 |
69 | def skip_service(self, dialog_id: str, service: Service) -> None:
70 | workflow_record = self.workflow_records.get(dialog_id, None)
71 | if workflow_record:
72 | if service.name in workflow_record['services']:
73 | workflow_record['services'][service.name]['skipped'] = True
74 | else:
75 | workflow_record['services'][service.name] = {'pending_tasks': set(), 'done': False, 'skipped': True}
76 |
77 | def get_services_status(self, dialog_id: str) -> Tuple[set, set, set]:
78 | done = set()
79 | waiting = set()
80 | skipped = set()
81 | workflow_record = self.workflow_records.get(dialog_id, None)
82 | if workflow_record:
83 | for k, v in workflow_record['services'].items():
84 | if v['skipped'] or v.get('error', False):
85 | skipped.add(k)
86 | elif v['done']:
87 | done.add(k)
88 | else:
89 | waiting.add(k)
90 | return done, waiting, skipped
91 |
92 | def complete_task(self, task_id, response, **kwargs) -> Dict:
93 | task = self.tasks.pop(task_id, None)
94 | if not task:
95 | return None, None
96 |
97 | workflow_record = self.workflow_records.get(task['dialog'], None)
98 | if not workflow_record:
99 | return None, task
100 |
101 | workflow_record['tasks'].pop(task_id, None)
102 | workflow_record['services'][task['service'].name]['pending_tasks'].discard(task_id)
103 |
104 | if not workflow_record['services'][task['service'].name]['pending_tasks']:
105 | workflow_record['services'][task['service'].name]['done'] = True
106 | workflow_record['services'][task['service'].name][task_id]['agent_done_time'] = time()
107 |
108 | if isinstance(response, Exception):
109 | workflow_record['services'][task['service'].name][task_id]['error'] = True
110 | if not workflow_record['services'][task['service'].name]['pending_tasks']:
111 | workflow_record['services'][task['service'].name]['error'] = True
112 | else:
113 | workflow_record['services'][task['service'].name][task_id]['done'] = True
114 | workflow_record['services'][task['service'].name][task_id].update(**kwargs)
115 | return workflow_record, task
116 |
117 | def flush_record(self, dialog_id: str) -> Dict:
118 | workflow_record = self.workflow_records.pop(dialog_id, None)
119 | if not workflow_record:
120 | return None
121 | for i in workflow_record.pop('tasks', {}).keys():
122 | self.tasks[i]['workflow_record'] = workflow_record  # let late completions of pending tasks still find the flushed record
123 |
124 | return workflow_record
125 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/agent.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from time import time
3 | from typing import Any
4 |
5 | from .log import BaseResponseLogger
6 | from .pipeline import Pipeline
7 | from .state_manager import StateManager
8 | from .workflow_manager import WorkflowManager
9 |
10 |
11 | class Agent:
12 | _response_logger: BaseResponseLogger
13 |
14 | def __init__(self,
15 | pipeline: Pipeline,
16 | state_manager: StateManager,
17 | workflow_manager: WorkflowManager,
18 | response_logger: BaseResponseLogger) -> None:
19 | self.pipeline = pipeline
20 | self.state_manager = state_manager
21 | self.workflow_manager = workflow_manager
22 | self._response_logger = response_logger
23 |
24 | def flush_record(self, dialog_id: str):
25 | workflow_record = self.workflow_manager.flush_record(dialog_id)
26 | if 'timeout_response_task' in workflow_record:
27 | workflow_record['timeout_response_task'].cancel()
28 | return workflow_record
29 |
30 | async def register_msg(self, utterance, deadline_timestamp=None,
31 | require_response=False, **kwargs):
32 | dialog = await self.state_manager.get_or_create_dialog(**kwargs)
33 | dialog_id = str(dialog.id)
34 | service = self.pipeline.get_service_by_name('input')
35 | message_attrs = kwargs.pop('message_attrs', {})
36 |
37 | if require_response:
38 | event = asyncio.Event()
39 | kwargs['event'] = event
40 | kwargs['hold_flush'] = True
41 |
42 | self.workflow_manager.add_workflow_record(
43 | dialog=dialog, deadline_timestamp=deadline_timestamp, **kwargs)
44 | task_id = self.workflow_manager.add_task(dialog_id, service, utterance, 0)
45 | self._response_logger.log_start(task_id, {'dialog': dialog}, service)
46 | asyncio.create_task(self.process(task_id, utterance, message_attrs=message_attrs))
47 | if deadline_timestamp:
48 | self.workflow_manager.set_timeout_response_task(
49 | dialog_id, asyncio.create_task(self.timeout_process(dialog_id, deadline_timestamp))
50 | )
51 |
52 | if require_response:
53 | await event.wait()
54 | return self.flush_record(dialog_id)
55 |
56 | async def process(self, task_id, response: Any = None, **kwargs):
57 | workflow_record, task_data = self.workflow_manager.complete_task(task_id, response, **kwargs)
58 | if not workflow_record:
59 | return
60 | service = task_data['service']
61 |
62 | self._response_logger.log_end(task_id, workflow_record, service)
63 |
64 | if isinstance(response, Exception):
65 | # Skip all services that depend on the failed one
66 | for i in service.dependent_services:
67 | self.workflow_manager.skip_service(workflow_record['dialog'].id, i)
68 | else:
69 | response_data = service.apply_response_formatter(response)
70 | # Updating workflow with service response
71 | if service.state_processor_method:
72 | await service.state_processor_method(
73 | dialog=workflow_record['dialog'], payload=response_data,
74 | label=service.label,
75 | message_attrs=kwargs.pop('message_attrs', {}), ind=task_data['ind']
76 | )
77 |
78 | # Processing the case when the service is a skill selector
79 | if service and service.is_sselector():
80 | skipped_services = {s for s in service.next_services if s.label not in set(response_data)}
81 |
82 | for s in skipped_services:
83 | self.workflow_manager.skip_service(workflow_record['dialog'].id, s)
84 |
85 | # Flush the record and stop (no next services) if the service is a responder
86 | elif service.is_responder():
87 | if not workflow_record.get('hold_flush'):
88 | self.flush_record(workflow_record['dialog'].id)
89 | return
90 |
91 | # Calculating next steps
92 | done, waiting, skipped = self.workflow_manager.get_services_status(workflow_record['dialog'].id)
93 | next_services = self.pipeline.get_next_services(done, waiting, skipped)
94 |
95 | await self.create_processing_tasks(workflow_record, next_services)
96 |
97 | async def create_processing_tasks(self, workflow_record, next_services):
98 | for service in next_services:
99 | tasks = service.apply_dialog_formatter(workflow_record)
100 | for ind, task_data in enumerate(tasks):
101 | task_id = self.workflow_manager.add_task(workflow_record['dialog'].id, service, task_data, ind)
102 | self._response_logger.log_start(task_id, workflow_record, service)
103 | self.workflow_manager.set_task_object(
104 | workflow_record['dialog'].id,
105 | task_id,
106 | asyncio.create_task(
107 | service.connector_func(
108 | payload={'task_id': task_id, 'payload': task_data}, callback=self.process
109 | )
110 | )
111 | )
112 |
113 | async def timeout_process(self, dialog_id, deadline_timestamp):
114 | await asyncio.sleep(deadline_timestamp - time())
115 | workflow_record = self.workflow_manager.get_workflow_record(dialog_id)
116 | if not workflow_record:
117 | return
118 | next_services = [self.pipeline.timeout_service]
119 | for k, v in self.workflow_manager.get_pending_tasks(dialog_id).items():
120 | v['task_object'].cancel()
121 | self._response_logger.log_end(k, workflow_record, v['task_data']['service'], True)
122 |
123 | await self.create_processing_tasks(workflow_record, next_services)
124 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/connectors.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from typing import Any, Callable, Dict, List
3 | from collections import defaultdict
4 |
5 | import aiohttp
6 |
7 | from .transport.base import ServiceGatewayConnectorBase
8 |
9 |
10 | class HTTPConnector:
11 | def __init__(self, session: aiohttp.ClientSession, url: str):
12 | self.session = session
13 | self.url = url
14 |
15 | async def send(self, payload: Dict, callback: Callable):
16 | try:
17 | async with self.session.post(self.url, json=payload['payload']) as resp:
18 | resp.raise_for_status()
19 | response = await resp.json()
20 | await callback(
21 | task_id=payload['task_id'],
22 | response=response[0]
23 | )
24 | except Exception as e:
25 | response = e
26 | await callback(
27 | task_id=payload['task_id'],
28 | response=response
29 | )
30 |
31 |
32 | class AioQueueConnector:
33 | def __init__(self, queue):
34 | self.queue = queue
35 |
36 | async def send(self, payload: Dict, **kwargs):
37 | await self.queue.put(payload)
38 |
39 |
40 | class QueueListenerBatchifyer:
41 | def __init__(self, session, url, queue, batch_size):
42 | self.session = session
43 | self.url = url
44 | self.queue = queue
45 | self.batch_size = batch_size
46 |
47 | async def call_service(self, process_callable):
48 | while True:
49 | batch = []
50 | rest = self.queue.qsize()
51 | for _ in range(min(self.batch_size, rest)):
52 | item = await self.queue.get()
53 | batch.append(item)
54 | if batch:
55 | model_payload = self.glue_tasks(batch)
56 | async with self.session.post(self.url, json=model_payload) as resp:
57 | response = await resp.json()
58 | for task, task_response in zip(batch, response):
59 | asyncio.create_task(
60 | process_callable(
61 | task_id=task['task_id'],
62 | response=task_response
63 | )
64 | )
65 | await asyncio.sleep(0.1)
66 |
67 | def glue_tasks(self, batch):
68 | if len(batch) == 1:
69 | return batch[0]['payload']
70 | else:
71 | result = {k: [] for k in batch[0]['payload'].keys()}
72 | for el in batch:
73 | for k in result.keys():
74 | result[k].extend(el['payload'][k])
75 | return result
76 |
77 |
78 | class ConfidenceResponseSelectorConnector:
79 | async def send(self, payload: Dict, callback: Callable):
80 | try:
81 | response = payload['payload']['utterances'][-1]['hypotheses']
82 | best_skill = max(response, key=lambda x: x['confidence'])
83 | await callback(
84 | task_id=payload['task_id'],
85 | response=best_skill
86 | )
87 | except Exception as e:
88 | await callback(
89 | task_id=payload['task_id'],
90 | response=e
91 | )
92 |
93 |
94 | class EventSetOutputConnector:
95 | def __init__(self, service_name: str):
96 | self.service_name = service_name
97 |
98 | async def send(self, payload, callback: Callable):
99 | event = payload['payload'].get('event', None)
100 | if not event or not isinstance(event, asyncio.Event):
101 | raise ValueError("'event' key is not present in payload")
102 | await callback(
103 | task_id=payload['task_id'],
104 | response=" "
105 | )
106 | event.set()
107 |
108 |
109 | class AgentGatewayToChannelConnector:
110 | pass
111 |
112 |
113 | class AgentGatewayToServiceConnector:
114 | _to_service_callback: Callable
115 | _service_name: str
116 |
117 | def __init__(self, to_service_callback: Callable, service_name: str):
118 | self._to_service_callback = to_service_callback
119 | self._service_name = service_name
120 |
121 | async def send(self, payload: Dict, **_kwargs):
122 | await self._to_service_callback(payload=payload, service_name=self._service_name)
123 |
124 |
125 | class ServiceGatewayHTTPConnector(ServiceGatewayConnectorBase):
126 | _session: aiohttp.ClientSession
127 | _url: str
128 | _service_name: str
129 |
130 | def __init__(self, service_config: Dict) -> None:
131 | super().__init__(service_config)
132 | self._session = aiohttp.ClientSession()
133 | self._service_name = service_config['name']
134 | self._url = service_config['url']
135 |
136 | async def send_to_service(self, payloads: List[Dict]) -> List[Any]:
137 | batch = defaultdict(list)
138 | for payload in payloads:
139 | for key, value in payload.items():
140 | batch[key].extend(value)
141 | async with await self._session.post(self._url, json=batch) as resp:
142 | responses_batch = await resp.json()
143 |
144 | return responses_batch
145 |
146 |
147 | class PredefinedTextConnector:
148 | def __init__(self, response_text, annotations=None):
149 | self.response_text = response_text
150 | self.annotations = annotations or {}
151 |
152 | async def send(self, payload: Dict, callback: Callable):
153 | await callback(
154 | task_id=payload['task_id'],
155 | response={'text': self.response_text, 'annotations': self.annotations}
156 | )
157 |
158 |
159 | class PredefinedOutputConnector:
160 | def __init__(self, output):
161 | self.output = output
162 |
163 | async def send(self, payload: Dict, callback: Callable):
164 | await callback(
165 | task_id=payload['task_id'],
166 | response=self.output
167 | )
168 |
--------------------------------------------------------------------------------
/docs/source/intro/overview.rst:
--------------------------------------------------------------------------------
1 | Architecture Overview
2 | =====================
3 |
4 | Modern virtual assistants such as Amazon Alexa and Google Assistant integrate and orchestrate different
5 | conversational skills to address a wide spectrum of user tasks.
6 | **DeepPavlov Agent** is a framework for developing scalable and production-ready *multi-skill virtual assistants*,
7 | complex dialogue systems and chatbots.
8 |
9 | Key features:
10 |
11 | * scalability and reliability in a high-load environment due to the micro-service architecture
12 | * ease of adding and orchestrating conversational skills
13 | * shared memory of dialog state_ and NLP annotations accessible to all skills
14 |
15 | **Core concepts of DeepPavlov Agent architecture:**
16 |
17 | * ``Utterance`` is a single message produced by a human or a bot;
18 |
19 | * ``Service`` is an NLP model or any other external service that supports a REST API.
20 |
21 | DeepPavlov Agent orchestrates the following types of services:
22 |
23 | * ``Annotator`` is a service for NLP preprocessing of an utterance. It can implement some basic text processing like spell correction, named entity recognition, etc.;
24 |
25 | * ``Skill`` is a service producing a conversational response for a current dialogue state;
26 |
27 | * ``Skill Selector`` is a service that selects a subset of available skills
28 | for producing candidate responses;
29 |
30 | * ``Response Selector`` is a service selecting out of available candidates a response to be sent to the user;
31 |
32 | * ``Postprocessor`` is a service postprocessing a response utterance. It can do basic things
33 | like adding a user name, inserting emojis, etc.
34 |
35 | * ``Postprocessed Response`` is a final postprocessed conversational agent utterance that is shown to the user.
36 |
37 | * ``State`` stores the current dialogs between users and a conversational agent as well as other
38 | information serialized in **json** format. State is used to share information
39 | across the services and stores all required information about the current dialogs.
40 | Dialogue state is documented `here <https://deeppavlov-agent.readthedocs.io/en/latest/_static/api.html>`__.
41 |
42 | .. image:: ../_static/Agent_Pipeline_v2.png
43 | :height: 400
44 | :align: center
45 | :alt: Diagram of DeepPavlov Agent Architecture
46 |
47 | Installation
48 | ============
49 |
50 | DeepPavlov Agent requires Python >= 3.7 and can be installed with pip.
51 |
52 | .. code:: bash
53 |
54 | pip install deeppavlov_agent
55 |
56 |
57 | Running the Agent
58 | =================
59 |
60 | The agent can be run inside a container or on a local machine. The default agent port is **4242**.
61 | To launch the agent, enter:
62 |
63 | .. code:: bash
64 |
65 | python -m deeppavlov_agent.run -ch http_client -p 4242 -pl pipeline_conf.json -db db_conf.json -rl -d
66 |
67 | Command parameters are:
68 |
69 | * -ch - output channel for the agent; either ``http_client`` or ``cmd_client``
70 | * -p - port for http_client, default value is 4242
71 | * -pl - pipeline config path; you can pass several pipeline configs at a time, each subsequent one updates the previous
72 | * -db - database config path
73 | * -rl - include the response logger
74 | * -d - launch in debug mode (additional data in the http output)
75 |
76 |
77 | **HTTP api server**
78 | -------------------
79 |
80 | 1. **Web server accepts POST requests with application/json content-type**
81 |
82 | The request should be in the following form:
83 |
84 | .. code:: javascript
85 |
86 | {
87 | "user_id": "unique id of user",
88 | "payload": "phrase, which should be processed by agent"
89 | }
90 |
91 | Example of running request with curl:
92 |
93 | .. code:: bash
94 |
95 | curl --header "Content-Type: application/json" \
96 | --request POST \
97 | --data '{"user_id":"xyz","payload":"hello"}' \
98 | http://localhost:4242
99 |
100 | The agent returns a json response:
101 |
102 | .. code:: javascript
103 |
104 | {
105 | "user_id": "same user id as in request",
106 | "response": "phrase, which were generated by skills in order to respond"
107 | }
108 |
109 | In case of a malformed request, an HTTP error will be returned.
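
The same request can also be sent from Python, for example with the ``requests`` package (assuming the agent is running locally on port 4242):

.. code:: python

    import requests

    # Quick, hypothetical smoke test of a locally running agent
    resp = requests.post(
        "http://localhost:4242",
        json={"user_id": "xyz", "payload": "hello"},
    )
    print(resp.json()["response"])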
110 |
111 | 2. **Arbitrary input format of the Agent Server**
112 |
113 | If you want to send anything to the agent besides
114 | ``user_id`` and ``payload``, just pass it as an additional key-value item, for example:
115 |
116 | .. code:: bash
117 |
118 | curl --header "Content-Type: application/json" \
119 | --request POST \
120 | --data '{"user_id":"xyz","payload":"hello", "my_custom_dialog_id": 111}' \
121 | http://localhost:4242
122 |
123 | All additional items will be stored in the agent's ``state``, in the ``attributes`` field of a ``HumanUtterance``.
124 | Dialogue state is documented `here <https://deeppavlov-agent.readthedocs.io/en/latest/_static/api.html>`__.
125 |
126 | 3. **Retrieve dialogs from the database through GET requests**
127 |
128 | Dialog history is returned in json format, which can be easily prettified with various browser extensions.
129 |
130 | Dialogs can be accessed at the following endpoints (examples are shown for an agent running on http://localhost:4242):
131 |
132 | * http://localhost:4242/api/dialogs/ - append a dialog id to get that exact dialog
133 | * http://localhost:4242/api/user/ - append a user_id to get all dialogs of that user
134 |
135 | 4. **Load analytics**
136 |
137 | The number of processing tasks and the average response time for both the agent and individual services are
138 | provided in real time on the page http://localhost:4242/debug/current_load .
139 |
140 |
141 | Analyzing the data
142 | ==================
143 |
144 | The history of the agent's ``state`` for all dialogues is stored in a Mongo DB. The state_ includes user utterances with the corresponding responses, as well as all the additional data generated by the agent's services.
145 | The following Mongo collections can be dumped separately:
146 |
147 | * Human
148 | * Bot
149 | * User (Human & Bot)
150 | * HumanUtterance
151 | * BotUtterance
152 | * Utterance (HumanUtterance & BotUtterance)
153 | * Dialog
154 |
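For a quick look at what is stored, a minimal ``pymongo`` sketch such as the one below can be used; the connection URI and database name here are assumptions and must match your db config:

.. code:: python

    from pymongo import MongoClient

    # Hypothetical connection settings: adjust to your db config.
    client = MongoClient("mongodb://localhost:27017")
    db = client["dp_agent"]

    for name in db.list_collection_names():
        print(name, db[name].estimated_document_count())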
155 |
156 | .. _state: https://deeppavlov-agent.readthedocs.io/en/latest/_static/api.html
157 |
--------------------------------------------------------------------------------
/docs/source/built_in/connectors.rst:
--------------------------------------------------------------------------------
1 | Built-in connectors
2 | ===================
3 |
4 | Generally, a connector is a python class with a ``send`` method. It can be either a model (neural network or rule based) or an implementation of some transport protocol.
5 | However, we strongly recommend implementing neural network models as external services.
6 |
7 | There are two built-in connectors for the HTTP protocol: single sample and batchifying. You can, of course, send a batch of samples to your model using the single sample connector, but in this case you should form the batch with a proper dialog formatter.
8 | The batchifying connector will form a batch from the samples available at the time, but it can't guarantee the actual batch size, only its maximum size.
9 |
10 | There are three more connectors, which can be used for different purposes. Each of them can be configured as a *python* connector by its class name.
11 | You can read more on the connectors configuration in :ref:`connectors-config`.
12 |
13 | Built-in python connectors
14 | ==========================
15 |
16 | ConfidenceResponseSelectorConnector
17 | -----------------------------------
18 |
19 | This connector provides simple response selection functionality. It chooses the best hypothesis based on its ``confidence`` parameter. In order to use it, you should consider a few things:
20 |
21 | * You don't need to define a dialog formatter (if you use the built-in state manager)
22 | * You need to ensure that all of your skills (or services with the ``add_hypothesis`` SM method assigned) provide a ``confidence`` value
23 | * It returns the chosen hypothesis, so you don't need to define an output formatter either
24 | * No special configuration parameters are needed
25 |
26 | So the basic configuration for it is very simple:
27 |
28 | .. code:: json
29 |
30 | {"response_selector": {
31 | "connector": {
32 | "protocol": "python",
33 | "class_name": "ConfidenceResponseSelectorConnector"
34 | },
35 | "state_manager_method": "add_bot_utterance",
36 | "previous_services": ["place previous skill names here"]
37 | }}
38 |
39 | PredefinedTextConnector
40 | -----------------------
41 |
42 | This connector can be used to provide a simple way to answer in time, or in case of errors in your pipeline. It returns basic parameters, which can be used to form a proper bot utterance.
43 |
44 | * The ``text`` parameter will be the body of a bot utterance
45 | * Additionally, you can provide an ``annotations`` parameter in case you need certain annotations for the further dialog
46 | * There is no need to configure dialog and response formatters
47 |
48 | This example configuration represents a simple last chance service:
49 |
50 | .. code:: json
51 |
52 | {"last_chance_service": {
53 | "connector": {
54 | "protocol": "python",
55 | "class_name": "PredefinedTextConnector",
56 | "response_text": "Sorry, something went wrong inside. Please tell me, what did you say."
57 | "annotations": {"ner": "place your annotations here"}
58 | },
59 | "state_manager_method": "add_bot_utterance_last_chance",
60 | "tags": ["last_chance"]
61 | }}
62 |
63 | More on last chance and timeout service configuration can be found in the pipeline configuration documentation.
64 |
65 |
66 | PredefinedOutputConnector
67 | -------------------------
68 |
69 | This connector is quite similar to PredefinedTextConnector. It returns predefined values, but instead of the fixed ``text`` and ``annotations`` keys, it can be configured to return any arbitrary json-compatible data structure.
70 | The main purpose of this connector class is testing of pipeline routing, formatting or outputs. You can make a dummy service which will imitate (in terms of structure) the response of the desired model.
71 | This connector has only one initialisation parameter:
72 |
73 | * ``output`` - a list or dict which will be passed to the agent's callback as a payload
74 |
75 | This example configuration represents a dummy service imitating a skill:
76 |
77 | .. code:: json
78 |
79 | {"skill": {
80 | "connector": {
81 | "protocol": "python",
82 | "class_name": "PredefinedOutputConnector",
83 | "output": [{"text": "Hypotheses1", "confidence": 1}]
84 | },
85 | "dialog_formatter": "place your dialog formatter here",
86 | "response_formatter": "place your response formatter here",
87 | "state_manager_method": "add_hypothesis",
88 | "previous_services": ["list of the previous_services"]
89 | }}
90 |
91 | However, you can imitate any other type of service with this connector.
92 |
93 |
94 | Writing your own connectors
95 | ===========================
96 |
97 | In order to define your own connector, you should follow these requirements:
98 |
99 | * It should be a python class
100 | * You can pass initialisation parameters to it via the connector configuration (see :ref:`connectors-config`)
101 | * You need to implement an asynchronous method ``send(self, payload: Dict, callback: Callable)``
102 | * It should return a result to the agent using the ``callback`` function
103 | * The ``payload`` input parameter is a dict of the following structure:
104 |
105 | .. code:: json
106 |
107 | {
108 | "task_id": "unique identifyer of processing task",
109 | "payload": "single task output, of the associated dialog formatter"
110 | }
111 |
112 | So basically, your connector should look like this:
113 |
114 | .. code:: python
115 |
116 | class MyConnector:
117 | def __init__(self, **kwargs):
118 | ...  # your initialisation code here
119 |
120 | async def send(self, payload: Dict, callback: Callable):
121 | try:
122 | response = ...  # write the processing part here; the result should end up in "response"
123 | await callback(
124 | task_id=payload['task_id'],
125 | response=response # Supposing that result of the processing is stored in a variable named "response"
126 | )
127 | except Exception as e:
128 | # This part allows the agent to correctly process service internal errors
129 | # and call a "last chance" service without stopping the ongoing dialogs
130 | response = e
131 | await callback(
132 | task_id=payload['task_id'],
133 | response=response
134 | )
135 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Configuration file for the Sphinx documentation builder.
4 | #
5 | # This file does only contain a selection of the most common options. For a
6 | # full list see the documentation:
7 | # http://www.sphinx-doc.org/en/master/config
8 |
9 | # -- Path setup --------------------------------------------------------------
10 |
11 | # If extensions (or modules to document with autodoc) are in another directory,
12 | # add these directories to sys.path here. If the directory is relative to the
13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 | #
15 | # import os
16 | # import sys
17 | # sys.path.insert(0, os.path.abspath('.'))
18 |
19 |
20 | # -- Project information -----------------------------------------------------
21 |
22 | project = 'deeppavlov-agent'
23 | copyright = '2019, Moscow Institute of Physics and Technology (MIPT)'
24 | author = 'mipt'
25 |
26 | # The short X.Y version
27 | version = '0.1'
28 | # The full version, including alpha/beta/rc tags
29 | release = 'v0.1-alpha'
30 |
31 |
32 | # -- General configuration ---------------------------------------------------
33 |
34 | # If your documentation needs a minimal Sphinx version, state it here.
35 | #
36 | # needs_sphinx = '1.0'
37 |
38 | # Add any Sphinx extension module names here, as strings. They can be
39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
40 | # ones.
41 | extensions = [
42 | 'sphinx.ext.autodoc',
43 | 'sphinx.ext.doctest',
44 | 'sphinx.ext.intersphinx',
45 | 'sphinx.ext.todo',
46 | 'sphinx.ext.coverage',
47 | 'sphinx.ext.imgmath',
48 | 'sphinx.ext.ifconfig',
49 | 'sphinx.ext.viewcode',
50 | 'sphinx.ext.githubpages',
51 | 'sphinx.ext.autosectionlabel',
52 | ]
53 |
54 | # Add any paths that contain templates here, relative to this directory.
55 | templates_path = ['_templates']
56 |
57 | # The suffix(es) of source filenames.
58 | # You can specify multiple suffix as a list of string:
59 | #
60 | source_suffix = ['.rst', '.md']
61 | # source_suffix = '.rst'
62 |
63 | # The master toctree document.
64 | master_doc = 'index'
65 |
66 | # The language for content autogenerated by Sphinx. Refer to documentation
67 | # for a list of supported languages.
68 | #
69 | # This is also used if you do content translation via gettext catalogs.
70 | # Usually you set "language" from the command line for these cases.
71 | language = None
72 |
73 | # List of patterns, relative to source directory, that match files and
74 | # directories to ignore when looking for source files.
75 | # This pattern also affects html_static_path and html_extra_path.
76 | exclude_patterns = []
77 |
78 | # The name of the Pygments (syntax highlighting) style to use.
79 | pygments_style = None
80 |
81 |
82 | # -- Options for HTML output -------------------------------------------------
83 |
84 | # The theme to use for HTML and HTML Help pages. See the documentation for
85 | # a list of builtin themes.
86 | #
87 | html_theme = 'sphinx_rtd_theme'
88 |
89 | # Theme options are theme-specific and customize the look and feel of a theme
90 | # further. For a list of options available for each theme, see the
91 | # documentation.
92 | #
93 | # html_theme_options = {}
94 |
95 | # Add any paths that contain custom static files (such as style sheets) here,
96 | # relative to this directory. They are copied after the builtin static files,
97 | # so a file named "default.css" will overwrite the builtin "default.css".
98 | html_static_path = ['_static']
99 |
100 | # Custom sidebar templates, must be a dictionary that maps document names
101 | # to template names.
102 | #
103 | # The default sidebars (for documents that don't match any pattern) are
104 | # defined by theme itself. Builtin themes are using these templates by
105 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
106 | # 'searchbox.html']``.
107 | #
108 | # html_sidebars = {}
109 |
110 |
111 | # -- Options for HTMLHelp output ---------------------------------------------
112 |
113 | # Output file base name for HTML help builder.
114 | htmlhelp_basename = 'dp-agentdoc'
115 |
116 |
117 | # -- Options for LaTeX output ------------------------------------------------
118 |
119 | latex_elements = {
120 | # The paper size ('letterpaper' or 'a4paper').
121 | #
122 | # 'papersize': 'letterpaper',
123 |
124 | # The font size ('10pt', '11pt' or '12pt').
125 | #
126 | # 'pointsize': '10pt',
127 |
128 | # Additional stuff for the LaTeX preamble.
129 | #
130 | # 'preamble': '',
131 |
132 | # Latex figure (float) alignment
133 | #
134 | # 'figure_align': 'htbp',
135 | }
136 |
137 | # Grouping the document tree into LaTeX files. List of tuples
138 | # (source start file, target name, title,
139 | # author, documentclass [howto, manual, or own class]).
140 | latex_documents = [
141 | (master_doc, 'dp-agent.tex', 'dp-agent Documentation',
142 | 'mipt', 'manual'),
143 | ]
144 |
145 |
146 | # -- Options for manual page output ------------------------------------------
147 |
148 | # One entry per manual page. List of tuples
149 | # (source start file, name, description, authors, manual section).
150 | man_pages = [
151 | (master_doc, 'dp-agent', 'dp-agent Documentation',
152 | [author], 1)
153 | ]
154 |
155 |
156 | # -- Options for Texinfo output ----------------------------------------------
157 |
158 | # Grouping the document tree into Texinfo files. List of tuples
159 | # (source start file, target name, title, author,
160 | # dir menu entry, description, category)
161 | texinfo_documents = [
162 | (master_doc, 'deeppavlov-agent', 'deeppavlov-agent Documentation',
163 | author, 'deeppavlov-agent', 'One line description of project.',
164 | 'Miscellaneous'),
165 | ]
166 |
167 |
168 | # -- Options for Epub output -------------------------------------------------
169 |
170 | # Bibliographic Dublin Core info.
171 | epub_title = project
172 |
173 | # The unique identifier of the text. This can be a ISBN number
174 | # or the project homepage.
175 | #
176 | # epub_identifier = ''
177 |
178 | # A unique identification for the text.
179 | #
180 | # epub_uid = ''
181 |
182 | # A list of files that should not be packed into the epub file.
183 | epub_exclude_files = ['search.html']
184 |
185 |
186 | # -- Extension configuration -------------------------------------------------
187 |
188 | # -- Options for intersphinx extension ---------------------------------------
189 |
190 | # Example configuration for intersphinx: refer to the Python standard library.
191 | intersphinx_mapping = {'https://docs.python.org/': None}
192 |
193 | # -- Options for todo extension ----------------------------------------------
194 |
195 | # If true, `todo` and `todoList` produce output, else they produce nothing.
196 | todo_include_todos = True
197 |
--------------------------------------------------------------------------------
/deeppavlov_agent/http_api/handlers.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from datetime import datetime
3 | from string import hexdigits
4 | from time import time
5 |
6 | import aiohttp
7 | import aiohttp_jinja2
8 | from aiohttp import web
9 |
10 |
11 | async def handle_command(payload, user_id, state_manager):
12 | if payload in {'/start', '/close'} and state_manager:
13 | await state_manager.drop_active_dialog(user_id)
14 | return True
15 |
16 |
17 | class ApiHandler:
18 | def __init__(self, output_formatter, response_time_limit=5):
19 | self.output_formatter = output_formatter
20 | self.response_time_limit = response_time_limit
21 |
22 | async def handle_api_request(self, request):
23 | response = {}
24 | register_msg = request.app['agent'].register_msg
25 | if request.method == 'POST':
26 | if 'content-type' not in request.headers \
27 | or not request.headers['content-type'].startswith('application/json'):
28 | raise web.HTTPBadRequest(reason='Content-Type should be application/json')
29 | data = await request.json()
30 |
31 |             user_id = data.pop('user_id', None)
32 | payload = data.pop('payload', '')
33 |
34 | deadline_timestamp = None
35 | if self.response_time_limit:
36 | deadline_timestamp = time() + self.response_time_limit
37 |
38 | if not user_id:
39 | raise web.HTTPBadRequest(reason='user_id key is required')
40 |
41 | command_performed = await handle_command(payload, user_id, request.app['agent'].state_manager)
42 | if command_performed:
43 | return web.json_response({})
44 |
45 | response = await asyncio.shield(
46 | register_msg(utterance=payload, user_external_id=user_id,
47 | user_device_type=data.pop('user_device_type', 'http'),
48 | date_time=datetime.now(),
49 | location=data.pop('location', ''),
50 | channel_type='http_client',
51 | message_attrs=data, require_response=True,
52 | deadline_timestamp=deadline_timestamp)
53 | )
54 |
55 | if response is None:
56 | raise RuntimeError('Got None instead of a bot response.')
57 |
58 | return web.json_response(self.output_formatter(response['dialog'].to_dict()))
59 |
60 | async def dialog(self, request):
61 | state_manager = request.app['agent'].state_manager
62 | dialog_id = request.match_info['dialog_id']
63 | if len(dialog_id) == 24 and all(c in hexdigits for c in dialog_id):
64 | dialog_obj = await state_manager.get_dialog_by_id(dialog_id)
65 | if not dialog_obj:
66 | raise web.HTTPNotFound(reason=f'dialog with id {dialog_id} does not exist')
67 | return web.json_response(dialog_obj.to_dict())
68 | raise web.HTTPBadRequest(reason='dialog id should be 24-character hex string')
69 |
70 | async def dialogs_by_user(self, request):
71 | state_manager = request.app['agent'].state_manager
72 | user_external_id = request.match_info['user_external_id']
73 | dialogs = await state_manager.get_dialogs_by_user_ext_id(user_external_id)
74 | return web.json_response([i.to_dict() for i in dialogs])
75 |
76 | async def dialog_rating(self, request):
77 | state_manager = request.app['agent'].state_manager
78 | data = await request.json()
79 | dialog_id = data.pop('dialog_id')
80 | user_id = data.pop('user_id', None)
81 | rating = data.pop('rating')
82 | await state_manager.set_rating_dialog(user_id, dialog_id, rating)
83 | return web.Response()
84 |
85 | async def utterance_rating(self, request):
86 | state_manager = request.app['agent'].state_manager
87 | data = await request.json()
88 | user_id = data.pop('user_id', None)
89 | rating = data.pop('rating')
90 | utt_id = data.pop('utt_id')
91 | await state_manager.set_rating_utterance(user_id, utt_id, rating)
92 | return web.Response()
93 |
94 | async def options(self, request):
95 | return web.Response(headers={'Access-Control-Allow-Methods': 'POST, OPTIONS'})
96 |
97 |
98 | class PagesHandler:
99 | def __init__(self, debug=False):
100 | self.debug = debug
101 |
102 | async def ping(self, request):
103 | return web.json_response("pong")
104 |
105 | async def options(self, request):
106 | return web.Response(headers={'Access-Control-Allow-Methods': 'GET, OPTIONS'})
107 |
108 |
109 | class WSstatsHandler:
110 | def __init__(self):
111 | self.update_time = 0.5
112 |
113 | @aiohttp_jinja2.template('services_ws_highcharts.html')
114 | async def ws_page(self, request):
115 | return {}
116 |
117 | async def ws_handler(self, request):
118 | ws = web.WebSocketResponse()
119 | await ws.prepare(request)
120 | request.app['websockets'].append(ws)
121 | logger_stats = request.app['logger_stats']
122 |         while not ws.closed:
123 | data = dict(logger_stats.get_current_load())
124 | await ws.send_json(data)
125 | await asyncio.sleep(self.update_time)
126 |
127 | return ws
128 |
129 | async def options(self, request):
130 | return web.Response(headers={'Access-Control-Allow-Methods': 'GET, OPTIONS'})
131 |
132 |
133 | class WSChatHandler:
134 | def __init__(self, output_formatter):
135 | self.output_formatter = output_formatter
136 |
137 | @aiohttp_jinja2.template('chat.html')
138 | async def ws_page(self, request):
139 | return {}
140 |
141 | async def ws_handler(self, request):
142 | register_msg = request.app['agent'].register_msg
143 | ws = web.WebSocketResponse()
144 | await ws.prepare(request)
145 | while True:
146 | msg = await ws.receive()
147 | if msg.type == aiohttp.WSMsgType.text:
148 | data = msg.json()
149 |                 user_id = data.pop('user_id', None)
150 | payload = data.pop('payload', '')
151 | deadline_timestamp = None
152 | if not user_id:
153 | raise web.HTTPBadRequest(reason='user_id key is required')
154 | command_performed = await handle_command(payload, user_id, request.app['agent'].state_manager)
155 | if command_performed:
156 | await ws.send_json('command_performed')
157 | continue
158 |
159 | response = await register_msg(
160 | utterance=payload, user_external_id=user_id,
161 | user_device_type=data.pop('user_device_type', 'websocket'),
162 | date_time=datetime.now(),
163 | location=data.pop('location', ''),
164 | channel_type='ws_client',
165 | message_attrs=data, require_response=True,
166 | deadline_timestamp=deadline_timestamp
167 | )
168 | if response is None:
169 | raise RuntimeError('Got None instead of a bot response.')
170 | await ws.send_json(self.output_formatter(response['dialog'].to_dict()))
171 | else:
172 | await ws.close()
173 | break
174 |
175 | return ws
176 |
177 | async def options(self, request):
178 | return web.Response(headers={'Access-Control-Allow-Methods': 'GET, OPTIONS'})
179 |
--------------------------------------------------------------------------------
/deeppavlov_agent/core/state_manager.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 |
3 | from datetime import datetime
4 |
5 | from .state_schema import Bot, BotUtterance, Dialog, Human, HumanUtterance
6 | import logging
7 |
8 |
9 | class StateManager:
10 | def __init__(self, db):
11 | self._db = db
12 |
13 | async def add_human_utterance(self, dialog: Dialog, payload: Dict, label: str, **kwargs) -> None:
14 | dialog.add_human_utterance()
15 | dialog.utterances[-1].text = payload
16 | dialog.utterances[-1].user = dialog.human.to_dict()
17 | dialog.utterances[-1].attributes = kwargs.get('message_attrs', {})
18 |
19 | async def add_hypothesis(self, dialog: Dialog, payload: Dict, label: str, **kwargs):
20 | hypothesis = {'skill_name': label, 'annotations': {}}
21 | for h in payload:
22 | dialog.utterances[-1].hypotheses.append({**hypothesis, **h})
23 |
24 | async def add_annotation(self, dialog: Dialog, payload: Dict, label: str, **kwargs):
25 | dialog.utterances[-1].annotations[label] = payload
26 |
27 | async def add_annotation_prev_bot_utt(self, dialog: Dialog, payload: Dict, label: str, **kwargs):
28 | if len(dialog.utterances) > 1:
29 | dialog.utterances[-2].annotations[label] = payload
30 | dialog.utterances[-2].actual = False
31 |
32 | async def add_hypothesis_annotation(self, dialog: Dialog, payload: Dict, label: str, **kwargs):
33 | ind = kwargs['ind']
34 | dialog.utterances[-1].hypotheses[ind]['annotations'][label] = payload
35 |
36 | async def add_hypothesis_annotation_batch(self, dialog: Dialog, payload: Dict, label: str, **kwargs):
37 | if isinstance(dialog.utterances[-1], BotUtterance):
38 | return
39 | if len(dialog.utterances[-1].hypotheses) != len(payload["batch"]):
40 | for i in range(len(dialog.utterances[-1].hypotheses)):
41 | dialog.utterances[-1].hypotheses[i]['annotations'][label] = {}
42 | else:
43 | for i in range(len(payload["batch"])):
44 | dialog.utterances[-1].hypotheses[i]['annotations'][label] = payload["batch"][i]
45 |
46 | async def add_text(self, dialog: Dialog, payload: str, label: str, **kwargs):
47 | dialog.utterances[-1].text = payload
48 |
49 | async def update_human(self, human: Human, active_skill: Dict):
50 | attributes = active_skill.get('human_attributes', {})
51 | for attr_name, attr_value in attributes.items():
52 | if attr_name in human.to_dict():
53 | setattr(human, attr_name, attr_value)
54 | elif attr_name in human.profile:
55 | human.profile[attr_name] = attr_value
56 | else:
57 | human.attributes[attr_name] = attr_value
58 |
59 | async def update_bot(self, bot: Bot, active_skill: Dict):
60 | attributes = active_skill.get('bot_attributes', {})
61 | for attr_name, attr_value in attributes.items():
62 | if attr_name in bot.to_dict():
63 | setattr(bot, attr_name, attr_value)
64 | else:
65 | bot.attributes[attr_name] = attr_value
66 |
67 | async def add_bot_utterance(self, dialog: Dialog, payload: Dict, label: str, **kwargs) -> None:
68 | await self.update_human(dialog.human, payload)
69 | await self.update_bot(dialog.bot, payload)
70 | dialog.add_bot_utterance()
71 | dialog.utterances[-1].text = payload['text']
72 | dialog.utterances[-1].active_skill = payload['skill_name']
73 | dialog.utterances[-1].confidence = payload['confidence']
74 | dialog.utterances[-1].annotations = payload.get('annotations', {})
75 | dialog.utterances[-1].user = dialog.bot.to_dict()
76 |
77 | async def add_bot_utterance_last_chance(self, dialog: Dialog, payload: Dict, label: str, **kwargs) -> None:
78 | if isinstance(dialog.utterances[-1], HumanUtterance):
79 | dialog.add_bot_utterance()
80 | dialog.utterances[-1].text = payload['text']
81 | dialog.utterances[-1].active_skill = label
82 | dialog.utterances[-1].confidence = 0
83 | dialog.utterances[-1].annotations = payload['annotations']
84 | dialog.utterances[-1].user = dialog.bot.to_dict()
85 |
86 | async def add_bot_utterance_last_chance_overwrite(self, dialog: Dialog, payload: Dict, label: str, **kwargs) -> None:
87 | if isinstance(dialog.utterances[-1], HumanUtterance):
88 | dialog.add_bot_utterance()
89 | dialog.utterances[-1].text = payload['text']
90 | dialog.utterances[-1].active_skill = label
91 | dialog.utterances[-1].confidence = 0
92 | dialog.utterances[-1].annotations = payload['annotations']
93 | dialog.utterances[-1].user = dialog.bot.to_dict()
94 |
95 | async def add_failure_bot_utterance(self, dialog: Dialog, payload: Dict, label: str, **kwargs) -> None:
96 | dialog.add_bot_utterance()
97 | dialog.utterances[-1].text = payload
98 | dialog.utterances[-1].active_skill = label
99 | dialog.utterances[-1].confidence = 0
100 | dialog.utterances[-1].user = dialog.bot.to_dict()
101 |
102 | async def save_dialog(self, dialog: Dialog, payload: Dict, label: str, **kwargs) -> None:
103 | await dialog.save(self._db)
104 |
105 | async def get_or_create_dialog(self, user_external_id, channel_type, **kwargs):
106 | return await Dialog.get_or_create_by_ext_id(self._db, user_external_id, channel_type)
107 |
108 | async def get_dialog_by_id(self, dialog_id):
109 | return await Dialog.get_by_id(self._db, dialog_id)
110 |
111 | async def get_dialogs_by_user_ext_id(self, user_external_id):
112 | return await Dialog.get_many_by_ext_id(self._db, user_external_id)
113 |
114 | async def get_all_dialogs(self):
115 | return await Dialog.get_all(self._db)
116 |
117 | async def drop_active_dialog(self, user_external_id):
118 | user = await Human.get_or_create(self._db, user_external_id)
119 | await Dialog.drop_active(self._db, user._id)
120 |
121 | async def set_rating_dialog(self, user_external_id, dialog_id, rating):
122 | dialog = await Dialog.get_by_dialog_id(self._db, dialog_id, False)
123 | if not dialog:
124 | return False
125 | if 'ratings' not in dialog.attributes:
126 | dialog.attributes['ratings'] = []
127 | dialog.attributes['ratings'].append({'rating': rating, 'user_external_id': user_external_id, 'datetime': datetime.now()})
128 | await dialog.save(self._db)
129 |
130 | async def set_rating_utterance(self, user_external_id, utt_id, rating):
131 | utt = await BotUtterance.get_by_id(self._db, utt_id)
132 | if not utt:
133 | return False
134 | if 'ratings' not in utt.attributes:
135 | utt.attributes['ratings'] = []
136 | utt.attributes['ratings'].append({'rating': rating, 'user_external_id': user_external_id, 'datetime': datetime.now()})
137 | await utt.save(self._db)
138 |
139 | async def drop_and_rating_active_dialog(self, user_external_id, rating):
140 | user = await Human.get_or_create(self._db, user_external_id)
141 | await Dialog.set_rating_drop_active(self._db, user._id, rating)
142 |
143 | async def prepare_db(self):
144 | await BotUtterance.prepare_collection(self._db)
145 | await HumanUtterance.prepare_collection(self._db)
146 | await Human.prepare_collection(self._db)
147 | await Dialog.prepare_collection(self._db)
148 |
149 | async def get_channels(self):
150 | return await Dialog.get_channels(self._db)
151 |
--------------------------------------------------------------------------------
/docs/source/config/config.rst:
--------------------------------------------------------------------------------
1 | Agent Configuration
2 | ====================
3 |
4 | Configuration of the pipeline and the database for the **Agent** can be defined
5 | in a ``json`` or ``yml`` file.
6 |
7 | Database Config Description
8 | ===========================
9 |
10 | Database configuration parameters are provided via the ``db_conf`` file. Currently, the agent supports MongoDB.
11 |
12 | All default values are taken from the `MongoDB documentation `__.
13 | Please refer to these docs if you need to change anything.
14 |
15 | Example of a database config:
16 |
17 | .. code-block:: json
18 |
19 | {
20 | "env": false,
21 | "host": "mongo",
22 | "port": 27017,
23 | "name": "dp_agent"
24 | }
25 |
26 | * **env**
27 |     * If set to **false** (or omitted), the specified parameter values are used for database initialisation as-is. Otherwise, the agent treats each parameter value as the name of an environment variable and reads the actual value from that variable (see the example after this list).
28 | * **host**
29 |     * The database host, or the env variable that stores the database host name.
30 | * **port**
31 |     * The database port, or the env variable that stores the database port.
32 | * **name**
33 |     * The name of the database, or the env variable that stores the database name.
34 |
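For example, if ``env`` is set to **true**, each parameter value is treated as the name of an
environment variable to read at startup (the variable names below are hypothetical):

.. code-block:: json

    {
        "env": true,
        "host": "DB_HOST",
        "port": "DB_PORT",
        "name": "DB_NAME"
    }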
35 |
36 | Pipeline Config Description
37 | ===========================
38 |
39 | Pipeline configuration parameters are specified in the ``pipeline_conf`` file.
40 | There are two sections in ``pipeline_conf``: one configures Services and the other configures Connectors.
41 |
42 | .. _services-config:
43 |
44 | **Services Config**
45 | -------------------
46 |
47 | A service is a single node of the pipeline graph, i.e. a single step in processing a user message.
48 | In ``pipeline_conf`` all services are grouped under the ``services`` key.
49 | 
50 | Example of a service config (a concrete sample and a formatter sketch follow the field descriptions below):
51 |
52 | .. code-block:: json
53 |
54 | {"group_name": {
55 | "service_label": {
56 | "dialog_formatter": "dialog formatter",
57 | "response_formatter": "response formatter",
58 | "connector": "used connector",
59 | "previous_services": "list of previous services",
60 | "required_previous_services": "list of previous services",
61 | "state_manager_method": "associated state manager method",
62 | "tags": "list of tags"
63 | }
64 | }
65 | }
66 |
67 | * **group name**
68 | * This is an optional key. If it is specified then services can be referenced by their `group name` in ``previous_services`` and ``required_previous_services``.
69 |     * If `group name` is specified then the full service name is ``<group_name>.<service_label>``.
70 | * **service_label**
71 |     * The label of the service. It is used as the unique service name if the service is not grouped.
72 |     * Passed to the state manager method associated with the service, so ``service_label`` is saved in the state.
73 | * **dialog_formatter**
74 |     * Generates a list of tasks for the service from a dialog state.
75 |     * Can be configured as ``<module_name>:<formatter_name>``.
76 |     * A formatter can generate several tasks from the same dialog, for example if you want to annotate all hypotheses.
77 |     * Each generated task corresponds to a single valid request payload that the service can process without further formatting. A minimal formatter sketch is shown after this list.
78 | * **response_formatter**
79 |     * Maps a service response to the dialog state format.
80 |     * Can be configured as ``<module_name>:<formatter_name>``.
81 |     * Optional parameter. If not specified, the unformatted service output is sent to the state manager method.
82 | * **connector**
83 |     * Specifies a connector to the service. It can be configured here or in the `connectors` section.
84 |     * Can be given either inline (as a dictionary with connector parameters) or as the name of a connector declared in the `connectors` section.
85 | * **previous_services**
86 |     * A list of services that must be executed (or skipped, or have responded with an error) before data is sent to this service.
87 |     * Should contain either group names or service names.
88 | * **required_previous_services**
89 |     * A list of services that must complete successfully before this service runs, because it depends on their output.
90 |     * If at least one of the ``required_previous_services`` is skipped or finishes with an error, the service is not executed.
91 | * Should contain either group names or service names.
92 | * **state_manager_method**
93 | * Name of a ``StateManager`` class method to be executed after the service response.
94 | * **tags**
95 |     * Tags associated with the service to indicate specific behaviour.
96 |         * **selector** - corresponds to a skill selector service. Such a service returns a list of skills selected for response generation.
97 |         * **timeout** - corresponds to a timeout service, which is called when the processing time exceeds the specified limit.
98 |         * **last_chance** - corresponds to a last chance service, which is called if other services in the pipeline have returned errors and further processing is impossible.
99 |
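As a concrete (hypothetical) sample, the following entry defines an ``annotators.ner`` service that
sends dialogs to an NER model through the ``connectors.ner`` connector and stores its output with the
``add_annotation`` state manager method; the group, service, formatter, and connector names are assumptions:

.. code-block:: json

    {"annotators": {
        "ner": {
            "connector": "connectors.ner",
            "dialog_formatter": "my_formatters:ner_dialog_formatter",
            "response_formatter": "my_formatters:ner_response_formatter",
            "state_manager_method": "add_annotation"
        }
    }
    }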
100 |
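The formatters referenced as ``<module_name>:<formatter_name>`` are plain Python callables. Below is a
minimal sketch, assuming a hypothetical ``my_formatters`` module and an NER service that expects a batch
of sentences; it only illustrates the contract described above (the dialog formatter turns a dialog state
into a list of task payloads, the response formatter maps the raw service output to what is stored in the state):

.. code-block:: python

    # my_formatters.py -- hypothetical module referenced from pipeline_conf
    from typing import Dict, List


    def ner_dialog_formatter(dialog: Dict) -> List[Dict]:
        # one task per dialog: annotate the text of the last utterance
        return [{"sentences": [dialog["utterances"][-1]["text"]]}]


    def ner_response_formatter(response: Dict) -> Dict:
        # keep only the part of the service output that should go into the state
        return {"entities": response.get("entities", [])}
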
101 | .. _connectors-config:
102 |
103 | **Connectors config**
104 | ---------------------
105 |
106 | A connector represents a function to which tasks are sent for processing.
107 | It can be an implementation of a data transfer protocol or a model implemented directly in Python.
108 | Since the agent relies on asynchronous execution and can be slowed down by blocking synchronous parts,
109 | it is strongly advised to run computationally heavy services separately from the agent
110 | and to use a transfer protocol (such as HTTP) for data exchange.
111 | 
112 | There are several ways to configure a connector:
113 |
114 | 1. *Built-in HTTP*
115 |
116 | .. code:: json
117 |
118 | {"connector name": {
119 | "protocol": "http",
120 | "url": "connector url",
121 | "batch_size": "batch size for the service"
122 | }
123 | }
124 |
125 | * **connector name**
126 |     * The name of the connector. It is used in the `services` part of the config to associate a service with the connector.
127 | * **protocol**
128 |     * http
129 | * **url**
130 |     * The actual URL where the external service API is accessible. It should be in the ``http://<host>:<port>/<path>`` format.
131 | * **batch_size**
132 |     * The maximum number of tasks that will be sent to the service in one batch. If not specified, it is interpreted as 1.
133 |     * If the value is 1, the `HTTPConnector `__ class is used.
134 |     * If the value is greater than one, the agent uses an `AioQueueConnector `__, which sends data to an asyncio queue. In parallel, a `QueueListenerBatchifyer `__ worker collects data from the queue, assembles batches and sends them to the service. A concrete connector example follows this list.
135 |
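For instance, a batched HTTP connector for a hypothetical NER service could be declared as follows
(the connector name, host, port, and path are assumptions):

.. code-block:: json

    {"ner": {
        "protocol": "http",
        "url": "http://ner:8021/model",
        "batch_size": 10
    }
    }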
136 |
137 | 2. *Python class*
138 |
139 | .. code:: json
140 |
141 | {"connector name": {
142 | "protocol": "python",
143 | "class_name": "class name in 'python module name:class name' format",
144 | "other parameter 1": "",
145 | "other parameter 2": ""
146 | }
147 | }
148 |
149 | * **connector name**
150 |     * Same as in the HTTP connector case.
151 | * **protocol**
152 | * python
153 | * **class_name**
154 |     * The path to the connector's class in ``<module_name>:<class_name>`` format.
155 |     * The connector's class should implement an asynchronous ``send(self, payload: Dict, callback: Callable)`` method (a minimal sketch is given after this list).
156 |     * ``payload`` represents a single task provided by the dialog formatter associated with the service, wrapped together with a ``task_id``: :code:`{'task_id': some_uuid, 'payload': dialog_formatter_task_data}`
157 |     * ``callback`` is an asynchronous function `process `__. You should call it with the service response and the ``task_id`` after processing.
158 | * **other parameters**
159 |     * Any json-compatible parameters, which are passed to the connector class initialisation as ``**kwargs``.
160 |
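To make this interface concrete, here is a minimal sketch of a custom python connector. The module and
class names are hypothetical; only the ``send`` signature and the ``callback`` contract come from the
description above:

.. code-block:: python

    # my_connectors.py -- hypothetical module, referenced as "my_connectors:EchoConnector"
    from typing import Callable, Dict


    class EchoConnector:
        def __init__(self, **kwargs):
            # any extra json-compatible parameters from the connector config end up here
            self.config = kwargs

        async def send(self, payload: Dict, callback: Callable):
            # payload = {'task_id': some_uuid, 'payload': dialog_formatter_task_data}
            response = {"text": str(payload["payload"]), "confidence": 0.5}
            await callback(task_id=payload["task_id"], response=response)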
--------------------------------------------------------------------------------
/deeppavlov_agent/parse_config.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from collections import defaultdict
3 | from importlib import import_module
4 | from typing import Dict
5 |
6 | import aiohttp
7 |
8 | from .core.connectors import (AgentGatewayToServiceConnector,
9 | AioQueueConnector, HTTPConnector,
10 | QueueListenerBatchifyer, PredefinedOutputConnector,
11 | PredefinedTextConnector, ConfidenceResponseSelectorConnector)
12 | from .core.service import Service, simple_workflow_formatter
13 | from .core.state_manager import StateManager
14 | from .core.transport.mapping import GATEWAYS_MAP
15 | from .core.transport.settings import TRANSPORT_SETTINGS
16 | from .state_formatters import all_formatters
17 |
18 |
19 | built_in_connectors = {
20 | "PredefinedOutputConnector": PredefinedOutputConnector,
21 | "PredefinedTextConnector": PredefinedTextConnector,
22 | "ConfidenceResponseSelectorConnector": ConfidenceResponseSelectorConnector
23 | }
24 |
25 |
26 | class PipelineConfigParser:
27 | def __init__(self, state_manager: StateManager, config: Dict):
28 | self.config = config
29 | self.state_manager = state_manager
30 | self.services = []
31 | self.services_names = defaultdict(set)
32 | self.last_chance_service = None
33 | self.timeout_service = None
34 | self.connectors = {}
35 | self.workers = []
36 | self.session = None
37 | self.gateway = None
38 | self.imported_modules = {}
39 |
40 | connectors_module_name = self.config.get('connectors_module', None)
41 | if connectors_module_name:
42 | self.connectors_module = import_module(connectors_module_name)
43 | else:
44 | self.connectors_module = None
45 |
46 | formatters_module_name = self.config.get('formatters_module', None)
47 | if formatters_module_name:
48 | self.formatters_module = import_module(formatters_module_name)
49 | else:
50 | self.formatters_module = None
51 |
52 | self.fill_connectors()
53 | self.fill_services()
54 |
55 | def setup_module_from_config(self, name_var):
56 | module = None
57 | connectors_module_name = self.config.get(name_var, None)
58 | if connectors_module_name:
59 | module = import_module(connectors_module_name)
60 | return module
61 |
62 | def get_session(self):
63 | if not self.session:
64 | self.session = aiohttp.ClientSession()
65 | return self.session
66 |
67 | def get_gateway(self, on_channel_callback=None, on_service_callback=None):
68 | if not self.gateway:
69 | transport_type = TRANSPORT_SETTINGS['transport']['type']
70 | gateway_cls = GATEWAYS_MAP[transport_type]['agent']
71 | self.gateway = gateway_cls(config=TRANSPORT_SETTINGS,
72 | on_service_callback=on_service_callback,
73 | on_channel_callback=on_channel_callback)
74 | return self.gateway
75 |
76 | def get_external_module(self, module_name: str):
77 | if module_name not in self.imported_modules:
78 | module = import_module(module_name)
79 | self.imported_modules[module_name] = module
80 | else:
81 | module = self.imported_modules[module_name]
82 | return module
83 |
84 | def make_connector(self, name: str, data: Dict):
85 | workers = []
86 | if data['protocol'] == 'http':
87 | connector = None
88 | workers = []
89 | if 'urllist' in data or 'num_workers' in data or data.get('batch_size', 1) > 1:
90 | queue = asyncio.Queue()
91 | batch_size = data.get('batch_size', 1)
92 | urllist = data.get('urllist', [data['url']] * data.get('num_workers', 1))
93 | connector = AioQueueConnector(queue)
94 | for url in urllist:
95 | workers.append(QueueListenerBatchifyer(self.get_session(), url, queue, batch_size))
96 | else:
97 | connector = HTTPConnector(self.get_session(), data['url'])
98 |
99 | elif data['protocol'] == 'AMQP':
100 | gateway = self.get_gateway()
101 | service_name = data.get('service_name') or data['connector_name']
102 | connector = AgentGatewayToServiceConnector(to_service_callback=gateway.send_to_service,
103 | service_name=service_name)
104 |
105 | elif data['protocol'] == 'python':
106 | params = data['class_name'].split(':')
107 |             if len(params) == 1:
108 |                 connector_class, module_provided_str = None, 'no connectors module was provided'
109 |                 if params[0] in built_in_connectors:
110 |                     connector_class = built_in_connectors[params[0]]
111 |                     module_provided_str = 'in deeppavlov_agent built in connectors'
112 |                 elif self.connectors_module:
113 |                     connector_class = getattr(self.connectors_module, params[0], None)
114 |                     module_provided_str = f'in {self.connectors_module.__name__} connectors module'
115 |                 if not connector_class:
116 |                     raise ValueError(f"Connector's python class {data['class_name']} from {name} "
117 |                                      f"connector was not found ({module_provided_str})")
118 | elif len(params) == 2:
119 | connector_class = getattr(self.get_external_module(params[0]), params[1], None)
120 | else:
121 | raise ValueError(f"Expected class description in a `module.submodules:ClassName` form, "
122 | f"but got `{data['class_name']}` (in {name} connector)")
123 | others = {k: v for k, v in data.items() if k not in {'protocol', 'class_name'}}
124 | connector = connector_class(**others)
125 |
126 | self.workers.extend(workers)
127 | self.connectors[name] = connector
128 |
129 | def make_service(self, group: str, name: str, data: Dict):
130 | def check_ext_module(class_name):
131 | params = class_name.split(':')
132 | formatter_class = None
133 | if len(params) == 2:
134 | formatter_class = getattr(self.get_external_module(params[0]), params[1], None)
135 | elif len(params) == 1 and self.formatters_module:
136 | formatter_class = getattr(self.formatters_module, params[0], None)
137 | return formatter_class
138 |
139 | connector_data = data.get('connector', None)
140 | service_name = ".".join([i for i in [group, name] if i])
141 | if 'workflow_formatter' in data and not data['workflow_formatter']:
142 | workflow_formatter = None
143 | else:
144 | workflow_formatter = simple_workflow_formatter
145 | connector = None
146 | if isinstance(connector_data, str):
147 | connector = self.connectors.get(connector_data, None)
148 | elif isinstance(connector_data, dict):
149 | connector = self.connectors.get(service_name, None)
150 | if not connector:
151 | raise ValueError(f'connector in pipeline.{service_name} is not declared')
152 |
153 | sm_data = data.get('state_manager_method', None)
154 | if sm_data:
155 | sm_method = getattr(self.state_manager, sm_data, None)
156 | if not sm_method:
157 | raise ValueError(f"state manager doesn't have a method {sm_data} (declared in {service_name})")
158 | else:
159 | sm_method = None
160 |
161 | dialog_formatter = None
162 | response_formatter = None
163 |
164 | dialog_formatter_name = data.get('dialog_formatter', None)
165 | response_formatter_name = data.get('response_formatter', None)
166 | if dialog_formatter_name:
167 | if dialog_formatter_name in all_formatters:
168 | dialog_formatter = all_formatters[dialog_formatter_name]
169 | else:
170 | dialog_formatter = check_ext_module(dialog_formatter_name)
171 | if not dialog_formatter:
172 | raise ValueError(f"formatter {dialog_formatter_name} doesn't exist (declared in {service_name})")
173 | if response_formatter_name:
174 | if response_formatter_name in all_formatters:
175 | response_formatter = all_formatters[response_formatter_name]
176 | else:
177 | response_formatter = check_ext_module(response_formatter_name)
178 | if not response_formatter:
179 | raise ValueError(f"formatter {response_formatter_name} doesn't exist (declared in {service_name})")
180 |
181 | names_previous_services = set()
182 | for sn in data.get('previous_services', set()):
183 | names_previous_services.update(self.services_names.get(sn, set()))
184 | names_required_previous_services = set()
185 | for sn in data.get('required_previous_services', set()):
186 | names_required_previous_services.update(self.services_names.get(sn, set()))
187 | tags = data.get('tags', [])
188 | service = Service(
189 | name=service_name, connector_func=connector.send, state_processor_method=sm_method, tags=tags,
190 | names_previous_services=names_previous_services,
191 | names_required_previous_services=names_required_previous_services,
192 | workflow_formatter=workflow_formatter, dialog_formatter=dialog_formatter,
193 | response_formatter=response_formatter, label=name)
194 | if service.is_last_chance():
195 | self.last_chance_service = service
196 | elif service.is_timeout():
197 | self.timeout_service = service
198 | else:
199 | self.services.append(service)
200 |
201 | def fill_connectors(self):
202 | if 'connectors' in self.config:
203 | for k, v in self.config['connectors'].items():
204 | v.update({'connector_name': k})
205 | self.make_connector(f'connectors.{k}', v)
206 |
207 | # collect residual connectors, form skill names
208 | for k, v in self.config['services'].items():
209 | if 'connector' in v: # single service
210 | if isinstance(v['connector'], dict):
211 | if 'protocol' in v['connector']:
212 | self.make_connector(k, v['connector'])
213 | else:
214 |                     raise ValueError(f'connector in pipeline.{k} is declared incorrectly')
215 |             elif not isinstance(v['connector'], str):
216 |                 raise ValueError(f'connector in pipeline.{k} is declared incorrectly')
217 | self.services_names[k].add(k)
218 | else: # grouped services
219 | for sk, sv in v.items():
220 | service_name = f'{k}.{sk}'
221 | if isinstance(sv['connector'], dict):
222 | if 'protocol' in sv['connector']:
223 | self.make_connector(service_name, sv['connector'])
224 | else:
225 |                         raise ValueError(f'connector in pipeline.{service_name} is declared incorrectly')
226 |                 elif not isinstance(sv['connector'], str):
227 |                     raise ValueError(f'connector in pipeline.{service_name} is declared incorrectly')
228 | self.services_names[k].add(service_name)
229 | self.services_names[service_name].add(service_name)
230 |
231 | def fill_services(self):
232 | for k, v in self.config['services'].items():
233 | if 'connector' in v: # single service
234 | self.make_service(None, k, v)
235 | else: # grouped services
236 | for sk, sv in v.items():
237 | self.make_service(k, sk, sv)
238 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/docs/source/_static/apispec/agent_v0.12.0.yml:
--------------------------------------------------------------------------------
1 | openapi: 3.0.1
2 | info:
3 | title: DeepPavlov Agent State API
4 | version: 0.12.0
5 | description: >-
6 | Agents built with DeepPavlov Agent communicate with their Services via HTTP, so
7 | endpoints should be specified.
8 | servers:
9 | - url: 'http://localhost:{port}/'
10 | description: Local development server
11 | variables:
12 | port:
13 | default: '4242'
14 | paths:
15 | /:
16 | get:
17 | summary: Root path
18 | responses:
19 | '200':
20 | description: Go to /apidocs/ to see graphical web UI for this API.
21 | '/api/v0/{skill_endpoint}/':
22 | post:
23 | parameters:
24 | - name: skill_endpoint
25 | in: path
26 | required: true
27 | schema:
28 | enum:
29 | - model
30 | summary: Generic skill endpoint
31 | description: >-
32 | An agent built with DeepPavlov Agent sends requests to the services endpoints in
33 | order to retrieve the answers.
34 | requestBody:
35 | description: Description of the request to be executed
36 | required: true
37 | content:
38 | application/json:
39 | schema:
40 | $ref: '#/components/schemas/RequestBodySchema'
41 | examples:
42 | general:
43 | $ref: '#/components/examples/GenericRequestBody'
44 | responses:
45 | '200':
46 |           description: Request finished successfully.
47 | content:
48 | application/json:
49 | schema:
50 | $ref: "#/components/schemas/ODQAResponse200Schema"
51 | examples:
52 | odqa:
53 | $ref: "#/components/examples/ODQAResponse"
54 | '404':
55 |           description: This skill doesn't exist.
56 | components:
57 | schemas:
58 | RequestBodySchema:
59 | type: object
60 | properties:
61 | id:
62 | description: REQUIRED. A unique id of the dialog.
63 | type: string
64 | location:
65 | description: 'REQUIRED. A free-formatted location where the dialog is happening.'
66 | type: string
67 | utterances:
68 | description: >-
69 | REQUIRED. A list of all utterances of the dialog. The last utterance always belongs
70 | to a human user.
71 | type: array
72 | items:
73 | oneOf:
74 | - $ref: '#/components/schemas/HumanUtterance'
75 | - $ref: '#/components/schemas/BotUtterance'
76 | human:
77 | $ref: '#/components/schemas/Human'
78 | bot:
79 | $ref: '#/components/schemas/Bot'
80 | channel_type:
81 | description: >-
82 | REQUIRED. A channel where the communication is happening. For example, "telegram",
83 | "facebook", "http".
84 | type: string
85 | Human:
86 | description: 'REQUIRED. A human user in the dialog.'
87 | type: object
88 | properties:
89 | id:
90 |           description: REQUIRED. A unique id of the human user.
91 | type: string
92 | user_telegram_id:
93 | description: REQUIRED. A unique Telegram id of the human user.
94 | type: string
95 | user_type:
96 | description: REQUIRED. A user type. Here it is always “human”.
97 | type: string
98 | device_type:
99 | description: >-
100 | REQUIRED. A name of the device which is used by the user. For example, it can be "iphone" or "android".
101 | type: string
102 | persona:
103 | description: REQUIRED. A persona of the human user. It is stored as an array of sentences characterizing the human user. By default this is an empty array.
104 | type: array
105 | items:
106 | type: string
107 | profile:
108 | $ref: '#/components/schemas/Profile'
109 | attributes:
110 | description: Generic key-value attributes.
111 | type: object
112 | items:
113 | type: object
114 | Bot:
115 | description: >-
116 | REQUIRED. A bot user of the dialog. A bot is an agent with a particular skill set.
117 | type: object
118 | properties:
119 | id:
120 |           description: REQUIRED. A unique id of the bot user.
121 | type: string
122 | user_type:
123 |           description: REQUIRED. A user type. Here it is always “bot”.
124 | type: string
125 | persona:
126 | description: REQUIRED. A persona of the bot user. It is stored as an array of sentences characterizing the human user. By default this is an empty array.
127 | type: array
128 | items:
129 | type: string
130 | attributes:
131 | description: Generic key-value attributes.
132 | type: object
133 | items:
134 | type: object
135 | Profile:
136 |       description: REQUIRED. Personal information about the human user.
137 | type: object
138 | properties:
139 | gender:
140 | description: REQUIRED. A gender of the human user.
141 | type: string
142 | birthdate:
143 | description: REQUIRED. Birthdate
144 | type: string
145 | format: date
146 | name:
147 | description: REQUIRED. A name of the human user.
148 | type: string
149 | location:
150 | description: REQUIRED. A location of the human user.
151 | type: object
152 | home_coordinates:
153 | description: REQUIRED. Home coordinates of the human user.
154 | type: object
155 | work_coordinates:
156 | description: REQUIRED. Workplace coordinates of the human user.
157 | type: object
158 | occupation:
159 | description: REQUIRED. A profession of the human user.
160 | type: string
161 | income_per_year:
162 | description: REQUIRED. An income of the human user.
163 | type: number
164 | HumanUtterance:
165 |       description: REQUIRED. An utterance of the human user.
166 | type: object
167 | properties:
168 | id:
169 | type: string
170 | description: REQUIRED. A unique id of the human utterance.
171 | text:
172 | type: string
173 | description: >-
174 | REQUIRED. Text of the human utterance. If this is the very first utterance of the dialog,
175 | it has the "/start" value.
176 | user:
177 | $ref: '#/components/schemas/Human'
178 | annotations:
179 | $ref: '#/components/schemas/Annotations'
180 | date_time:
181 | type: string
182 | format: datetime
183 |             description: REQUIRED. The time when the utterance was received by the agent server.
184 | hypotheses:
185 | type: array
186 | items:
187 | type: object
188 | description: >-
189 | Response candidates to this particular Utterance, generated by Skills.
190 | BotUtterance:
191 |       description: REQUIRED. An utterance of the bot user.
192 | type: object
193 | properties:
194 | id:
195 | type: string
196 | description: REQUIRED. A unique id of the bot utterance.
197 | text:
198 | type: string
199 | description: >-
200 | REQUIRED. Text of the bot utterance.
201 | orig_text:
202 | type: string
203 | description: >-
204 |             An original response given by the skill which can be transformed later by ResponseSelector
205 |             or Postprocessor. If it was transformed, the transformed response goes to the "text" field
206 |             and the original response is stored in the "orig_text" field. The field is None by default.
207 | user:
208 | $ref: '#/components/schemas/Bot'
209 | annotations:
210 | $ref: '#/components/schemas/Annotations'
211 | date_time:
212 | type: string
213 | format: datetime
214 |             description: REQUIRED. The time when the utterance was received by the agent server.
215 | confidence:
216 | type: number
217 | description: Skill confidence in its response.
218 | active_skill:
219 | type: string
220 | description: >-
221 | A name of the skill which was responsible for the final bot response generation.
222 | Annotations:
223 | description: >-
224 | REQUIRED. The utterances annotations, or tags. The default values of the field is an empty array: []. If the dialog starts with "/start" utterance, this utterance is not being annotated.
225 | type: object
226 | ODQAResponse200Unit:
227 | description: >-
228 |         A list of skill responses. Each response here is a hypothetical response to the same human utterance, so a skill should generate a number of possible responses for each incoming human utterance.
229 | type: array
230 | items:
231 | type: object
232 | properties:
233 | text:
234 |               description: A text response of the skill.
235 | type: string
236 | confidence:
237 | description: >-
238 |                 Skill confidence in its response.
239 | type: number
240 | ODQAResponse200Schema:
241 | description: >-
242 |         A batch of lists of skill responses. A skill should provide a list of hypothetical answers for each incoming human utterance.
243 | properties:
244 | responses:
245 | type: array
246 | items:
247 | $ref: '#/components/schemas/ODQAResponse200Unit'
248 | examples:
249 | GenericRequestBody:
250 |       description: one exhaustive example
251 | value:
252 | id: 5d9b755eb8cd280022907f27
253 | location: lab
254 | utterances:
255 | - id: 5d9b755eb8cd280022907f29
256 | text: Hello
257 | user:
258 | id: 5d9b755eb8cd280022907f25
259 | user_telegram_id: vasily
260 | user_type: human
261 | device_type: cmd
262 | persona: []
263 | profile:
264 | name: None
265 | gender: None
266 | birthdate: None
267 | location: None
268 | home_coordinates: None
269 | work_coordinates: None
270 | occupation: None
271 | income_per_year: None
272 | attributes: {}
273 | annotations:
274 | ner:
275 | tokens:
276 | - Hello
277 | tags:
278 | - O
279 | date_time: '2019-10-07 20:26:54.409000'
280 | hypotheses:
281 | - skill_name: chitchat
282 | text: Hi!
283 | confidence: 0.6
284 | - skill_name: odqa
285 | text: to my friends
286 | confidence: 0.23
287 | - id: 5d9b755eb8cd280022907f28
288 | active_skill: chitchat
289 | confidence: 0.6
290 | text: Hi!
291 | orig_text: None
292 | user:
293 | id: 5d9b755eb8cd280022907f26
294 | user_type: bot
295 | persona: []
296 | attributes: {}
297 | annotations:
298 | bot_ner:
299 | tokens:
300 | - Hi
301 | - '!'
302 | tags:
303 | - O
304 | - O
305 | date_time: '2019-10-07 20:26:54.856000'
306 | - id: 5d9b7565b8cd280022907f2b
307 | text: What is your name?
308 | user:
309 | id: 5d9b755eb8cd280022907f25
310 | user_telegram_id: к5698
311 | user_type: human
312 | device_type: cmd
313 | persona: []
314 | profile:
315 | name: None
316 | gender: None
317 | birthdate: None
318 | location: None
319 | home_coordinates: None
320 | work_coordinates: None
321 | occupation: None
322 | income_per_year: None
323 | attributes: {}
324 | annotations:
325 | ner:
326 | tokens:
327 | - What
328 | - is
329 | - your
330 | - name
331 | - '?'
332 | tags:
333 | - O
334 | - O
335 | - O
336 | - O
337 | - O
338 | date_time: '2019-10-07 20:27:01.193000'
339 | hypotheses:
340 | - skill_name: chitchat
341 | text: My name is DeepPavlov Agent!
342 | confidence: 0.9
343 | - skill_name: odqa
344 | text: Alexander the Great
345 | confidence: 0.5
346 | - id: 5d9b7565b8cd280022907f2a
347 | active_skill: chitchat
348 | confidence: 0.6
349 | text: My name is DeepPavlov Agent!
350 | orig_text: None
351 | user:
352 | id: 5d9b755eb8cd280022907f26
353 | user_type: bot
354 | persona: []
355 | attributes: {}
356 | annotations:
357 | bot_ner:
358 | tokens:
359 | - My
360 | - name
361 | - is
362 | - DeepPavlov
363 | - Agent
364 | - '!'
365 | tags:
366 | - O
367 | - O
368 | - O
369 | - O
370 | - O
371 | - O
372 | date_time: '2019-10-07 20:27:01.367000'
373 | channel_type: cmd_client
374 | human:
375 | id: 5d9b755eb8cd280022907f25
376 | user_telegram_id: к5698
377 | user_type: human
378 | device_type: cmd
379 | persona: []
380 | profile:
381 | name: None
382 | gender: None
383 | birthdate: None
384 | location: None
385 | home_coordinates: None
386 | work_coordinates: None
387 | occupation: None
388 | income_per_year: None
389 | attributes: {}
390 | bot:
391 | id: 5d9b755eb8cd280022907f26
392 | user_type: bot
393 | persona: []
394 | attributes: {}
395 | version: 0.12.0
396 | ODQAResponse:
397 | description: An example of Open Domain Question Answering (ODQA) skill.
398 | value:
399 | responses:
400 | -
401 | - text: Peter the Great was born at 1672.
402 | confidence: 0.947
403 | - text: at 1672
404 | confidence: 0.998
405 | -
406 | - text: The Earth population is 7 billions.
407 | confidence: 0.3333
408 | - text: 7 billions
409 | confidence: 0.36
--------------------------------------------------------------------------------
/docs/source/_static/apispec/agent_v0.12.1.yml:
--------------------------------------------------------------------------------
1 | openapi: 3.0.1
2 | info:
3 | title: DeepPavlov Agent State API
4 | version: 0.12.1
5 | description: >-
6 | Agents built with DeepPavlov Agent communicate with their Services via HTTP, so
7 | endpoints should be specified.
8 | servers:
9 | - url: 'http://localhost:{port}/'
10 | description: Local development server
11 | variables:
12 | port:
13 | default: '4242'
14 | paths:
15 | /:
16 | get:
17 | summary: Root path
18 | responses:
19 | '200':
20 | description: Go to /apidocs/ to see graphical web UI for this API.
21 | '/api/v0/{skill_endpoint}/':
22 | post:
23 | parameters:
24 | - name: skill_endpoint
25 | in: path
26 | required: true
27 | schema:
28 | enum:
29 | - model
30 | summary: Generic skill endpoint
31 | description: >-
32 | An agent built with DeepPavlov Agent sends requests to the services endpoints in
33 | order to retrieve the answers.
34 | requestBody:
35 | description: Description of the request to be executed
36 | required: true
37 | content:
38 | application/json:
39 | schema:
40 | $ref: '#/components/schemas/RequestBodySchema'
41 | examples:
42 | general:
43 | $ref: '#/components/examples/GenericRequestBody'
44 | responses:
45 | '200':
46 |           description: Request finished successfully.
47 | content:
48 | application/json:
49 | schema:
50 | $ref: "#/components/schemas/ODQAResponse200Schema"
51 | examples:
52 | odqa:
53 | $ref: "#/components/examples/ODQAResponse"
54 | '404':
55 |           description: This skill doesn't exist.
56 | components:
57 | schemas:
58 | RequestBodySchema:
59 | type: object
60 | properties:
61 | id:
62 | description: REQUIRED. A unique id of the dialog.
63 | type: string
64 | location:
65 | description: 'REQUIRED. A free-formatted location where the dialog is happening.'
66 | type: string
67 | utterances:
68 | description: >-
69 | REQUIRED. A list of all utterances of the dialog. The last utterance always belongs
70 | to a human user.
71 | type: array
72 | items:
73 | oneOf:
74 | - $ref: '#/components/schemas/HumanUtterance'
75 | - $ref: '#/components/schemas/BotUtterance'
76 | human:
77 | $ref: '#/components/schemas/Human'
78 | bot:
79 | $ref: '#/components/schemas/Bot'
80 | channel_type:
81 | description: >-
82 | REQUIRED. A channel where the communication is happening. For example, "telegram",
83 | "facebook", "http".
84 | type: string
85 | Human:
86 | description: 'REQUIRED. A human user in the dialog.'
87 | type: object
88 | properties:
89 | id:
90 |           description: REQUIRED. A unique id of the human user.
91 | type: string
92 | user_telegram_id:
93 | description: REQUIRED. A unique Telegram id of the human user.
94 | type: string
95 | user_type:
96 | description: REQUIRED. A user type. Here it is always “human”.
97 | type: string
98 | device_type:
99 | description: >-
100 | REQUIRED. A name of the device which is used by the user. For example, it can be "iphone" or "android".
101 | type: string
102 | persona:
103 | description: REQUIRED. A persona of the human user. It is stored as an array of sentences characterizing the human user. By default this is an empty array.
104 | type: array
105 | items:
106 | type: string
107 | profile:
108 | $ref: '#/components/schemas/Profile'
109 | attributes:
110 | description: Generic key-value attributes.
111 | type: object
112 | items:
113 | type: object
114 | Bot:
115 | description: >-
116 | REQUIRED. A bot user of the dialog. A bot is an agent with a particular skill set.
117 | type: object
118 | properties:
119 | id:
120 |           description: REQUIRED. A unique id of the bot user.
121 | type: string
122 | user_type:
123 |           description: REQUIRED. A user type. Here it is always “bot”.
124 | type: string
125 | persona:
126 |           description: REQUIRED. A persona of the bot user. It is stored as an array of sentences characterizing the bot user. By default this is an empty array.
127 | type: array
128 | items:
129 | type: string
130 | attributes:
131 | description: Generic key-value attributes.
132 | type: object
133 | items:
134 | type: object
135 | Profile:
136 |       description: REQUIRED. Personal information about the human user.
137 | type: object
138 | properties:
139 | gender:
140 | description: REQUIRED. A gender of the human user.
141 | type: string
142 | birthdate:
143 | description: REQUIRED. Birthdate
144 | type: string
145 | format: date
146 | name:
147 | description: REQUIRED. A name of the human user.
148 | type: string
149 | location:
150 | description: REQUIRED. A location of the human user.
151 | type: object
152 | home_coordinates:
153 | description: REQUIRED. Home coordinates of the human user.
154 | type: object
155 | work_coordinates:
156 | description: REQUIRED. Workplace coordinates of the human user.
157 | type: object
158 | occupation:
159 | description: REQUIRED. A profession of the human user.
160 | type: string
161 | income_per_year:
162 | description: REQUIRED. An income of the human user.
163 | type: number
164 | HumanUtterance:
165 |       description: REQUIRED. An utterance of the human user.
166 | type: object
167 | properties:
168 | id:
169 | type: string
170 | description: REQUIRED. A unique id of the human utterance.
171 | text:
172 | type: string
173 | description: >-
174 | REQUIRED. Text of the human utterance. If this is the very first utterance of the dialog,
175 | it has the "/start" value.
176 | user:
177 | $ref: '#/components/schemas/Human'
178 | annotations:
179 | $ref: '#/components/schemas/Annotations'
180 | date_time:
181 | type: string
182 | format: datetime
183 |           description: REQUIRED. The time when the utterance was received by the agent server.
184 | hypotheses:
185 | type: array
186 | items:
187 | type: object
188 | description: >-
189 | Response candidates to this particular Utterance, generated by Skills.
190 | attributes:
191 | description: Generic key-value attributes.
192 | type: object
193 | items:
194 | type: object
195 | BotUtterance:
196 |       description: REQUIRED. An utterance of the bot user.
197 | type: object
198 | properties:
199 | id:
200 | type: string
201 | description: REQUIRED. A unique id of the bot utterance.
202 | text:
203 | type: string
204 | description: >-
205 | REQUIRED. Text of the bot utterance.
206 | orig_text:
207 | type: string
208 | description: >-
209 |             An original response given by the skill which can be transformed later by ResponseSelector
210 | or Postprocessor. If it was transformed, the transformed response goes to the "text" field
211 | and the original response is stored to the "orig_text" field. The field has value None by default.
212 | user:
213 | $ref: '#/components/schemas/Bot'
214 | annotations:
215 | $ref: '#/components/schemas/Annotations'
216 | date_time:
217 | type: string
218 | format: datetime
219 |           description: REQUIRED. The time when the utterance was received by the agent server.
220 | confidence:
221 | type: number
222 | description: Skill confidence in its response.
223 | active_skill:
224 | type: string
225 | description: >-
226 | A name of the skill which was responsible for the final bot response generation.
227 | Annotations:
228 | description: >-
229 |         REQUIRED. The utterance's annotations, or tags. By default the field is an empty object: {}. If the dialog starts with a "/start" utterance, that utterance is not annotated.
230 | type: object
231 | ODQAResponse200Unit:
232 | description: >-
233 |         A list of skill responses. Each response here is a hypothetical response to the same human utterance, so a skill should generate several possible responses for each incoming human utterance.
234 | type: array
235 | items:
236 | type: object
237 | properties:
238 | text:
239 |             description: A text response of the skill.
240 | type: string
241 | confidence:
242 | description: >-
243 |               Skill confidence in its response.
244 | type: number
245 | ODQAResponse200Schema:
246 | description: >-
247 |         A batch of lists of skill responses. A skill should provide a list of hypothetical answers for each incoming human utterance.
248 | properties:
249 | responses:
250 | type: array
251 | items:
252 | $ref: '#/components/schemas/ODQAResponse200Unit'
253 | examples:
254 | GenericRequestBody:
255 |       description: One exhaustive example of a request body.
256 | value:
257 | id: 5d9b755eb8cd280022907f27
258 | location: lab
259 | utterances:
260 | - id: 5d9b755eb8cd280022907f29
261 | text: Hello
262 | user:
263 | id: 5d9b755eb8cd280022907f25
264 | user_telegram_id: vasily
265 | user_type: human
266 | device_type: cmd
267 | persona: []
268 | profile:
269 | name: None
270 | gender: None
271 | birthdate: None
272 | location: None
273 | home_coordinates: None
274 | work_coordinates: None
275 | occupation: None
276 | income_per_year: None
277 | attributes: {}
278 | annotations:
279 | ner:
280 | tokens:
281 | - Hello
282 | tags:
283 | - O
284 | date_time: '2019-10-07 20:26:54.409000'
285 | hypotheses:
286 | - skill_name: chitchat
287 | text: Hi!
288 | confidence: 0.6
289 | - skill_name: odqa
290 | text: to my friends
291 | confidence: 0.23
292 | - id: 5d9b755eb8cd280022907f28
293 | active_skill: chitchat
294 | confidence: 0.6
295 | text: Hi!
296 | orig_text: None
297 | user:
298 | id: 5d9b755eb8cd280022907f26
299 | user_type: bot
300 | persona: []
301 | attributes: {}
302 | annotations:
303 | bot_ner:
304 | tokens:
305 | - Hi
306 | - '!'
307 | tags:
308 | - O
309 | - O
310 | date_time: '2019-10-07 20:26:54.856000'
311 | - id: 5d9b7565b8cd280022907f2b
312 | text: What is your name?
313 | user:
314 | id: 5d9b755eb8cd280022907f25
315 | user_telegram_id: к5698
316 | user_type: human
317 | device_type: cmd
318 | persona: []
319 | profile:
320 | name: None
321 | gender: None
322 | birthdate: None
323 | location: None
324 | home_coordinates: None
325 | work_coordinates: None
326 | occupation: None
327 | income_per_year: None
328 | attributes: {}
329 | annotations:
330 | ner:
331 | tokens:
332 | - What
333 | - is
334 | - your
335 | - name
336 | - '?'
337 | tags:
338 | - O
339 | - O
340 | - O
341 | - O
342 | - O
343 | date_time: '2019-10-07 20:27:01.193000'
344 | hypotheses:
345 | - skill_name: chitchat
346 | text: My name is DeepPavlov Agent!
347 | confidence: 0.9
348 | - skill_name: odqa
349 | text: Alexander the Great
350 | confidence: 0.5
351 | - id: 5d9b7565b8cd280022907f2a
352 | active_skill: chitchat
353 | confidence: 0.6
354 | text: My name is DeepPavlov Agent!
355 | orig_text: None
356 | user:
357 | id: 5d9b755eb8cd280022907f26
358 | user_type: bot
359 | persona: []
360 | attributes: {}
361 | annotations:
362 | bot_ner:
363 | tokens:
364 | - My
365 | - name
366 | - is
367 | - DeepPavlov
368 | - Agent
369 | - '!'
370 | tags:
371 | - O
372 | - O
373 | - O
374 | - O
375 | - O
376 | - O
377 | date_time: '2019-10-07 20:27:01.367000'
378 | channel_type: cmd_client
379 | human:
380 | id: 5d9b755eb8cd280022907f25
381 | user_telegram_id: к5698
382 | user_type: human
383 | device_type: cmd
384 | persona: []
385 | profile:
386 | name: None
387 | gender: None
388 | birthdate: None
389 | location: None
390 | home_coordinates: None
391 | work_coordinates: None
392 | occupation: None
393 | income_per_year: None
394 | attributes: {}
395 | bot:
396 | id: 5d9b755eb8cd280022907f26
397 | user_type: bot
398 | persona: []
399 | attributes: {}
400 | version: 0.12.0
401 | ODQAResponse:
402 |       description: An example response from an Open Domain Question Answering (ODQA) skill.
403 | value:
404 | responses:
405 | -
406 |             - text: Peter the Great was born in 1672.
407 |               confidence: 0.947
408 |             - text: in 1672
409 |               confidence: 0.998
410 |           -
411 |             - text: The Earth's population is 7 billion.
412 |               confidence: 0.3333
413 |             - text: 7 billion
414 |               confidence: 0.36
--------------------------------------------------------------------------------
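
The 0.12.1 spec above describes the request an agent sends to a skill's /api/v0/{skill_endpoint}/ route: a single dialog state with id, location, utterances, human, bot and channel_type. Below is a hedged client-side sketch of such a POST, using the third-party requests package (assumed available) and the spec's default port 4242; the field values are trimmed placeholders based on the GenericRequestBody example, not a real deployment.

# Sketch: posting a minimal dialog state to a skill endpoint, following
# RequestBodySchema from agent_v0.12.1.yml. Values are illustrative only.
import requests

dialog_state = {
    'id': '5d9b755eb8cd280022907f27',
    'location': 'lab',
    'utterances': [
        {
            'id': '5d9b755eb8cd280022907f29',
            'text': 'Hello',
            'user': {'id': '5d9b755eb8cd280022907f25', 'user_telegram_id': 'vasily',
                     'user_type': 'human', 'device_type': 'cmd', 'persona': [],
                     'profile': {}, 'attributes': {}},
            'annotations': {},
            'date_time': '2019-10-07 20:26:54.409000',
            'hypotheses': [],
            'attributes': {},
        },
    ],
    'human': {'id': '5d9b755eb8cd280022907f25', 'user_telegram_id': 'vasily',
              'user_type': 'human', 'device_type': 'cmd', 'persona': [],
              'profile': {}, 'attributes': {}},
    'bot': {'id': '5d9b755eb8cd280022907f26', 'user_type': 'bot',
            'persona': [], 'attributes': {}},
    'channel_type': 'cmd_client',
}

# Default server variable from the spec: http://localhost:4242/
response = requests.post('http://localhost:4242/api/v0/model/', json=dialog_state, timeout=10)
response.raise_for_status()
for hypotheses in response.json()['responses']:
    for hypothesis in hypotheses:
        print(hypothesis['text'], hypothesis['confidence'])
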
/deeppavlov_agent/core/transport/gateways/rabbitmq.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 | import time
4 | from logging import getLogger
5 | from typing import Dict, List, Optional, Callable
6 |
7 | import aio_pika
8 | from aio_pika import Connection, Channel, Exchange, Queue, IncomingMessage, Message
9 |
10 | from ..base import AgentGatewayBase, ServiceGatewayBase, ChannelGatewayBase
11 | from ..messages import ServiceTaskMessage, ServiceResponseMessage, ToChannelMessage, FromChannelMessage
12 | from ..messages import TMessageBase, ServiceErrorMessage, get_transport_message
13 |
14 | AGENT_IN_EXCHANGE_NAME_TEMPLATE = '{agent_namespace}_e_in'
15 | AGENT_OUT_EXCHANGE_NAME_TEMPLATE = '{agent_namespace}_e_out'
16 | AGENT_QUEUE_NAME_TEMPLATE = '{agent_namespace}_q_agent_{agent_name}'
17 | AGENT_ROUTING_KEY_TEMPLATE = 'agent.{agent_name}'
18 |
19 | SERVICE_QUEUE_NAME_TEMPLATE = '{agent_namespace}_q_service_{service_name}'
20 | SERVICE_ROUTING_KEY_TEMPLATE = 'service.{service_name}'
21 |
22 | CHANNEL_QUEUE_NAME_TEMPLATE = '{agent_namespace}_{agent_name}_q_channel_{channel_id}'
23 | CHANNEL_ROUTING_KEY_TEMPLATE = 'agent.{agent_name}.channel.{channel_id}.any'
24 |
25 | logger = getLogger(__name__)
26 |
27 |
28 | # TODO: add proper RabbitMQ SSL authentication
29 | class RabbitMQTransportBase:
30 | _config: dict
31 | _loop: asyncio.AbstractEventLoop
32 | _agent_in_exchange: Exchange
33 | _agent_out_exchange: Exchange
34 | _connection: Connection
35 | _agent_in_channel: Channel
36 | _agent_out_channel: Channel
37 | _in_queue: Optional[Queue]
38 | _utterance_lifetime_sec: int
39 |
40 | def __init__(self, config: dict, *args, **kwargs):
41 | super(RabbitMQTransportBase, self).__init__(*args, **kwargs)
42 | self._config = config
43 | self._in_queue = None
44 | self._utterance_lifetime_sec = config['utterance_lifetime_sec']
45 |
46 | async def _connect(self) -> None:
47 | agent_namespace = self._config['agent_namespace']
48 |
49 | host = self._config['transport']['AMQP']['host']
50 | port = self._config['transport']['AMQP']['port']
51 | login = self._config['transport']['AMQP']['login']
52 | password = self._config['transport']['AMQP']['password']
53 | virtualhost = self._config['transport']['AMQP']['virtualhost']
54 |
55 | logger.info('Starting RabbitMQ connection...')
56 |
57 | while True:
58 | try:
59 | self._connection = await aio_pika.connect_robust(loop=self._loop, host=host, port=port, login=login,
60 | password=password, virtualhost=virtualhost)
61 |
62 | logger.info('RabbitMQ connected')
63 | break
64 | except ConnectionError:
65 | reconnect_timeout = 5
66 | logger.error(f'RabbitMQ connection error, making another attempt in {reconnect_timeout} secs')
67 | time.sleep(reconnect_timeout)
68 |
69 | self._agent_in_channel = await self._connection.channel()
70 | agent_in_exchange_name = AGENT_IN_EXCHANGE_NAME_TEMPLATE.format(agent_namespace=agent_namespace)
71 | self._agent_in_exchange = await self._agent_in_channel.declare_exchange(name=agent_in_exchange_name,
72 | type=aio_pika.ExchangeType.TOPIC)
73 | logger.info(f'Declared agent in exchange: {agent_in_exchange_name}')
74 |
75 | self._agent_out_channel = await self._connection.channel()
76 | agent_out_exchange_name = AGENT_OUT_EXCHANGE_NAME_TEMPLATE.format(agent_namespace=agent_namespace)
77 | self._agent_out_exchange = await self._agent_in_channel.declare_exchange(name=agent_out_exchange_name,
78 | type=aio_pika.ExchangeType.TOPIC)
79 | logger.info(f'Declared agent out exchange: {agent_out_exchange_name}')
80 |
81 | def disconnect(self):
82 | self._connection.close()
83 |
84 | async def _setup_queues(self) -> None:
85 | raise NotImplementedError
86 |
87 | async def _on_message_callback(self, message: IncomingMessage) -> None:
88 | raise NotImplementedError
89 |
90 |
91 | class RabbitMQAgentGateway(RabbitMQTransportBase, AgentGatewayBase):
92 | _agent_name: str
93 | _service_responded_events: Dict[str, asyncio.Event]
94 | _service_responses: Dict[str, dict]
95 |
96 | def __init__(self, config: dict,
97 | on_service_callback: Optional[Callable] = None,
98 | on_channel_callback: Optional[Callable] = None) -> None:
99 |
100 | super(RabbitMQAgentGateway, self).__init__(config=config,
101 | on_service_callback=on_service_callback,
102 | on_channel_callback=on_channel_callback)
103 |
104 | self._loop = asyncio.get_event_loop()
105 | self._agent_name = self._config['agent_name']
106 |
107 | self._loop.run_until_complete(self._connect())
108 | self._loop.run_until_complete(self._setup_queues())
109 | self._loop.run_until_complete(self._in_queue.consume(callback=self._on_message_callback))
110 | logger.info('Agent in queue started consuming')
111 |
112 | async def _setup_queues(self) -> None:
113 | agent_namespace = self._config['agent_namespace']
114 | in_queue_name = AGENT_QUEUE_NAME_TEMPLATE.format(agent_namespace=agent_namespace, agent_name=self._agent_name)
115 | self._in_queue = await self._agent_in_channel.declare_queue(name=in_queue_name, durable=True)
116 | logger.info(f'Declared agent in queue: {in_queue_name}')
117 |
118 | routing_key = AGENT_ROUTING_KEY_TEMPLATE.format(agent_name=self._agent_name)
119 | await self._in_queue.bind(exchange=self._agent_in_exchange, routing_key=routing_key)
120 | logger.info(f'Queue: {in_queue_name} bound to routing key: {routing_key}')
121 |
122 | async def _on_message_callback(self, message: IncomingMessage) -> None:
123 |         message_in: TMessageBase = get_transport_message(json.loads(message.body))
124 | await message.ack()
125 |
126 | if isinstance(message_in, ServiceResponseMessage):
127 | logger.debug(f'Received service response message {str(message_in.to_json())}')
128 | await self._loop.create_task(self._on_service_callback(task_id=message_in.task_id,
129 | response=message_in.response))
130 |
131 | elif isinstance(message_in, ServiceErrorMessage):
132 | logger.debug(f'Received service error message {str(message_in.to_json())}')
133 | await self._loop.create_task(self._on_service_callback(task_id=message_in.task_id,
134 | response=message_in.exception))
135 |
136 | elif isinstance(message_in, FromChannelMessage):
137 | logger.debug(f'Received message from channel {str(message_in.to_json())}')
138 | await self._loop.create_task(self._on_channel_callback(utterance=message_in.utterance,
139 | channel_id=message_in.channel_id,
140 | user_id=message_in.user_id,
141 | reset_dialog=message_in.reset_dialog))
142 |
143 | async def send_to_service(self, service_name: str, payload: dict) -> None:
144 | task = ServiceTaskMessage(agent_name=self._agent_name, payload=payload)
145 |
146 | message = Message(body=json.dumps(task.to_json()).encode('utf-8'),
147 | delivery_mode=aio_pika.DeliveryMode.PERSISTENT,
148 | expiration=self._utterance_lifetime_sec)
149 |
150 | routing_key = SERVICE_ROUTING_KEY_TEMPLATE.format(service_name=service_name)
151 | await self._agent_out_exchange.publish(message=message, routing_key=routing_key)
152 | logger.debug(f'Published task {payload["task_id"]} with routing key {routing_key}')
153 |
154 | async def send_to_channel(self, channel_id: str, user_id: str, response: str) -> None:
155 | channel_message = ToChannelMessage(agent_name=self._agent_name,
156 | channel_id=channel_id,
157 | user_id=user_id,
158 | response=response)
159 |
160 | channel_message_json = channel_message.to_json()
161 | message = Message(body=json.dumps(channel_message_json).encode('utf-8'),
162 | delivery_mode=aio_pika.DeliveryMode.PERSISTENT,
163 | expiration=self._utterance_lifetime_sec)
164 |
165 | routing_key = CHANNEL_ROUTING_KEY_TEMPLATE.format(agent_name=self._agent_name, channel_id=channel_id)
166 | await self._agent_out_exchange.publish(message=message, routing_key=routing_key)
167 | logger.debug(f'Published channel message: {str(channel_message_json)}')
168 |
169 |
170 | # TODO: add separate service infer timeouts
171 | class RabbitMQServiceGateway(RabbitMQTransportBase, ServiceGatewayBase):
172 | _service_name: str
173 | _batch_size: int
174 | _incoming_messages_buffer: List[IncomingMessage]
175 | _add_to_buffer_lock: asyncio.Lock
176 | _infer_lock: asyncio.Lock
177 |
178 | def __init__(self, config: dict, to_service_callback: Callable) -> None:
179 | super(RabbitMQServiceGateway, self).__init__(config=config, to_service_callback=to_service_callback)
180 | self._loop = asyncio.get_event_loop()
181 | self._service_name = self._config['service']['name']
182 | self._batch_size = self._config['service'].get('batch_size', 1)
183 |
184 | self._incoming_messages_buffer = []
185 | self._add_to_buffer_lock = asyncio.Lock()
186 | self._infer_lock = asyncio.Lock()
187 |
188 | self._loop.run_until_complete(self._connect())
189 | self._loop.run_until_complete(self._setup_queues())
190 | self._loop.run_until_complete(self._in_queue.consume(callback=self._on_message_callback))
191 |         logger.info('Service in queue started consuming')
192 |
193 | async def _setup_queues(self) -> None:
194 | agent_namespace = self._config['agent_namespace']
195 |
196 | in_queue_name = SERVICE_QUEUE_NAME_TEMPLATE.format(agent_namespace=agent_namespace,
197 | service_name=self._service_name)
198 |
199 | self._in_queue = await self._agent_out_channel.declare_queue(name=in_queue_name, durable=True)
200 | logger.info(f'Declared service in queue: {in_queue_name}')
201 |
202 | # TODO think if we can remove this workaround for bot annotators
203 | service_names = self._config['service'].get('names', []) or [self._service_name]
204 | for service_name in service_names:
205 | service_routing_key = SERVICE_ROUTING_KEY_TEMPLATE.format(service_name=service_name)
206 | await self._in_queue.bind(exchange=self._agent_out_exchange, routing_key=service_routing_key)
207 | logger.info(f'Queue: {in_queue_name} bound to routing key: {service_routing_key}')
208 |
209 | await self._agent_out_channel.set_qos(prefetch_count=self._batch_size * 2)
210 |
211 | async def _on_message_callback(self, message: IncomingMessage) -> None:
212 | await self._add_to_buffer_lock.acquire()
213 | self._incoming_messages_buffer.append(message)
214 | logger.debug('Incoming message received')
215 |
216 | if len(self._incoming_messages_buffer) < self._batch_size:
217 | self._add_to_buffer_lock.release()
218 |
219 | await self._infer_lock.acquire()
220 | try:
221 | messages_batch = self._incoming_messages_buffer
222 |
223 | if messages_batch:
224 | self._incoming_messages_buffer = []
225 |
226 | if self._add_to_buffer_lock.locked():
227 | self._add_to_buffer_lock.release()
228 |                 tasks_batch: List[ServiceTaskMessage] = [
229 |                     get_transport_message(json.loads(message.body))
230 |                     for message in messages_batch]
231 |
232 | # TODO: Think about proper infer errors and aknowledge handling
233 | processed_ok = await self._process_tasks(tasks_batch)
234 |
235 | if processed_ok:
236 | for message in messages_batch:
237 | await message.ack()
238 | else:
239 | for message in messages_batch:
240 | await message.reject()
241 |
242 | elif self._add_to_buffer_lock.locked():
243 | self._add_to_buffer_lock.release()
244 | finally:
245 | self._infer_lock.release()
246 |
247 | async def _process_tasks(self, tasks_batch: List[ServiceTaskMessage]) -> bool:
248 | task_uuids_batch, payloads = \
249 | zip(*[(task.payload['task_id'], task.payload['payload']) for task in tasks_batch])
250 |
251 | logger.debug(f'Prepared for infering tasks {str(task_uuids_batch)}')
252 |
253 | try:
254 | responses_batch = await asyncio.wait_for(self._to_service_callback(payloads),
255 | self._utterance_lifetime_sec)
256 |
257 | results_replies = []
258 |
259 | for i, response in enumerate(responses_batch):
260 | results_replies.append(
261 | self._send_results(tasks_batch[i], response)
262 | )
263 |
264 | await asyncio.gather(*results_replies)
265 | logger.debug(f'Processed tasks {str(task_uuids_batch)}')
266 | return True
267 | except asyncio.TimeoutError:
268 | return False
269 |
270 | async def _send_results(self, task: ServiceTaskMessage, response: Dict) -> None:
271 | result = ServiceResponseMessage(agent_name=task.agent_name,
272 | task_id=task.payload["task_id"],
273 | response=response)
274 |
275 | message = Message(body=json.dumps(result.to_json()).encode('utf-8'),
276 | delivery_mode=aio_pika.DeliveryMode.PERSISTENT,
277 | expiration=self._utterance_lifetime_sec)
278 |
279 | routing_key = AGENT_ROUTING_KEY_TEMPLATE.format(agent_name=task.agent_name)
280 | await self._agent_in_exchange.publish(message=message, routing_key=routing_key)
281 | logger.debug(f'Sent response for task {str(task.payload["task_id"])} with routing key {routing_key}')
282 |
283 |
284 | class RabbitMQChannelGateway(RabbitMQTransportBase, ChannelGatewayBase):
285 | _agent_name: str
286 | _channel_id: str
287 |
288 | def __init__(self, config: dict, to_channel_callback: Callable) -> None:
289 | super(RabbitMQChannelGateway, self).__init__(config=config, to_channel_callback=to_channel_callback)
290 | self._loop = asyncio.get_event_loop()
291 | self._agent_name = self._config['agent_name']
292 | self._channel_id = self._config['channel']['id']
293 |
294 | self._loop.run_until_complete(self._connect())
295 | self._loop.run_until_complete(self._setup_queues())
296 | self._loop.run_until_complete(self._in_queue.consume(callback=self._on_message_callback))
297 |         logger.info('Channel in queue started consuming')
298 |
299 | async def _setup_queues(self) -> None:
300 | agent_namespace = self._config['agent_namespace']
301 |
302 | in_queue_name = CHANNEL_QUEUE_NAME_TEMPLATE.format(agent_namespace=agent_namespace,
303 | agent_name=self._agent_name,
304 | channel_id=self._channel_id)
305 |
306 | self._in_queue = await self._agent_out_channel.declare_queue(name=in_queue_name, durable=True)
307 | logger.info(f'Declared channel in queue: {in_queue_name}')
308 |
309 | routing_key = CHANNEL_ROUTING_KEY_TEMPLATE.format(agent_name=self._agent_name, channel_id=self._channel_id)
310 | await self._in_queue.bind(exchange=self._agent_out_exchange, routing_key=routing_key)
311 | logger.info(f'Queue: {in_queue_name} bound to routing key: {routing_key}')
312 |
313 | async def _on_message_callback(self, message: IncomingMessage) -> None:
314 |         message_json = json.loads(message.body)
315 | message_to_channel: ToChannelMessage = ToChannelMessage.from_json(message_json)
316 | await self._loop.create_task(self._to_channel_callback(message_to_channel.user_id, message_to_channel.response))
317 | await message.ack()
318 | logger.debug(f'Processed message to channel: {str(message_json)}')
319 |
320 | async def send_to_agent(self, utterance: str, channel_id: str, user_id: str, reset_dialog: bool) -> None:
321 | message_from_channel = FromChannelMessage(agent_name=self._agent_name,
322 | channel_id=channel_id,
323 | user_id=user_id,
324 | utterance=utterance,
325 | reset_dialog=reset_dialog)
326 |
327 | message_json = message_from_channel.to_json()
328 | message = Message(body=json.dumps(message_json).encode('utf-8'),
329 | delivery_mode=aio_pika.DeliveryMode.PERSISTENT,
330 | expiration=self._utterance_lifetime_sec)
331 |
332 | routing_key = AGENT_ROUTING_KEY_TEMPLATE.format(agent_name=self._agent_name)
333 | await self._agent_in_exchange.publish(message=message, routing_key=routing_key)
334 | logger.debug(f'Processed message to agent: {str(message_json)}')
335 |
--------------------------------------------------------------------------------
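
rabbitmq.py above connects three gateway roles to a pair of RabbitMQ topic exchanges: the agent gateway consumes its own queue bound to the '{agent_namespace}_e_in' exchange, service gateways consume task queues bound to the '{agent_namespace}_e_out' exchange, and channel gateways receive replies routed by agent and channel id. Below is a hedged sketch of standing up the service-side gateway around a trivial batch callback; the config dict only mirrors the keys this module reads (the base classes in ..base are assumed to need nothing more), and all concrete values are placeholders, not a documented deployment.

# Sketch: running a RabbitMQServiceGateway with an echo-style batch callback.
# Config keys follow what rabbitmq.py reads; values are examples only.
import asyncio

from deeppavlov_agent.core.transport.gateways.rabbitmq import RabbitMQServiceGateway

config = {
    'agent_namespace': 'deeppavlov_agent',
    'utterance_lifetime_sec': 120,
    'service': {'name': 'echo_skill', 'batch_size': 2},
    'transport': {
        'AMQP': {'host': '127.0.0.1', 'port': 5672, 'login': 'guest',
                 'password': 'guest', 'virtualhost': '/'},
    },
}


async def to_service_callback(payloads):
    # Must return one response dict per task payload in the batch.
    return [{'text': str(payload), 'confidence': 0.5} for payload in payloads]


gateway = RabbitMQServiceGateway(config=config, to_service_callback=to_service_callback)
# The constructor connects, declares queues and registers the consumer via
# run_until_complete; keep the loop running so incoming tasks get processed.
asyncio.get_event_loop().run_forever()
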