├── odqa.png
├── examples
    ├── img
    │   ├── gobot_policy.png
    │   ├── sc_ner_lr_no.png
    │   ├── sc_ner_lr_sc.png
    │   ├── gobot_database.png
    │   ├── gobot_example.png
    │   ├── gobot_pipeline.png
    │   ├── gobot_templates.png
    │   ├── sc_ner_lr_sc1.png
    │   ├── gobot_slotfiller.png
    │   ├── sc_ner_lr_cosine.png
    │   ├── sc_ner_lr_linear.png
    │   ├── sc_ner_lr_linear2.png
    │   ├── gobot_simple_example.png
    │   ├── gobot_simple_policy.png
    │   ├── sc_loss_comparison.png
    │   ├── sc_ner_lr_onecycle.png
    │   ├── sc_ner_lr_polynomial.png
    │   ├── sc_ner_lr_trapezoid.png
    │   ├── gobot_simple_pipeline.png
    │   ├── gobot_simple_templates.png
    │   ├── sc_ner_lr_exponential.png
    │   ├── sc_ner_lr_polynomial1.png
    │   └── sc_ner_lr_polynomial2.png
    ├── README.md
    ├── Pseudo-labeling for classification.ipynb
    ├── super_convergence_tutorial.ipynb
    ├── gobot_tutorial.ipynb
    └── DeepPavlov_MTL_Tutorial.ipynb
├── README.md
├── python_pipelines.ipynb
├── DP_BERT.ipynb
├── configs
    └── mtl_3task.json
├── faq_as_ranking.ipynb
├── yandex_faq.ipynb
├── DP_NER.ipynb
├── DP_hello_bot.ipynb
├── DP_autoFAQ.ipynb
├── dp_torch.ipynb
├── DP_autoFAQ_ru.ipynb
├── DP_tf.ipynb
└── DP_ODQA.ipynb


/odqa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/odqa.png


--------------------------------------------------------------------------------
/examples/img/gobot_policy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/gobot_policy.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_no.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_no.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_sc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_sc.png


--------------------------------------------------------------------------------
/examples/img/gobot_database.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/gobot_database.png


--------------------------------------------------------------------------------
/examples/img/gobot_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/gobot_example.png


--------------------------------------------------------------------------------
/examples/img/gobot_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/gobot_pipeline.png


--------------------------------------------------------------------------------
/examples/img/gobot_templates.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/gobot_templates.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_sc1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_sc1.png


--------------------------------------------------------------------------------
/examples/img/gobot_slotfiller.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/gobot_slotfiller.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_cosine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_cosine.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_linear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_linear.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_linear2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_linear2.png


--------------------------------------------------------------------------------
/examples/img/gobot_simple_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/gobot_simple_example.png


--------------------------------------------------------------------------------
/examples/img/gobot_simple_policy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/gobot_simple_policy.png


--------------------------------------------------------------------------------
/examples/img/sc_loss_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_loss_comparison.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_onecycle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_onecycle.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_polynomial.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_polynomial.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_trapezoid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_trapezoid.png


--------------------------------------------------------------------------------
/examples/img/gobot_simple_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/gobot_simple_pipeline.png


--------------------------------------------------------------------------------
/examples/img/gobot_simple_templates.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/gobot_simple_templates.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_exponential.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_exponential.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_polynomial1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_polynomial1.png


--------------------------------------------------------------------------------
/examples/img/sc_ner_lr_polynomial2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/dp_notebooks/HEAD/examples/img/sc_ner_lr_polynomial2.png


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
 1 | #  Examples & Tutorials
 2 | 
 3 | * Tutorial for simple bot [[notebook]](gobot_tutorial.ipynb) [[colab]](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/gobot_tutorial.ipynb)
 4 | 
 5 | * Tutorial for intent classifier [[notebook]](classification_tutorial.ipynb) [[colab]](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/classification_tutorial.ipynb)
 6 | 
 7 | * Pseudo-labeling for classication task [[notebook]](Pseudo-labeling%20for%20classification.ipynb) [[colab]](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/Pseudo-labeling%20for%20classification.ipynb)
 8 | 
 9 | * Optimal learning rate search in DeepPavlov [[notebook]](super_convergence_tutorial.ipynb) [[colab]](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/super_convergence_tutorial.ipynb)
10 | 
11 | # Links
12 | 
13 | More examples are available:
14 | * [github.com/deepmipt/dp_tutorials/](https://github.com/deepmipt/dp_tutorials)
15 | * [github.com/deepmipt/db_notebooks/](https://github.com/deepmipt/dp_notebooks).
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # DeepPavlov articles with Python code
 2 | 
 3 | * [How to build ‘Hello World!’ bot with DeepPavlov in 4 steps](https://medium.com/deeppavlov/how-to-build-hello-world-bot-with-deeppavlov-in-4-steps-b8636563ff81) with [Colab notebook](https://colab.research.google.com/github/deepmipt/dp_notebooks/blob/master/DP_hello_bot.ipynb)
 4 | 
 5 | * [Simple intent recognition and question answering with DeepPavlov](https://medium.com/deeppavlov/simple-intent-recognition-and-question-answering-with-deeppavlov-c54ccf5339a9) with [Colab notebook](https://colab.research.google.com/github/deepmipt/dp_notebooks/blob/master/DP_autoFAQ.ipynb)
 6 | 
 7 | * [Open-domain question answering with DeepPavlov](https://medium.com/deeppavlov/open-domain-question-answering-with-deeppavlov-c665d2ee4d65) with [Colab notebook](https://colab.research.google.com/github/deepmipt/dp_notebooks/blob/master/DP_ODQA.ipynb)
 8 | 
 9 | * [Simple text classification skill of DeepPavlov](https://towardsdatascience.com/simple-text-classification-skill-of-deeppavlov-54bc1b61c9ea)
10 | 
11 | * [The BERT-based text classification models of DeepPavlov](https://towardsdatascience.com/the-bert-based-text-classification-models-of-deeppavlov-a85892f14d61) with [Colab notebook](https://colab.research.google.com/github/deepmipt/dp_notebooks/blob/master/DP_tf.ipynb)
12 | 
13 | * [19 entities for 104 languages: A new era of NER with the DeepPavlov multilingual BERT](https://towardsdatascience.com/19-entities-for-104-languages-a-new-era-of-ner-with-the-deeppavlov-multilingual-bert-1bfa6d413ea6) with [Colab notebook](https://colab.research.google.com/github/deepmipt/dp_notebooks/blob/master/DP_tf.ipynb)
14 | 
15 | * [BERT-based Cross-Lingual Question Answering with DeepPavlov](https://towardsdatascience.com/bert-based-cross-lingual-question-answering-with-deeppavlov-704242c2ac6f) with [Colab notebook](https://colab.research.google.com/github/deepmipt/dp_notebooks/blob/master/DP_tf.ipynb)
16 | 
17 | 


--------------------------------------------------------------------------------
/python_pipelines.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "e32cab03",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "### Building insults detector using python\n",
  9 |     "\n",
 10 |     "The code below is an alternative to writing a [config file](https://github.com/deepmipt/DeepPavlov/blob/0.17.1/deeppavlov/configs/classifiers/insults_kaggle_bert_torch.json) and using it with \n",
 11 |     "\n",
 12 |     "```python\n",
 13 |     "model = build_model('insults_kaggle_bert_torch', download=True)\n",
 14 |     "```"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": null,
 20 |    "id": "d37bb701",
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "from deeppavlov import Element, Model\n",
 25 |     "from deeppavlov.core.commands.utils import expand_path\n",
 26 |     "from deeppavlov.core.data.simple_vocab import SimpleVocabulary\n",
 27 |     "from deeppavlov.download import download_resource\n",
 28 |     "from deeppavlov.models.classifiers.proba2labels import Proba2Labels\n",
 29 |     "from deeppavlov.models.preprocessors.torch_transformers_preprocessor import TorchTransformersPreprocessor\n",
 30 |     "from deeppavlov.models.torch_bert.torch_transformers_classifier import TorchTransformersClassifierModel\n",
 31 |     "\n",
 32 |     "\n",
 33 |     "model_path = expand_path('~/.deeppavlov/models/classifiers/insults_kaggle_torch_bert')\n",
 34 |     "\n",
 35 |     "# Downloading pretrained model\n",
 36 |     "download_resource(\n",
 37 |     "    'http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_torch_bert_v0.tar.gz',\n",
 38 |     "    {expand_path('~/.deeppavlov/models/classifiers')}\n",
 39 |     ")\n",
 40 |     "\n",
 41 |     "preprocessor = TorchTransformersPreprocessor(\n",
 42 |     "    vocab_file='bert-base-uncased',\n",
 43 |     "    do_lower_case=True,\n",
 44 |     "    max_seq_length=64\n",
 45 |     ")\n",
 46 |     "\n",
 47 |     "classes_vocab = SimpleVocabulary(\n",
 48 |     "    save_path=model_path/'classes.dict',\n",
 49 |     "    load_path=model_path/'classes.dict'\n",
 50 |     ")\n",
 51 |     "\n",
 52 |     "classifier = TorchTransformersClassifierModel(\n",
 53 |     "    n_classes=classes_vocab.len,\n",
 54 |     "    return_probas=True,\n",
 55 |     "    pretrained_bert='bert-base-uncased',\n",
 56 |     "    save_path=model_path/'model',\n",
 57 |     "    load_path=model_path/'model',\n",
 58 |     "    optimizer='AdamW',\n",
 59 |     "    optimizer_parameters={'lr': 1e-05},\n",
 60 |     "    learning_rate_drop_patience=5,\n",
 61 |     "    learning_rate_drop_div=2.0\n",
 62 |     ")\n",
 63 |     "\n",
 64 |     "proba2labels = Proba2Labels(max_proba=True)\n",
 65 |     "\n",
 66 |     "model = Model(\n",
 67 |     "    x=['x'],\n",
 68 |     "    out=['y_pred_labels'],\n",
 69 |     "    pipe=[\n",
 70 |     "        Element(component=preprocessor, x=['x'], out=['bert_features']),\n",
 71 |     "        Element(component=classifier, x=['bert_features'], out=['y_pred_probas']),\n",
 72 |     "        Element(component=proba2labels, x=['y_pred_probas'], out=['y_pred_ids']),\n",
 73 |     "        Element(component=classes_vocab, x=['y_pred_ids'], out=['y_pred_labels'])\n",
 74 |     "    ]\n",
 75 |     ")\n",
 76 |     "\n",
 77 |     "# Using model\n",
 78 |     "model(['you are stupid', 'you are smart'])"
 79 |    ]
 80 |   }
 81 |  ],
 82 |  "metadata": {
 83 |   "kernelspec": {
 84 |    "display_name": "Python 3",
 85 |    "language": "python",
 86 |    "name": "python3"
 87 |   },
 88 |   "language_info": {
 89 |    "codemirror_mode": {
 90 |     "name": "ipython",
 91 |     "version": 3
 92 |    },
 93 |    "file_extension": ".py",
 94 |    "mimetype": "text/x-python",
 95 |    "name": "python",
 96 |    "nbconvert_exporter": "python",
 97 |    "pygments_lexer": "ipython3",
 98 |    "version": "3.7.7"
 99 |   }
100 |  },
101 |  "nbformat": 4,
102 |  "nbformat_minor": 5
103 | }
104 | 


--------------------------------------------------------------------------------
/DP_BERT.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# The BERT-based models of DeepPavlov\n"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "[DeepPavlov](https://deeppavlov.ai/?utm_source=medium&utm_medium=article&utm_campaign=bert) is a conversational artificial intelligence framework that contains all the components required for building chatbots. DeepPavlov is developed on top of the open-source machine learning frameworks [TensorFlow](https://www.tensorflow.org/) and [Keras](https://keras.io/). Here, I will describe how to use the BERT-based models of DeepPavlov.\n"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "First, install DeepPavlov and all the model's requirements."
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "!pip install -q deeppavlov"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "## BERT for text classification"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "First, install all the model’s requirements."
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "!python -m deeppavlov install rusentiment_bert"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "You can interact with the model using the command line, where "
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": null,
 66 |    "metadata": {},
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "!python -m deeppavlov interact rusentiment_bert -d"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": null,
 75 |    "metadata": {},
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "from deeppavlov import configs, build_model\n",
 79 |     "model = build_model(configs.classifiers.rusentiment_bert, download=True)\n",
 80 |     "model(['I like this game'])"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "You can read more about BERT-based text classification models [here](http://docs.deeppavlov.ai/en/master/components/classifiers.html#bert-models)."
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "metadata": {},
 93 |    "source": [
 94 |     "## BERT for named entity recognition"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": null,
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": [
103 |     "!python -m deeppavlov install ner_ontonotes_bert\n",
104 |     "!python -m deeppavlov interact ner_ontonotes_bert -d"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": null,
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "from deeppavlov import configs, build_model\n",
114 |     "ner_model = build_model(configs.ner.ner_ontonotes_bert_mult, download=True)\n",
115 |     "ner_model(['World Curling Championship will be held in Antananarivo'])"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "metadata": {},
121 |    "source": [
122 |     "You can read more about DeepPavlov’s NER models [here](http://docs.deeppavlov.ai/en/master/components/ner.html)."
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "metadata": {},
128 |    "source": [
129 |     "## BERT for question answering"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "!python -m deeppavlov install squad_bert"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "metadata": {},
145 |    "outputs": [],
146 |    "source": [
147 |     "from deeppavlov import build_model, configs\n",
148 |     "model = build_model(configs.squad.squad_bert, download=True)\n",
149 |     "model(['DeepPavlov is a library for NLP and dialogue systems.'], ['What is DeepPavlov?'])"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "You can find out more about DeepPavlov’s question answering models [here](http://docs.deeppavlov.ai/en/master/components/ner.html). In addition, [here](https://medium.com/deeppavlov/open-domain-question-answering-with-deeppavlov-c665d2ee4d65) you can read about our open-domain question answering (ODQA) component.\n"
157 |    ]
158 |   }
159 |  ],
160 |  "metadata": {
161 |   "kernelspec": {
162 |    "display_name": "Python 3",
163 |    "language": "python",
164 |    "name": "python3"
165 |   },
166 |   "language_info": {
167 |    "codemirror_mode": {
168 |     "name": "ipython",
169 |     "version": 3
170 |    },
171 |    "file_extension": ".py",
172 |    "mimetype": "text/x-python",
173 |    "name": "python",
174 |    "nbconvert_exporter": "python",
175 |    "pygments_lexer": "ipython3",
176 |    "version": "3.6.7"
177 |   }
178 |  },
179 |  "nbformat": 4,
180 |  "nbformat_minor": 2
181 | }
182 | 


--------------------------------------------------------------------------------
/configs/mtl_3task.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "dataset_reader": {
  3 |     "class_name": "multitask_reader",
  4 |     "task_defaults": {
  5 |       "class_name": "huggingface_dataset_reader",
  6 |       "path": "glue",
  7 |       "train": "train",
  8 |       "valid": "validation",
  9 |       "test": "test"
 10 |     },
 11 |     "tasks": {
 12 |       "rte": {"name": "rte"},
 13 |       "copa": {
 14 |         "path": "super_glue",
 15 |         "name": "copa"
 16 |       },
 17 |       "conll": {
 18 |         "class_name": "conll2003_reader",
 19 |         "use_task_defaults": false,
 20 |         "data_path": "{DOWNLOADS_PATH}/conll2003/",
 21 |         "dataset_name": "conll2003",
 22 |         "provide_pos": false
 23 |       }
 24 |     }
 25 |   },
 26 |   "dataset_iterator": {
 27 |     "class_name": "multitask_iterator",
 28 |     "num_train_epochs": "{NUM_TRAIN_EPOCHS}",
 29 |     "gradient_accumulation_steps": "{GRADIENT_ACC_STEPS}",
 30 |     "seed": 42,
 31 |     "task_defaults": {
 32 |       "class_name": "huggingface_dataset_iterator",
 33 |       "label": "label",
 34 |       "use_label_name": false,
 35 |       "seed": 42
 36 |     },
 37 |     "tasks": {
 38 |       "rte": {
 39 |         "features": ["sentence1", "sentence2"]
 40 |       },
 41 |       "copa": {
 42 |         "features": ["contexts", "choices"]
 43 |       },
 44 |       "conll": {
 45 |         "class_name": "basic_classification_iterator",
 46 |         "seed": 42,
 47 |         "use_task_defaults": false
 48 |       }
 49 |     }
 50 |   },
 51 |   "chainer": {
 52 |     "in": ["x_rte", "x_copa", "x_conll"],
 53 |     "in_y": ["y_rte", "y_copa", "y_conll"],
 54 |     "pipe": [
 55 |       {
 56 |         "class_name": "multitask_pipeline_preprocessor",
 57 |         "possible_keys_to_extract": [0, 1],
 58 |         "preprocessors": [
 59 |           "TorchTransformersPreprocessor",
 60 |           "TorchTransformersMultiplechoicePreprocessor",
 61 |           "TorchTransformersNerPreprocessor"
 62 |         ],
 63 |         "do_lower_case": true,
 64 |         "n_task": 3,
 65 |         "vocab_file": "{BACKBONE}",
 66 |         "max_seq_length": 200,
 67 |         "max_subword_length": 15,
 68 |         "token_masking_prob": 0.0,
 69 |         "return_features": true,
 70 |         "in": ["x_rte", "x_copa", "x_conll"],
 71 |         "out": [
 72 |           "bert_features_rte",
 73 |           "bert_features_copa",
 74 |           "bert_features_conll"
 75 |         ]
 76 |       },
 77 |       {
 78 |         "id": "vocab_conll",
 79 |         "class_name": "simple_vocab",
 80 |         "unk_token": ["O"],
 81 |         "pad_with_zeros": true,
 82 |         "save_path": "{MODELS_PATH}/tag.dict",
 83 |         "load_path": "{MODELS_PATH}/tag.dict",
 84 |         "fit_on": ["y_conll"],
 85 |         "in": ["y_conll"],
 86 |         "out": ["y_ids_conll"]
 87 |       },
 88 | 	  {
 89 |         "id": "multitask_transformer",
 90 |         "class_name": "multitask_transformer",
 91 |         "optimizer_parameters": {"lr": 2e-5},
 92 |         "gradient_accumulation_steps": "{GRADIENT_ACC_STEPS}",
 93 |         "learning_rate_drop_patience": 2,
 94 |         "learning_rate_drop_div": 2.0,
 95 |         "return_probas": true,
 96 |         "backbone_model": "{BACKBONE}",
 97 |         "save_path": "{MODEL_PATH}",
 98 |         "load_path": "{MODEL_PATH}",
 99 |         "tasks": {
100 |           "rte": {
101 |             "type": "classification",
102 |             "options": 2
103 |           },
104 |           "copa": {
105 |             "type": "multiple_choice",
106 |             "options": 2
107 |           },
108 |           "conll": {
109 |             "type": "sequence_labeling",
110 |             "options": "#vocab_conll.len"
111 |           }
112 |         },
113 |         "in": [
114 |           "bert_features_rte",
115 |           "bert_features_copa",
116 |           "bert_features_conll"
117 |         ],
118 |         "in_y": ["y_rte", "y_copa", "y_ids_conll"],
119 |         "out": [
120 |           "y_rte_pred_probas",
121 |           "y_copa_pred_probas",
122 |           "y_conll_pred_ids"
123 |         ]
124 |       },
125 |       {
126 |         "in": ["y_rte_pred_probas"],
127 |         "out": ["y_rte_pred_ids"],
128 |         "class_name": "proba2labels",
129 |         "max_proba": true
130 |       },
131 |       {
132 |         "in": ["y_copa_pred_probas"],
133 |         "out": ["y_copa_pred_ids"],
134 |         "class_name": "proba2labels",
135 |         "max_proba": true
136 |       },
137 |       {
138 |         "in": ["y_conll_pred_ids"],
139 |         "out": ["y_conll_pred_labels"],
140 |         "ref": "vocab_conll"
141 |       }
142 |     ],
143 |     "out": ["y_rte_pred_ids", "y_copa_pred_ids", "y_conll_pred_labels"]
144 |   },
145 |   "train": {
146 |     "epochs": "{NUM_TRAIN_EPOCHS}",
147 |     "batch_size": 32,
148 |     "metrics": [
149 |       {
150 |         "name": "multitask_accuracy",
151 |         "inputs": ["y_rte", "y_copa", "y_rte_pred_ids", "y_copa_pred_ids"]
152 |       },
153 |       {
154 |         "name": "ner_f1",
155 |         "inputs": ["y_conll", "y_conll_pred_labels"]
156 |       },
157 |       {
158 |         "name": "ner_token_f1",
159 |         "inputs": ["y_conll", "y_conll_pred_labels"]
160 |       },
161 |       {
162 |         "name": "accuracy",
163 |         "alias": "accuracy_rte",
164 |         "inputs": ["y_rte", "y_rte_pred_ids"]
165 |       },
166 |       {
167 |         "name": "accuracy",
168 |         "alias": "accuracy_copa",
169 |         "inputs": ["y_copa", "y_copa_pred_ids"]
170 |       },
171 |     ],
172 |     "validation_patience": 3,
173 |     "log_every_n_epochs": 1,
174 |     "show_examples": false,
175 |     "evaluation_targets": ["valid"],
176 |     "class_name": "torch_trainer",
177 |     "pytest_max_batches": 2
178 |   },
179 |   "metadata": {
180 |     "variables": {
181 |       "ROOT_PATH": "~/.deeppavlov",
182 |       "MODELS_PATH": "{ROOT_PATH}/models/multitask_example",
183 |       "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
184 |       "BACKBONE": "distilbert-base-uncased",
185 |       "MODEL_PATH": "{MODELS_PATH}/{BACKBONE}_3task",
186 |       "NUM_TRAIN_EPOCHS": 5,
187 |       "GRADIENT_ACC_STEPS": 1
188 |     }
189 |   }
190 | }
191 | 


--------------------------------------------------------------------------------
/faq_as_ranking.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 26,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from deeppavlov import build_model, configs\n",
 10 |     "from deeppavlov.core.common.file import read_json\n",
 11 |     "import pandas as pd"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 23,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "config = read_json(configs.ranking.ranking_ubuntu_v2_bert_sep_interact)"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 35,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "config['chainer']['pipe'][0]['load_path']=\"/tmp\"\n",
 30 |     "config['chainer']['pipe'][0]['save_path']=\"/tmp\""
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 27,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "faq = pd.read_csv(\"http://files.deeppavlov.ai/faq/school/faq_school_en.csv\")"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 47,
 45 |    "metadata": {},
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "faq['Answer'].to_csv(\"/tmp/responses.csv\", header=False, index=False)"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 48,
 54 |    "metadata": {},
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "faq['Question'].to_csv(\"/tmp/contexts.csv\", header=False, index=False)"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 51,
 63 |    "metadata": {},
 64 |    "outputs": [
 65 |     {
 66 |      "name": "stderr",
 67 |      "output_type": "stream",
 68 |      "text": [
 69 |       "2019-12-04 20:28:10.399 INFO in 'deeppavlov.models.preprocessors.bert_preprocessor'['bert_preprocessor'] at line 306: Building BERT features for the response base...\n",
 70 |       "I1204 20:28:10.399302 140108899854144 bert_preprocessor.py:306] Building BERT features for the response base...\n",
 71 |       "2019-12-04 20:28:10.429 INFO in 'deeppavlov.models.preprocessors.bert_preprocessor'['bert_preprocessor'] at line 310: Building BERT features for the context base...\n",
 72 |       "I1204 20:28:10.429014 140108899854144 bert_preprocessor.py:310] Building BERT features for the context base...\n",
 73 |       "2019-12-04 20:28:10.435 WARNING in 'deeppavlov.core.models.serializable'['serializable'] at line 43: Load path '/home/com/.deeppavlov/models/ubuntu_v2_uncased_bert_sep_predictor_model/model' differs from save path '/home/com/.deeppavlov/models/ubuntu_v2_uncased_bert_sep_predictor_model' in 'infer' mode for BertSepRankerPredictor.\n",
 74 |       "W1204 20:28:10.435892 140108899854144 serializable.py:43] Load path '/home/com/.deeppavlov/models/ubuntu_v2_uncased_bert_sep_predictor_model/model' differs from save path '/home/com/.deeppavlov/models/ubuntu_v2_uncased_bert_sep_predictor_model' in 'infer' mode for BertSepRankerPredictor.\n",
 75 |       "2019-12-04 20:28:24.413 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 52: [loading model from /home/com/.deeppavlov/models/ubuntu_v2_uncased_bert_sep_predictor_model/model]\n",
 76 |       "I1204 20:28:24.413031 140108899854144 tf_model.py:52] [loading model from /home/com/.deeppavlov/models/ubuntu_v2_uncased_bert_sep_predictor_model/model]\n",
 77 |       "2019-12-04 20:28:25.388 INFO in 'deeppavlov.models.bert.bert_ranker'['bert_ranker'] at line 380: Building BERT vector representations for the response base...\n",
 78 |       "I1204 20:28:25.388360 140108899854144 bert_ranker.py:380] Building BERT vector representations for the response base...\n",
 79 |       "2019-12-04 20:28:29.820 INFO in 'deeppavlov.models.bert.bert_ranker'['bert_ranker'] at line 388: Building BERT vector representations for the context base...\n",
 80 |       "I1204 20:28:29.820471 140108899854144 bert_ranker.py:388] Building BERT vector representations for the context base...\n"
 81 |      ]
 82 |     }
 83 |    ],
 84 |    "source": [
 85 |     "ranking = build_model(config, download=False)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 53,
 91 |    "metadata": {},
 92 |    "outputs": [
 93 |     {
 94 |      "data": {
 95 |       "text/plain": [
 96 |        "[['\"Life and health insurance are obligatory for any foreign citizen arriving in Russia to study. The cost of life and health insurance is 8,200 rubles per year. A student needs to carry the insurance policy specifying the phone number of the insurance company and the emergency health service at all times. All Russian universities have medical offices for first aid and general medical care.\"'],\n",
 97 |        " [0.9010574817657471]]"
 98 |       ]
 99 |      },
100 |      "execution_count": 53,
101 |      "metadata": {},
102 |      "output_type": "execute_result"
103 |     }
104 |    ],
105 |    "source": [
106 |     "ranking([\"what about insurance?\"])"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 55,
112 |    "metadata": {},
113 |    "outputs": [
114 |     {
115 |      "data": {
116 |       "text/plain": [
117 |        "[['\"The preparatory course is a special educational program lasting one academic year — that is, between seven and 10 months. Students taking this course study Russian, mathematics, and physics. The course ends with an exam, and the passing students receive a certificate of completion.\"'],\n",
118 |        " [0.8985198736190796]]"
119 |       ]
120 |      },
121 |      "execution_count": 55,
122 |      "metadata": {},
123 |      "output_type": "execute_result"
124 |     }
125 |    ],
126 |    "source": [
127 |     "ranking([\"Do you have a preparatory courses?\"])"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": null,
133 |    "metadata": {},
134 |    "outputs": [],
135 |    "source": []
136 |   }
137 |  ],
138 |  "metadata": {
139 |   "kernelspec": {
140 |    "display_name": "Python 3",
141 |    "language": "python",
142 |    "name": "python3"
143 |   },
144 |   "language_info": {
145 |    "codemirror_mode": {
146 |     "name": "ipython",
147 |     "version": 3
148 |    },
149 |    "file_extension": ".py",
150 |    "mimetype": "text/x-python",
151 |    "name": "python",
152 |    "nbconvert_exporter": "python",
153 |    "pygments_lexer": "ipython3",
154 |    "version": "3.6.9"
155 |   }
156 |  },
157 |  "nbformat": 4,
158 |  "nbformat_minor": 4
159 | }
160 | 


--------------------------------------------------------------------------------
/yandex_faq.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Автоматизация ответов на часто задаваемые вопросы в навыке для \"Алисы\" с помощью библиотеки DeepPavlov\n",
  8 |     "\n",
  9 |     "В этом туториале я расскажу как разработать и подключить навык классификации текста на основе библиотеки **DeepPavlov** к **Яндекс.Алиса**. Наш навык сможет приветствовать, прощаться и отвечать на вопросы (на основе списка FAQ - часто задаваемых вопросов). Более детальное руководство по классификации текста на основе библиотеки DeepPavlov вы сможете найти в статье [Simple intent recognition and question answering with DeepPavlov](https://medium.com/deeppavlov)."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "## Установка\n",
 17 |     "\n",
 18 |     "Для начала установите Python 3.6 и активируйте среду разработки. Затем скачайте библиотеку DeepPavlov."
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "!source activate py36\n",
 28 |     "!pip install -q deeppavlov"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "## Разработка"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "Создайте скиллы для приветствия, прощания, и скилл на случай если запрос пользователя не удовлетворяет ни одной из категорий. Параметр `responses` определяет варианты ответа навыка на запросы из параметра `patters`. Если при определении навыка параметр `pattens` не задан, то этот навык становится навыком \"заглушкой\", то есть он вызывается в том случае если ни один из навыков не сработал. Параметр `default_confidence` задает минимальный порог уверенности при котором навык-заглушка сработает."
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": null,
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "from deeppavlov.skills.pattern_matching_skill import PatternMatchingSkill\n",
 52 |     "\n",
 53 |     "hello = PatternMatchingSkill(responses=['Привет', 'Приветствую'], patterns=['Привет', 'Здравствуйте'])\n",
 54 |     "bye = PatternMatchingSkill(responses=['Пока', 'Всего доброго'], patterns=['Пока', 'До свидания'])\n",
 55 |     "fallback = PatternMatchingSkill(responses=['Пожалуйста перефразируйте'], default_confidence = 0.3)"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "Создайте навык класса `SimilarityMatchingSkill`, который отвечает на запрос пользователя на основе списка часто задаваемых вопросов. Объект имеет следующие параметры\n",
 63 |     "* `data_path` - путь к csv файлу с данными\n",
 64 |     "* `x_col_name` - имя колонки с вопросами в csv файле (Question, по умолчанию)\n",
 65 |     "* `y_col_name` - имя колонки с ответами в csv файле (Answer, по умолчанию)\n",
 66 |     "* `edit_dict` - `dict` с параметрами конфигурации для перезаписи\n",
 67 |     "* `save_load_path` - путь куда сохранить натренированную модель\n",
 68 |     "* `train` - тренировать ли модель?"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": null,
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "from deeppavlov.contrib.skills.similarity_matching_skill import SimilarityMatchingSkill\n",
 78 |     "\n",
 79 |     "faq = SimilarityMatchingSkill(data_path = 'http://files.deeppavlov.ai/faq/dataset_ru.csv',\n",
 80 |     "                              x_col_name = 'Question', \n",
 81 |     "                              y_col_name = 'Answer',\n",
 82 |     "                              save_load_path = './model',\n",
 83 |     "                              config_type = 'tfidf_autofaq',\n",
 84 |     "                              train = True)"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "metadata": {},
 90 |    "source": [
 91 |     "`SimilarityMatchingSkill` это класс навыка, который позволяет классифицировать вопросы пользователя. Натренировав модель и сохранив в `save_load_path`, вы можете использовать ее указав путь загрузки `faq = SimilarityMatchingSkill(load_path='./model')`."
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "markdown",
 96 |    "metadata": {},
 97 |    "source": [
 98 |     "Последний шаг объединить все навыки в агент, и настроить параметр выбора навыка. Параметр `HighestConfidenceSelector` определяет, что будет вызван навык с наивысшей уверенностью (`confidence`)."
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": null,
104 |    "metadata": {},
105 |    "outputs": [],
106 |    "source": [
107 |     "from deeppavlov.agents.default_agent.default_agent import DefaultAgent\n",
108 |     "from deeppavlov.agents.processors.highest_confidence_selector import HighestConfidenceSelector\n",
109 |     "\n",
110 |     "agent = DefaultAgent([hello, bye, faq, fallback], skills_selector=HighestConfidenceSelector())"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "markdown",
115 |    "metadata": {},
116 |    "source": [
117 |     "Убедитесь, что агент корректо отвечает на запросы."
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "print(agent(['Привет', 'где будет школа?', 'как получить задание?', 'мне нужна помощь', 'Пока']))"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "markdown",
131 |    "metadata": {},
132 |    "source": [
133 |     "Далее запустите сервер с указанием пути для запросов `endpoint='faq'` и порта подключения `port=5000`"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": null,
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": [
142 |     "from deeppavlov.utils.alice import start_agent_server\n",
143 |     "\n",
144 |     "start_agent_server(agent, host='0.0.0.0', port=5000, endpoint='/faq')"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "Обратите внимание, что Яндекс.Диалоги в качестве **Webhook URL** требует указывать сервер с внешним IP адресом и доступом по протоколу https. Для быстрого прототипирования вы можете использовать [ngrok](https://ngrok.com/). **Ngrok** позволит вам создавать туннель для доступа к вашему серверу с **DeepPavlov** в локальной сети, для этого запустите `ngrok http 5000` на вашем сервере с DeepPavlov. В ответ на это будет создано два туннеля, по одному на протоколы http и https. Скопируйте адрес туннеля для https, добавьте к линку эндпоинт `/faq`, итоговый линк будет **Webhook URL** для нашего Яндекс.Диалога. Далее заполните поля необходимые для сохранения черновика диалога. Сохраните черновик и перейдите на вкладку Тестирование."
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "markdown",
156 |    "metadata": {},
157 |    "source": [
158 |     "# Ссылки\n",
159 |     "\n",
160 |     "[DeepPavlov repository](https://github.com/deepmipt/DeepPavlov)\n",
161 |     "\n",
162 |     "[DeepPavlov demo page](https://demo.ipavlov.ai)\n",
163 |     "\n",
164 |     "[DeepPavlov documentation](https://docs.deeppavlov.ai)\n",
165 |     "\n",
166 |     "[DeepPavlov blog](https://medium.com/deeppavlov)\n",
167 |     "\n",
168 |     "[DeepPavlov forum](https://forum.ipavlov.ai)"
169 |    ]
170 |   }
171 |  ],
172 |  "metadata": {
173 |   "kernelspec": {
174 |    "display_name": "Python 3",
175 |    "language": "python",
176 |    "name": "python3"
177 |   },
178 |   "language_info": {
179 |    "codemirror_mode": {
180 |     "name": "ipython",
181 |     "version": 3
182 |    },
183 |    "file_extension": ".py",
184 |    "mimetype": "text/x-python",
185 |    "name": "python",
186 |    "nbconvert_exporter": "python",
187 |    "pygments_lexer": "ipython3",
188 |    "version": "3.6.8"
189 |   }
190 |  },
191 |  "nbformat": 4,
192 |  "nbformat_minor": 2
193 | }
194 | 


--------------------------------------------------------------------------------
/examples/Pseudo-labeling for classification.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import json\n",
 10 |     "from pathlib import Path\n",
 11 |     "import numpy as np\n",
 12 |     "from copy import deepcopy\n",
 13 |     "import pandas as pd\n",
 14 |     "\n",
 15 |     "from deeppavlov.core.commands.train import read_data_by_config, train_evaluate_model_from_config\n",
 16 |     "from deeppavlov.core.commands.infer import interact_model, build_model\n",
 17 |     "from deeppavlov.core.commands.utils import expand_path, parse_config\n",
 18 |     "from deeppavlov.core.common.params import from_params\n",
 19 |     "from deeppavlov.core.common.errors import ConfigError"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": null,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "# read unlabelled data for label propagation\n",
 29 |     "def read_unlabelled_data(UNLABELLED_DATA_PATH):\n",
 30 |     "    with open(UNLABELLED_DATA_PATH, \"r\") as f:\n",
 31 |     "        unlabelled_data = f.read().splitlines()\n",
 32 |     "    unlabelled_data = [x for x in unlabelled_data if x != '']\n",
 33 |     "    return unlabelled_data"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {
 40 |     "scrolled": true
 41 |    },
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "def make_pl_config(CONFIG_PATH):\n",
 45 |     "    config_path_pl = Path(CONFIG_PATH).parent / Path(Path(CONFIG_PATH).stem + \"_pl.json\")\n",
 46 |     "\n",
 47 |     "    with open(CONFIG_PATH, \"r\") as f:\n",
 48 |     "        config = json.load(f)\n",
 49 |     "    \n",
 50 |     "    config_pl = deepcopy(config)\n",
 51 |     "    config_pl[\"dataset_reader\"][\"train\"] = Path(config_pl[\"dataset_reader\"].get(\"train\", \"train.csv\")).stem + \"_pl.csv\"\n",
 52 |     "    \n",
 53 |     "    with open(config_path_pl, \"w\") as f:\n",
 54 |     "        json.dump(config_pl, f, indent=2)\n",
 55 |     "    \n",
 56 |     "    return config, config_pl"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": null,
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "def save_extended_data(config, samples, labels, new_config = None):\n",
 66 |     "    train_data = read_data_by_config(deepcopy(config))\n",
 67 |     "    \n",
 68 |     "    for i in range(len(samples)):\n",
 69 |     "        train_data[\"train\"].append((samples[i], labels[i]))\n",
 70 |     "    df = pd.DataFrame(train_data[\"train\"], \n",
 71 |     "                      columns=[config[\"dataset_reader\"][\"x\"], \n",
 72 |     "                               config[\"dataset_reader\"][\"y\"]])\n",
 73 |     "    df[config[\"dataset_reader\"][\"y\"]] = df[config[\"dataset_reader\"][\"y\"]].apply(\n",
 74 |     "        lambda x: config[\"dataset_reader\"].get(\"class_sep\", \",\").join(x))\n",
 75 |     "    \n",
 76 |     "    if new_config is not None:\n",
 77 |     "        config = new_config\n",
 78 |     "    file = expand_path(Path(config[\"dataset_reader\"][\"data_path\"]) / \n",
 79 |     "                       Path(config[\"dataset_reader\"][\"train\"]))\n",
 80 |     "\n",
 81 |     "    if config[\"dataset_reader\"].get(\"format\", \"csv\") == \"csv\":\n",
 82 |     "        keys = ('sep', 'header', 'names')\n",
 83 |     "        df.to_csv(file, \n",
 84 |     "                  index=False,\n",
 85 |     "                  sep=config[\"dataset_reader\"].get(\"sep\", \",\")\n",
 86 |     "                 )\n",
 87 |     "    elif config[\"dataset_reader\"].get(\"format\", \"csv\") == \"json\":\n",
 88 |     "        keys = ('orient', 'lines')\n",
 89 |     "        df.to_json(file, \n",
 90 |     "                  index=False,\n",
 91 |     "                  orient=config[\"dataset_reader\"].get(\"orient\", None),\n",
 92 |     "                  lines=config[\"dataset_reader\"].get(\"lines\", False)\n",
 93 |     "                  )\n",
 94 |     "    else:\n",
 95 |     "        raise ConfigError(\"Can not work with current data format\")"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {
102 |     "scrolled": true
103 |    },
104 |    "outputs": [],
105 |    "source": [
106 |     "# manually given parameters for pseudo-labeling\n",
107 |     "\n",
108 |     "# path to config file\n",
109 |     "CONFIG_PATH = \"../deeppavlov/configs/classifiers/convers_vs_info.json\"\n",
110 |     "# read config, compose new one, save it\n",
111 |     "config, config_pl = make_pl_config(CONFIG_PATH)\n",
112 |     "config, config_pl = parse_config(config), parse_config(config_pl)\n",
113 |     "config"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {},
120 |    "outputs": [],
121 |    "source": [
122 |     "# path to file with unlabelled data\n",
123 |     "UNLABELLED_DATA_PATH = expand_path(Path(config[\"dataset_reader\"][\"data_path\"])) / Path(\"question_L6.txt\")\n",
124 |     "# number of samples that are going to be labelled during one iteration of label propagation\n",
125 |     "ONE_ITERATION_PORTION = 100\n",
126 |     "# number of iterations\n",
127 |     "N_ITERATIONS = 10\n",
128 |     "CLASSES_VOCAB_ID_IN_PIPE = 0\n",
129 |     "CONFIDENT_PROBA = 0.9"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "# read unlabelled dataset\n",
139 |     "unlabelled_data = read_unlabelled_data(UNLABELLED_DATA_PATH)\n",
140 |     "\n",
141 |     "# save initial dataset as extended\n",
142 |     "save_extended_data(config, [], [], new_config=config_pl)"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": null,
148 |    "metadata": {
149 |     "scrolled": true
150 |    },
151 |    "outputs": [],
152 |    "source": [
153 |     "available_unlabelled_ids = np.arange(len(unlabelled_data))\n",
154 |     "\n",
155 |     "np.random.seed(42)\n",
156 |     "\n",
157 |     "for i in range(N_ITERATIONS):\n",
158 |     "    samples = []\n",
159 |     "    labels = []\n",
160 |     "    \n",
161 |     "    ids_to_label = available_unlabelled_ids[\n",
162 |     "        np.random.randint(low=0, \n",
163 |     "                          high=len(available_unlabelled_ids), \n",
164 |     "                          size=ONE_ITERATION_PORTION)]\n",
165 |     "    available_unlabelled_ids = np.delete(available_unlabelled_ids, ids_to_label)\n",
166 |     "    train_evaluate_model_from_config(deepcopy(config_pl))\n",
167 |     "    model = build_model(deepcopy(config_pl))\n",
168 |     "    classes = np.array(list(from_params(\n",
169 |     "        deepcopy(config_pl[\"chainer\"][\"pipe\"][CLASSES_VOCAB_ID_IN_PIPE])).keys()))\n",
170 |     "\n",
171 |     "    for j, sample_id in enumerate(ids_to_label):\n",
172 |     "        prediction = model([unlabelled_data[sample_id]])[0]\n",
173 |     "        if len(np.where(np.array(prediction) > CONFIDENT_PROBA)[0]):\n",
174 |     "            samples.append(unlabelled_data[sample_id])\n",
175 |     "            labels.append(classes[np.where(np.array(prediction) > CONFIDENT_PROBA)])\n",
176 |     "    \n",
177 |     "    print(\"Iteration {}: add {} samples to train dataset\".format(i, len(samples)))\n",
178 |     "    save_extended_data(config_pl, samples, labels)"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {},
185 |    "outputs": [],
186 |    "source": []
187 |   }
188 |  ],
189 |  "metadata": {
190 |   "kernelspec": {
191 |    "display_name": "Python 3",
192 |    "name": "python3"
193 |   },
194 |   "accelerator": "GPU",
195 |   "language_info": {
196 |    "codemirror_mode": {
197 |     "name": "ipython",
198 |     "version": 3
199 |    },
200 |    "file_extension": ".py",
201 |    "mimetype": "text/x-python",
202 |    "name": "python",
203 |    "nbconvert_exporter": "python",
204 |    "pygments_lexer": "ipython3",
205 |    "version": "3.6.6"
206 |   }
207 |  },
208 |  "nbformat": 4,
209 |  "nbformat_minor": 2
210 | }
211 | 


--------------------------------------------------------------------------------
/DP_NER.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "colab_type": "text",
  7 |     "id": "7o7d-wdpgrUo"
  8 |    },
  9 |    "source": [
 10 |     "## Распознавание Именованных Сущностей с помощью библиотеки DeepPavlov\n",
 11 |     "---\n",
 12 |     "\n",
 13 |     "<img align=\"center\" height=\"27%\" width=\"27%\" src=\"https://deeppavlov.ai/docs/_static/ipavlov_logo.png\"/>\n",
 14 |     "\n",
 15 |     "\n",
 16 |     "Системы Распознавания Именованных Сущностей (NER) занимаются извлечение из текста таких объектов как \n",
 17 |     "**имена**, **названия организаций**, **названия географических объектов**. Данная задача как правило является компонентом в более крупной системе. Например, в диалоговой системе NER может быть использован для выделения имени собеседника. В библиотеке [DeepPavlov](https://github.com/deepmipt/DeepPavlov) есть ряд моделей которые решают данную задачу. Мы рассмотрим две модели решающие задачу NER на русском языке: [BERT](https://arxiv.org/pdf/1810.04805.pdf), показывающий на данный момент наилучшее качество, и [Bi-LSTM-CRF](https://arxiv.org/pdf/1603.01360.pdf), который несколько уступает в качестве при этом превосходит в скорости.\n",
 18 |     "\n",
 19 |     "[NER Demo](https://demo.ipavlov.ai/#/ru/ner)"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {
 25 |     "colab_type": "text",
 26 |     "id": "Bh3IQdJSgrUx"
 27 |    },
 28 |    "source": [
 29 |     "## Постановка задачи\n",
 30 |     "\n",
 31 |     "Задача НЕР может быть поставлена следующим образом: для заданной последовательность слов предсказать последовательность меток. Каждому входному слову сопоставляется метка из заданного множества меток. Пример: \n",
 32 |     " \n",
 33 |     "    Саша живет в Нижнем Новгороде\n",
 34 |     "    PER    O   O   LOC     LOC\n",
 35 |     "\n",
 36 |     "здесь **PER** - персона, **LOC** - локация. Однако, представленная разметка не позволяет разделять подряд идущие сущности. Для разделения таких сущностей используют префиксы B и I перед каждой меткой кроме O. Префикс B обозначает начало сущности, а I - продолжение. Тогда для примера выше будет следующая разметка:\n",
 37 |     "\n",
 38 |     "    Саша живет в Нижнем Новгороде\n",
 39 |     "    PER    O   O  B-LOC    I-LOC\n",
 40 |     "\n",
 41 |     "Разметка с префиксами B и O - наиболее распространённый способ разметки данных. Данный тип разметки часто называют **BIO** или **IOB**."
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {
 47 |     "colab_type": "text",
 48 |     "id": "t6sLDEkYgrU0"
 49 |    },
 50 |    "source": [
 51 |     "## Dataset\n",
 52 |     "\n",
 53 |     "Рассматриваемые в данном notebook-е модели были обучены на датасете [1]. Данный датасет содержит 1000 новостей в которых размечены персоны (PER), локации (LOC) и организации (ORG). В силу того, что обучающая выборка содержит только новостные данные смена типов распознаваемых текстов может существенно отразиться на качестве работы системы. Например, при использовании модели обученной на новостях переход к распознавания диалогов службы поддрежки может существенно снизить качество работы системы. \n",
 54 |     "\n",
 55 |     "1. Mozharova V., Loukachevitch N., Two-stage approach in Russian named entity recognition // International FRUCT Conference on Intelligence, Social Media and Web, ISMW FRUCT 2016. Saint-Petersburg; Russian Federation, DOI 10.1109/FRUCT.2016.7584769 "
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {
 61 |     "colab_type": "text",
 62 |     "id": "GF4cJjNwgrU4"
 63 |    },
 64 |    "source": [
 65 |     "## Установка библиотеки"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {
 72 |     "colab": {},
 73 |     "colab_type": "code",
 74 |     "id": "kXn5v0kzgrU7"
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "!pip install -q deeppavlov"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {
 84 |     "colab_type": "text",
 85 |     "id": "lgiy3YIdgrVL"
 86 |    },
 87 |    "source": [
 88 |     "## Установка зависимостей"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "metadata": {
 95 |     "colab": {},
 96 |     "colab_type": "code",
 97 |     "id": "EvqfR_9wgrVP",
 98 |     "scrolled": true
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "!python -m deeppavlov install ner_rus\n",
103 |     "!python -m deeppavlov install ner_rus_bert"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {
109 |     "colab_type": "text",
110 |     "id": "TCT7n3N2grVb"
111 |    },
112 |    "source": [
113 |     "## Использование моделей \n",
114 |     "\n",
115 |     "### BERT \n",
116 |     "\n",
117 |     "BERT - сеть архитектуры Transformer предобученная на задаче Masked Language Modelling (MLM). Модель осуществляющая НЕР использует [RuBERT](https://arxiv.org/abs/1905.07213) предобученный на русском языке. "
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {
124 |     "colab": {},
125 |     "colab_type": "code",
126 |     "id": "FG87RYHUgrVf"
127 |    },
128 |    "outputs": [],
129 |    "source": [
130 |     "from deeppavlov import configs, build_model\n",
131 |     "\n",
132 |     "ner = build_model(configs.ner.ner_rus_bert, download=True)"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": null,
138 |    "metadata": {
139 |     "colab": {},
140 |     "colab_type": "code",
141 |     "id": "9v3Fb_J7grVp"
142 |    },
143 |    "outputs": [],
144 |    "source": [
145 |     "tokens, tags = ner(['Саша живет в Нижнем Новгороде'])\n",
146 |     "for tok, tag in zip(tokens[0], tags[0]):\n",
147 |     "    print(f'{tok}\\t{tag}')"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "metadata": {
153 |     "colab_type": "text",
154 |     "id": "7x5rfJGPgrV0"
155 |    },
156 |    "source": [
157 |     "### Bi-LSTM-CRF\n",
158 |     "Архитектура [Bi-LSTM-CRF](https://arxiv.org/pdf/1603.01360.pdf) проще BERT как по памяти, так и по сложности вычисления. Данная архитектура уступает в качетсве BERT, однако, работает заметно быстрее."
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": null,
164 |    "metadata": {
165 |     "colab": {},
166 |     "colab_type": "code",
167 |     "id": "NoB3OqHigrV9"
168 |    },
169 |    "outputs": [],
170 |    "source": [
171 |     "ner = build_model(configs.ner.ner_rus, download=True)"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": null,
177 |    "metadata": {
178 |     "colab": {},
179 |     "colab_type": "code",
180 |     "id": "hgMuVlNGgrWI"
181 |    },
182 |    "outputs": [],
183 |    "source": [
184 |     "tokens, tags = ner(['Саша живет в Нижнем Новгороде'])\n",
185 |     "for tok, tag in zip(tokens[0], tags[0]):\n",
186 |     "    print(f'{tok}\\t{tag}')"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "metadata": {
192 |     "colab_type": "text",
193 |     "id": "mK3EA_UegrWS"
194 |    },
195 |    "source": [
196 |     "## DeepPavlov documentation: \n",
197 |     "\n",
198 |     "### http://docs.deeppavlov.ai/en/master/components/ner.html\n",
199 |     "\n",
200 |     "## GitHub Repository\n",
201 |     "\n",
202 |     "### https://github.com/deepmipt/DeepPavlov\n",
203 |     "\n",
204 |     "## DeepPavlov demo page\n",
205 |     "\n",
206 |     "### https://demo.ipavlov.ai\n",
207 |     "\n",
208 |     "## Forum\n",
209 |     "\n",
210 |     "### https://forum.ipavlov.ai\n"
211 |    ]
212 |   }
213 |  ],
214 |  "metadata": {
215 |   "colab": {
216 |    "name": "DeepPavlov NER Alice.ipynb",
217 |    "provenance": [],
218 |    "version": "0.3.2"
219 |   },
220 |   "kernelspec": {
221 |    "display_name": "Python 3",
222 |    "language": "python",
223 |    "name": "python3"
224 |   },
225 |   "language_info": {
226 |    "codemirror_mode": {
227 |     "name": "ipython",
228 |     "version": 3
229 |    },
230 |    "file_extension": ".py",
231 |    "mimetype": "text/x-python",
232 |    "name": "python",
233 |    "nbconvert_exporter": "python",
234 |    "pygments_lexer": "ipython3",
235 |    "version": "3.6.8"
236 |   }
237 |  },
238 |  "nbformat": 4,
239 |  "nbformat_minor": 2
240 | }
241 | 


--------------------------------------------------------------------------------
/DP_hello_bot.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "colab_type": "text",
  7 |     "id": "b1ncTZNSDi-e"
  8 |    },
  9 |    "source": [
 10 |     "# DeepPavlov: Hello bot!\n",
 11 |     "\n",
 12 |     "\n",
 13 |     "This notebook contains essential information on how to a build pattern-matching chat-bot with the [DeepPavlov](https://github.com/deepmipt/DeepPavlov) framework. DeepPavlov is an open-source conversational AI framework. Before delving into the code it would be useful to explain at a high level how DeepPavlov works. \n",
 14 |     "\n",
 15 |     "The smallest building block of the library is **Component**. Component stands for any kind of function in an NLP pipeline. It can be implemented as a neural network, a non-neural ML model or a rule-based system. Besides that, Component can have a nested structure, i.e. Component can include other Components.\n",
 16 |     "\n",
 17 |     "Components can be joined into **Skill**. Skill solves a larger NLP task compared to Component. However, in terms of implementation Skills are not different from Components. \n",
 18 |     "\n",
 19 |     "**Agent** is supposed to be a multi-purpose dialogue system that comprises several Skills and can switch between them. It can be a dialogue system that contains a goal-oriented and chatbot skills and chooses which one to use for generating the answer depending on an user input.\n",
 20 |     "\n",
 21 |     "Now you know enough theory to implement an introductory level chatbot with the open-source DeepPavlov framework."
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {
 27 |     "colab_type": "text",
 28 |     "id": "koWvWtkyE6cz"
 29 |    },
 30 |    "source": [
 31 |     "# Requirements\n",
 32 |     "\n",
 33 |     "First, install all required packages"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {
 40 |     "colab": {
 41 |      "base_uri": "https://localhost:8080/",
 42 |      "height": 289
 43 |     },
 44 |     "colab_type": "code",
 45 |     "id": "YAM0K6D0E7OJ",
 46 |     "outputId": "bba84c3e-8714-4a09-d14c-6461c05b69f5"
 47 |    },
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "!pip install deeppavlov"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {
 56 |     "colab_type": "text",
 57 |     "id": "8AHT9VCPFCmX"
 58 |    },
 59 |    "source": [
 60 |     "# Hello bot!"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {
 66 |     "colab_type": "text",
 67 |     "id": "SLD0l8RRDi-m"
 68 |    },
 69 |    "source": [
 70 |     "Possibly the most delightful part of discovering a new programming framework is the \"Hello, world!\" example. Our HelloBot will be able to recognize a greetings message and reply with * \"Hello world!\"* In addition, upon receiving a goodbye message, it will respond with one of the predefined goodbye messages.  Otherwise, it will reply with the message *\"I don’t understand, sorry\"*."
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 1,
 76 |    "metadata": {
 77 |     "colab": {},
 78 |     "colab_type": "code",
 79 |     "id": "9MF7LQ51Di-n"
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "from deeppavlov.skills.pattern_matching_skill import PatternMatchingSkill\n",
 84 |     "from deeppavlov.agents.default_agent.default_agent import DefaultAgent\n",
 85 |     "from deeppavlov.agents.processors.highest_confidence_selector import HighestConfidenceSelector"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {
 91 |     "colab_type": "text",
 92 |     "id": "UPIvQ3KEDi-r"
 93 |    },
 94 |    "source": [
 95 |     "Then create Skills with corresponding patterns and predefined responses.\n",
 96 |     "\n"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 2,
102 |    "metadata": {
103 |     "colab": {},
104 |     "colab_type": "code",
105 |     "id": "m0Gs3NqeDi-s"
106 |    },
107 |    "outputs": [],
108 |    "source": [
109 |     "hello = PatternMatchingSkill(responses=['Hello world!'], patterns=[\"hi\", \"hello\", \"good day\"])\n",
110 |     "bye = PatternMatchingSkill(['Goodbye world!', 'See you around'], patterns=[\"bye\", \"chao\", \"see you\"])\n",
111 |     "fallback = PatternMatchingSkill([\"I don't understand, sorry\"])"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {
117 |     "colab_type": "text",
118 |     "id": "F8zEWcn-Di-v"
119 |    },
120 |    "source": [
121 |     "The Agent executes Skills and then takes the highest-confidence responses from them.\n"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 3,
127 |    "metadata": {
128 |     "colab": {},
129 |     "colab_type": "code",
130 |     "id": "mJ5NIu4MDi-x"
131 |    },
132 |    "outputs": [],
133 |    "source": [
134 |     "agent = DefaultAgent([hello, bye, fallback], skills_selector=HighestConfidenceSelector())"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {
140 |     "colab_type": "text",
141 |     "id": "oHIZ5X7WDi-z"
142 |    },
143 |    "source": [
144 |     "Give the floor to the HelloBot!"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 7,
150 |    "metadata": {
151 |     "colab": {},
152 |     "colab_type": "code",
153 |     "id": "NwvROayNDi-1"
154 |    },
155 |    "outputs": [
156 |     {
157 |      "data": {
158 |       "text/plain": [
159 |        "[\"I don't understand, sorry\", 'See you around', \"I don't understand, sorry\"]"
160 |       ]
161 |      },
162 |      "execution_count": 7,
163 |      "metadata": {},
164 |      "output_type": "execute_result"
165 |     }
166 |    ],
167 |    "source": [
168 |     "agent(['HellSo deAR', 'Bye', 'Or not'])"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "markdown",
173 |    "metadata": {
174 |     "colab_type": "text",
175 |     "id": "IXJeRGjWIwib"
176 |    },
177 |    "source": [
178 |     "In addition to the exact text matching, our framework supports regular expressions."
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {
185 |     "colab": {},
186 |     "colab_type": "code",
187 |     "id": "LIHCRCM0Ihvw"
188 |    },
189 |    "outputs": [],
190 |    "source": [
191 |     "hello = PatternMatchingSkill(responses=[\"Hello world!\"], patterns=[\"(hi|hello|good day)\"], regex = True)\n",
192 |     "sorry = PatternMatchingSkill(responses=[\"don’t be sorry\", \"Please don’t\"], patterns=[\"(sorry|excuse)\"], regex = True)\n",
193 |     "perhaps = PatternMatchingSkill(responses=[\"Please be more specific\"], patterns=[\"(.*)perhaps(.*)\"], regex = True)\n",
194 |     "\n",
195 |     "agent = DefaultAgent([hello, sorry, perhaps], skills_selector=HighestConfidenceSelector())\n",
196 |     "agent(['hi, how are you', 'I am sorry', 'perhaps I am not sure'])"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "markdown",
201 |    "metadata": {
202 |     "colab_type": "text",
203 |     "id": "bpRVgfN2GkvA"
204 |    },
205 |    "source": [
206 |     "Do not hesitate to add more Skills in the same manner. Now you know how to create simple rule-based chatbots by using the DeepPavlov framework. In the next article, we will describe techniques that will help you develop more advanced chatbots.\n"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "markdown",
211 |    "metadata": {
212 |     "colab_type": "text",
213 |     "id": "t4YWv_96Ftdv"
214 |    },
215 |    "source": [
216 |     "# Useful links\n",
217 |     "\n",
218 |     "[DeepPavlov repository](https://github.com/deepmipt/DeepPavlov)\n",
219 |     "\n",
220 |     "[DeepPavlov demo page](https://demo.ipavlov.ai)\n",
221 |     "\n",
222 |     "[DeepPavlov documentation](https://docs.deeppavlov.ai)"
223 |    ]
224 |   }
225 |  ],
226 |  "metadata": {
227 |   "colab": {
228 |    "collapsed_sections": [],
229 |    "name": "DP:hello_bot.ipynb",
230 |    "provenance": [],
231 |    "version": "0.3.2"
232 |   },
233 |   "kernelspec": {
234 |    "display_name": "Python 3",
235 |    "language": "python",
236 |    "name": "python3"
237 |   },
238 |   "language_info": {
239 |    "codemirror_mode": {
240 |     "name": "ipython",
241 |     "version": 3
242 |    },
243 |    "file_extension": ".py",
244 |    "mimetype": "text/x-python",
245 |    "name": "python",
246 |    "nbconvert_exporter": "python",
247 |    "pygments_lexer": "ipython3",
248 |    "version": "3.6.4"
249 |   }
250 |  },
251 |  "nbformat": 4,
252 |  "nbformat_minor": 2
253 | }
254 | 


--------------------------------------------------------------------------------
/DP_autoFAQ.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "colab_type": "text",
  7 |     "id": "B7fbDvssiBrV"
  8 |    },
  9 |    "source": [
 10 |     "# Simple intent recognition and question answering with DeepPavlov\n",
 11 |     "\n",
 12 |     "This notebook consists of the [DeepPavlov](https://github.com/deepmipt/DeepPavlov) code snippets. The snippets show how to interact with the text classification models that were specifically developed to be effective when training data is limited. The popular use case scenario for these models is to classify user utterances into one of the FAQ questions and retrieve the corresponding answer (autoFAQ models). As a testbed, we use the students’ FAQ from the [MIPT website](https://mipt.ru/english/). The FAQ contains the most popular first-year students' questions with corresponding answers.\n",
 13 |     "The framework allows you to train models, fine-tune hyperparameters, and test models."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {
 19 |     "colab_type": "text",
 20 |     "id": "x_hoGWCfhxLh"
 21 |    },
 22 |    "source": [
 23 |     "# Requirements\n",
 24 |     "\n",
 25 |     "First, install all required packages"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {
 32 |     "colab": {
 33 |      "base_uri": "https://localhost:8080/",
 34 |      "height": 683
 35 |     },
 36 |     "colab_type": "code",
 37 |     "id": "qhvF6Petcd0r",
 38 |     "outputId": "ec2f36c8-adc2-415d-cc1f-24031bc8d9ed"
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "!pip install -q deeppavlov"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "markdown",
 47 |    "metadata": {
 48 |     "colab_type": "text",
 49 |     "id": "EVWji-oNOl2I"
 50 |    },
 51 |    "source": [
 52 |     "# Model Description\n",
 53 |     "\n",
 54 |     "DeepPavlov contains several text classification models that work well on few training pairs. All the models are based on two major text representations: fastText word embeddings and tf-idf representation. The models described in the separated configuration files under the [config/faq](https://github.com/deepmipt/DeepPavlov/tree/master/deeppavlov/configs/faq) folder. The config file consists of four main sections: **dataset_reader**, **dataset_iterator**, **chainer**, and **train**.\n",
 55 |     "\n",
 56 |     "The **dataset_iterator** specifies how to split the data into train, valid, test sets. The **chainer** section of the configuration files contains a pipeline of the required components to interact with the models, i.e. tokenizer, lemmatizer, tf-idf vectorizer, and others. The tokenizer splits a string into tokens, lemmatizer converts all tokens into lemmas. The tf-idf vectorizer transforms the lemmas into tf-idf vectors. The component’s input and output are defined in the **in** and **out** keys correspondingly.\n",
 57 |     "\n",
 58 |     "Run the following cell to see a [configuration file](https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/configs/faq/tfidf_logreg_en_faq.json) based on logistic regression."
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": null,
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "%load https://raw.githubusercontent.com/deepmipt/DeepPavlov/master/deeppavlov/configs/faq/tfidf_logreg_en_faq.json"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {
 73 |     "colab_type": "text",
 74 |     "id": "u1hsQECYf5yX"
 75 |    },
 76 |    "source": [
 77 |     "# Interacting with the model\n",
 78 |     "\n",
 79 |     "The DeepPavlov framework contains several models pre-trained on the aforementioned MIPT FAQ corpus. The files with the pre-trained models defined in the **metadata: download** section of the model's configuration file. You can interact with the model by running it from the command line with **interact** parameter and the name of  the model's configuration file (-d indicates to download all required files). But first, **install** all the model requirements."
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {
 86 |     "colab": {
 87 |      "base_uri": "https://localhost:8080/",
 88 |      "height": 2111
 89 |     },
 90 |     "colab_type": "code",
 91 |     "id": "UydjsTwLfryF",
 92 |     "outputId": "17386d79-1c3b-4739-b502-67e2432a0b20"
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "!python -m deeppavlov install tfidf_logreg_en_faq"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": null,
102 |    "metadata": {
103 |     "colab": {
104 |      "base_uri": "https://localhost:8080/",
105 |      "height": 615
106 |     },
107 |     "colab_type": "code",
108 |     "id": "H_YQYuj2_Xpx",
109 |     "outputId": "678c140a-74fb-4187-b70c-4b441c2c84c2"
110 |    },
111 |    "outputs": [],
112 |    "source": [
113 |     "!python -m deeppavlov interact tfidf_logreg_en_faq -d"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {
119 |     "colab_type": "text",
120 |     "id": "48TD-v-FO25O"
121 |    },
122 |    "source": [
123 |     "Alternatively, you can **build_model** from the Python code as on the example below. In addition, please make sure that you can navigate the configuration files by using Autocomplete (Tab key) with **configs** module."
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": null,
129 |    "metadata": {
130 |     "cellView": "code",
131 |     "colab": {
132 |      "base_uri": "https://localhost:8080/",
133 |      "height": 224
134 |     },
135 |     "colab_type": "code",
136 |     "id": "wMZqyzBYc2eV",
137 |     "outputId": "14910e68-f733-47bf-91d1-1c7646f79e6b"
138 |    },
139 |    "outputs": [],
140 |    "source": [
141 |     "from deeppavlov import configs\n",
142 |     "from deeppavlov.core.common.file import read_json\n",
143 |     "from deeppavlov.core.commands.infer import build_model\n",
144 |     "\n",
145 |     "faq = build_model(configs.faq.tfidf_logreg_en_faq, download = True)\n",
146 |     "a = faq([\"I need help\"])\n",
147 |     "a"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "metadata": {
153 |     "colab_type": "text",
154 |     "id": "TJ_Uqj7K52En"
155 |    },
156 |    "source": [
157 |     "# Training the model\n",
158 |     "\n",
159 |     "You can train a model by running the library with **train** parameter, wherein the model will be trained on the dataset defined in the dataset_reader section of the configuration file. If **metrics** key along with either **validate_best** or **test_best** is defined in the train section, the model will be validated/tested on the corresponding set in the dataset_iterator section."
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": null,
165 |    "metadata": {
166 |     "colab": {},
167 |     "colab_type": "code",
168 |     "id": "xfOBVt_U5zE4"
169 |    },
170 |    "outputs": [],
171 |    "source": [
172 |     "!python -m deeppavlov train tfidf_logreg_en_faq"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "metadata": {
178 |     "colab_type": "text",
179 |     "id": "93GqENh-PAHc"
180 |    },
181 |    "source": [
182 |     "Let's modify the training data and retrain the model."
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "metadata": {
189 |     "colab": {},
190 |     "colab_type": "code",
191 |     "id": "FEXzImAIPGEt"
192 |    },
193 |    "outputs": [],
194 |    "source": [
195 |     "%%bash\n",
196 |     "wget -q http://files.deeppavlov.ai/faq/school/faq_school_en.csv -O faq.csv\n",
197 |     "echo \"What's DeepPavlov?, DeepPavlov is an open-source conversational AI library\" >> faq.csv"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": null,
203 |    "metadata": {
204 |     "colab": {},
205 |     "colab_type": "code",
206 |     "id": "aN-Yb8Vd4oD6"
207 |    },
208 |    "outputs": [],
209 |    "source": [
210 |     "from deeppavlov import configs, train_model\n",
211 |     "\n",
212 |     "model_config = read_json(configs.faq.tfidf_logreg_en_faq)\n",
213 |     "model_config[\"dataset_reader\"][\"data_path\"] = \"/content/faq.csv\"\n",
214 |     "model_config[\"dataset_reader\"][\"data_url\"] = None\n",
215 |     "faq = train_model(model_config)\n",
216 |     "a = faq([\"tell me about DeepPavlov\"])\n",
217 |     "a"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "markdown",
222 |    "metadata": {
223 |     "colab_type": "text",
224 |     "id": "1s2RLyyYUogN"
225 |    },
226 |    "source": [
227 |     "# About Us\n",
228 |     "\n",
229 |     "We are iPavlov, our story started in 2017 when we decided to build a conversational AI framework that on the one hand will contain all required NLP components to build chatbots and on the other hand will be easy to use. Our work resulted in releasing DeepPavlov library. Our lab at MIPT is honored with Facebook AI Academic Partnership and NVIDIA GPU Research Center status. We successfully combine research and extreme coding in our week-long DeepHack.me hackathons — DeepHack.Game, DeepHack.Q&A and DeepHack.RL. We serve a global AI community by organizing NIPS Conversational Challenge to evaluate state-of-the-art techniques in the field of dialog systems and collect open source dialog datasets."
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "markdown",
234 |    "metadata": {
235 |     "colab_type": "text",
236 |     "id": "yom6JD53QcXk"
237 |    },
238 |    "source": [
239 |     "# Useful links\n",
240 |     "\n",
241 |     "[DeepPavlov repository](https://github.com/deepmipt/DeepPavlov)\n",
242 |     "\n",
243 |     "[DeepPavlov demo page](https://demo.ipavlov.ai)\n",
244 |     "\n",
245 |     "[DeepPavlov documentation](https://docs.deeppavlov.ai)"
246 |    ]
247 |   }
248 |  ],
249 |  "metadata": {
250 |   "colab": {
251 |    "collapsed_sections": [],
252 |    "name": "DP:autoFAQ.ipynb",
253 |    "provenance": [],
254 |    "version": "0.3.2"
255 |   },
256 |   "kernelspec": {
257 |    "display_name": "Python 3",
258 |    "language": "python",
259 |    "name": "python3"
260 |   },
261 |   "language_info": {
262 |    "codemirror_mode": {
263 |     "name": "ipython",
264 |     "version": 3
265 |    },
266 |    "file_extension": ".py",
267 |    "mimetype": "text/x-python",
268 |    "name": "python",
269 |    "nbconvert_exporter": "python",
270 |    "pygments_lexer": "ipython3",
271 |    "version": "3.6.4"
272 |   }
273 |  },
274 |  "nbformat": 4,
275 |  "nbformat_minor": 2
276 | }
277 | 


--------------------------------------------------------------------------------
/dp_torch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# DeepPavlov - an Open-Source Conversational AI Framework \n",
  8 |     "---\n",
  9 |     "\n",
 10 |     "[DeepPavlov](https://deeppavlov.ai/) Library is a conversational open-source library for Natural Language Processing (NLP) and Multiskill AI Assistant development. DeepPalvov is based on TensorFlow, Keras. And now it supports PyTorch. Moreover DeepPavlov supports Transformers from  Hugging face to enable you to use a wide variety of transformer-based models and Datasets from Hugging face with hundreds of datasets to train your model. \n",
 11 |     "\n",
 12 |     "You can read more about us in our [official blog](https://medium.com/deeppavlov). Also, feel free to test our BERT-based models by using our [demo](http://demo.deeppavlov.ai). And don’t forget DeepPavlov has a dedicated [forum](https://forum.deeppavlov.ai/), where any questions concerning the library and the models are welcome.\n"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "markdown",
 17 |    "metadata": {},
 18 |    "source": [
 19 |     "![image info](https://static.tildacdn.com/tild3762-3666-4530-b139-666433343863/_DeepPavlov_-5.png)"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {},
 25 |    "source": [
 26 |     "## Installation\n",
 27 |     "We support `Linux` and `Windows` platforms, `Python 3.6` and `Python 3.7`"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": null,
 33 |    "metadata": {},
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "!pip install deeppavlov"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "## QuickStart"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "The [DeepPavlov](https://deeppavlov.ai/) NLP pipelines are defined in the separate configuration files under the *config/faq* folder. List of models is available on\n",
 51 |     "[the doc page](http://docs.deeppavlov.ai/en/master/features/overview.html)\n",
 52 |     "\n",
 53 |     "When you are decided on the model and a configuration file, there are two ways to use it\n",
 54 |     "\n",
 55 |     "* via **Command Line Interface (CLI)**\n",
 56 |     "* via **Python**\n",
 57 |     "* via **Rise API**"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "metadata": {},
 63 |    "source": [
 64 |     "## How to use DeepPavlov Text Classification in CLI\n",
 65 |     "\n",
 66 |     "Let’s demonstrate the DeepPavlov text classification models using the insult detection problem. It involves predicting whether a comment posted during a public discussion is considered insulting to one of the participants. Basically, this is a binary classification problem with only two classes: *Insult* and *Not Insult*. \n",
 67 |     "\n",
 68 |     "\n",
 69 |     "Before using the model you should install all it's requirements by running `install`. You can retrain the model by running it with `train` command. To get predictions from a model interactively through CLI, run `interact`. Dataset will be downloaded if `-d` flag is set."
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": null,
 75 |    "metadata": {},
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "!python -m deeppavlov install insults_kaggle_bert_torch"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "!python -m deeppavlov train insults_kaggle_bert_torch"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "!python -m deeppavlov interact insults_kaggle_bert_torch -d"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "The detailed description of the commands can be found in our docs."
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {},
109 |    "source": [
110 |     "## DeepPavlov for Text Classification in Python"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "markdown",
115 |    "metadata": {},
116 |    "source": [
117 |     "In order to interact with the model first you need to `build_model` the model. The `download=True` parameter indicates that we want to build already pretrained model."
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "from deeppavlov import build_model, configs\n",
127 |     "model = build_model(configs.classifiers.insults_kaggle_bert_torch, download=True)\n",
128 |     "model(['hey, how are you?', 'You are so dumb!'])"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {},
134 |    "source": [
135 |     "You can evaluate the model by running `evaluate_model`"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": null,
141 |    "metadata": {},
142 |    "outputs": [],
143 |    "source": [
144 |     "from deeppavlov import evaluate_model\n",
145 |     "performance = evaluate_model(configs.classifiers.insults_kaggle_bert_torch)\n",
146 |     "performance"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "markdown",
151 |    "metadata": {},
152 |    "source": [
153 |     "If you want to train the model on your data you need to change `data_path` to folder with *train.csv*, *valid.csv*, *test.csv* and change `MODEL_PATH` where to save trained model."
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 19,
159 |    "metadata": {},
160 |    "outputs": [
161 |     {
162 |      "name": "stdout",
163 |      "output_type": "stream",
164 |      "text": [
165 |       "{'class_name': 'basic_classification_reader',\n",
166 |       " 'data_path': '{DOWNLOADS_PATH}/insults_data',\n",
167 |       " 'x': 'Comment',\n",
168 |       " 'y': 'Class'}\n",
169 |       "{'DOWNLOADS_PATH': '{ROOT_PATH}/downloads',\n",
170 |       " 'MODELS_PATH': '{ROOT_PATH}/models',\n",
171 |       " 'MODEL_PATH': '{MODELS_PATH}/classifiers/insults_kaggle_torch_bert',\n",
172 |       " 'ROOT_PATH': '~/.deeppavlov',\n",
173 |       " 'TRANSFORMER': 'bert-base-uncased'}\n"
174 |      ]
175 |     }
176 |    ],
177 |    "source": [
178 |     "import json\n",
179 |     "from pprint import pprint\n",
180 |     "from deeppavlov import configs\n",
181 |     "config = json.load(open(configs.classifiers.insults_kaggle_bert_torch))\n",
182 |     "\n",
183 |     "pprint(config['dataset_reader'])\n",
184 |     "pprint(config['metadata']['variables'])"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {},
190 |    "source": [
191 |     "Then, train the model"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": null,
197 |    "metadata": {},
198 |    "outputs": [],
199 |    "source": [
200 |     "from deeppavlov import train_model\n",
201 |     "model = train_model(model_config)"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "markdown",
206 |    "metadata": {},
207 |    "source": [
208 |     "## DeepPavlov with Transformers support"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "markdown",
213 |    "metadata": {},
214 |    "source": [
215 |     "Let's check how the text classification model performance depends on the transformer architecture. Before doing so, let's make sure that we include the transformer name to the default model path."
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": null,
221 |    "metadata": {},
222 |    "outputs": [],
223 |    "source": [
224 |     "!rm -rf ~/.deeppavlov/classifiers/insults_kaggle_torch_bert                            "
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "code",
229 |    "execution_count": 20,
230 |    "metadata": {},
231 |    "outputs": [],
232 |    "source": [
233 |     "config['metadata']['variables']['MODEL_PATH'] = \"{MODELS_PATH}/classifiers/insults_kaggle_torch_bert/{TRANSFORMER}\""
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "markdown",
238 |    "metadata": {},
239 |    "source": [
240 |     "Let's test the performance for three transformers `albert-base-v2`, `distilbert-base-uncased`, `bert-base-uncased`"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": null,
246 |    "metadata": {},
247 |    "outputs": [],
248 |    "source": [
249 |     "import json\n",
250 |     "from pprint import pprint\n",
251 |     "from deeppavlov import train_model\n",
252 |     "results = {}\n",
253 |     "\n",
254 |     "transformers = ['albert-base-v2', 'distilbert-base-uncased', 'bert-base-uncased']\n",
255 |     "for transformer in transformers:\n",
256 |     "    config['metadata']['variables']['MODEL_PATH'] = \"{MODELS_PATH}/classifiers/insults_kaggle_torch_bert/{TRANSFORMER}\"\n",
257 |     "    config['metadata']['variables']['TRANSFORMER'] = transformer\n",
258 |     "    model = train_model(config, download=False)\n",
259 |     "    results[transformer] = evaluate_model(config)"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "code",
264 |    "execution_count": 26,
265 |    "metadata": {},
266 |    "outputs": [
267 |     {
268 |      "data": {
269 |       "text/plain": [
270 |        "{'albert-base-v2': {'train': OrderedDict([('roc_auc', 0.9763),\n",
271 |        "               ('accuracy', 0.9412),\n",
272 |        "               ('f1_macro', 0.9231)]),\n",
273 |        "  'valid': OrderedDict([('roc_auc', 0.9223),\n",
274 |        "               ('accuracy', 0.875),\n",
275 |        "               ('f1_macro', 0.8324)]),\n",
276 |        "  'test': OrderedDict([('roc_auc', 0.8556),\n",
277 |        "               ('accuracy', 0.7597),\n",
278 |        "               ('f1_macro', 0.7508)])},\n",
279 |        " 'distilbert-base-uncased': {'train': OrderedDict([('roc_auc', 0.9847),\n",
280 |        "               ('accuracy', 0.9521),\n",
281 |        "               ('f1_macro', 0.9399)]),\n",
282 |        "  'valid': OrderedDict([('roc_auc', 0.9243),\n",
283 |        "               ('accuracy', 0.8731),\n",
284 |        "               ('f1_macro', 0.8373)]),\n",
285 |        "  'test': OrderedDict([('roc_auc', 0.8641),\n",
286 |        "               ('accuracy', 0.7826),\n",
287 |        "               ('f1_macro', 0.7788)])},\n",
288 |        " 'bert-base-uncased': {'train': OrderedDict([('roc_auc', 0.9813),\n",
289 |        "               ('accuracy', 0.9445),\n",
290 |        "               ('f1_macro', 0.9309)]),\n",
291 |        "  'valid': OrderedDict([('roc_auc', 0.9318),\n",
292 |        "               ('accuracy', 0.867),\n",
293 |        "               ('f1_macro', 0.8357)]),\n",
294 |        "  'test': OrderedDict([('roc_auc', 0.866),\n",
295 |        "               ('accuracy', 0.7902),\n",
296 |        "               ('f1_macro', 0.7887)])}}"
297 |       ]
298 |      },
299 |      "execution_count": 26,
300 |      "metadata": {},
301 |      "output_type": "execute_result"
302 |     }
303 |    ],
304 |    "source": [
305 |     "results"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "code",
310 |    "execution_count": null,
311 |    "metadata": {},
312 |    "outputs": [],
313 |    "source": [
314 |     "{'albert-base-v2': {'train': OrderedDict([('roc_auc', 0.9763),\n",
315 |     "               ('accuracy', 0.9412),\n",
316 |     "               ('f1_macro', 0.9231)]),\n",
317 |     "  'valid': OrderedDict([('roc_auc', 0.9223),\n",
318 |     "               ('accuracy', 0.875),\n",
319 |     "               ('f1_macro', 0.8324)]),\n",
320 |     "  'test': OrderedDict([('roc_auc', 0.8556),\n",
321 |     "               ('accuracy', 0.7597),\n",
322 |     "               ('f1_macro', 0.7508)])},\n",
323 |     " 'distilbert-base-uncased': {'train': OrderedDict([('roc_auc', 0.9847),\n",
324 |     "               ('accuracy', 0.9521),\n",
325 |     "               ('f1_macro', 0.9399)]),\n",
326 |     "  'valid': OrderedDict([('roc_auc', 0.9243),\n",
327 |     "               ('accuracy', 0.8731),\n",
328 |     "               ('f1_macro', 0.8373)]),\n",
329 |     "  'test': OrderedDict([('roc_auc', 0.8641),\n",
330 |     "               ('accuracy', 0.7826),\n",
331 |     "               ('f1_macro', 0.7788)])},\n",
332 |     " 'bert-base-uncased': {'train': OrderedDict([('roc_auc', 0.9813),\n",
333 |     "               ('accuracy', 0.9445),\n",
334 |     "               ('f1_macro', 0.9309)]),\n",
335 |     "  'valid': OrderedDict([('roc_auc', 0.9318),\n",
336 |     "               ('accuracy', 0.867),\n",
337 |     "               ('f1_macro', 0.8357)]),\n",
338 |     "  'test': OrderedDict([('roc_auc', 0.866),\n",
339 |     "               ('accuracy', 0.7902),\n",
340 |     "               ('f1_macro', 0.7887)])}}"
341 |    ]
342 |   },
343 |   {
344 |    "cell_type": "markdown",
345 |    "metadata": {},
346 |    "source": [
347 |     "Let's aggregate the performance on the test set for different transformer-based models. "
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "markdown",
352 |    "metadata": {},
353 |    "source": [
354 |     "| Transformer | ROC-AUC | Accuracy | F1-macro\n",
355 |     "| --- | --- | --- | --- |\n",
356 |     "| bert-base-uncased | **0.866** | **0.7902** | **0.788** |\n",
357 |     "| albert-base-v2 |  0.8556 | 0.7597 | 0.7508 |\n",
358 |     "| distilbert-base-uncased |  0.8641 | 0.7826 | 0.7788  |"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "markdown",
363 |    "metadata": {},
364 |    "source": [
365 |     "# Useful Links\n",
366 |     "---\n",
367 |     "\n",
368 |     "[DeepPavlov Repository](https://github.com/deepmipt/DeepPavlov)\n",
369 |     "\n",
370 |     "[DeepPavlov Demo Page](https://demo.deeppavlov.ai)\n",
371 |     "\n",
372 |     "[DeepPavlov Documentation](https://docs.deeppavlov.ai)\n",
373 |     "\n",
374 |     "[Our Forum](https://forum.deeppavlov.ai)\n",
375 |     "\n",
376 |     "[Our Medium](https://medium.com/deeppavlov)"
377 |    ]
378 |   }
379 |  ],
380 |  "metadata": {
381 |   "kernelspec": {
382 |    "display_name": "py36",
383 |    "language": "python",
384 |    "name": "py36"
385 |   },
386 |   "language_info": {
387 |    "codemirror_mode": {
388 |     "name": "ipython",
389 |     "version": 3
390 |    },
391 |    "file_extension": ".py",
392 |    "mimetype": "text/x-python",
393 |    "name": "python",
394 |    "nbconvert_exporter": "python",
395 |    "pygments_lexer": "ipython3",
396 |    "version": "3.6.10"
397 |   }
398 |  },
399 |  "nbformat": 4,
400 |  "nbformat_minor": 4
401 | }
402 | 


--------------------------------------------------------------------------------
/DP_autoFAQ_ru.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "slideshow": {
  7 |      "slide_type": "slide"
  8 |     }
  9 |    },
 10 |    "source": [
 11 |     "\n",
 12 |     "<img src=\"https://static.tildacdn.com/tild3762-6664-4936-a364-663135373331/_1.png\" align=\"center\"/>\n",
 13 |     " \n",
 14 |     "<h1><center>Frequently asked question answering with DeepPavlov</center></h1>"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {
 20 |     "colab_type": "text",
 21 |     "id": "B7fbDvssiBrV",
 22 |     "slideshow": {
 23 |      "slide_type": "notes"
 24 |     }
 25 |    },
 26 |    "source": [
 27 |     "Лаборатория нейронных сетей и глубокого обучения МФТИ работает над библиотекой [DeepPavlov](https://github.com/deepmipt/DeepPavlov) — библиотека для создания диалоговых систем. Она содержит набор претренированных компонент для анализа языка, с помощью которых можно эффективно решать задачи бизнеса.\n",
 28 |     "\n",
 29 |     "Одна из важных задач для бизнеса это организация ответов на часто задаваемые вопросы клиентов. Сделать это возможно через колл-центр, или виджет на сайте, наняв сотрудников специально для этого.\n",
 30 |     "\n",
 31 |     "В этом ноутбуке я хочу показать как использовать модели, которые отвечают на часто задаваемые вопросы клиентов. В рамках **DeepPavlov** эти модели называются autoF.A.Q. В качеcтве датасета, я буду использовать датасет вопросов и ответов лицеистов МФТИ."
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "markdown",
 36 |    "metadata": {
 37 |     "colab_type": "text",
 38 |     "id": "x_hoGWCfhxLh",
 39 |     "slideshow": {
 40 |      "slide_type": "slide"
 41 |     }
 42 |    },
 43 |    "source": [
 44 |     "## Установка\n",
 45 |     "\n",
 46 |     "Для работы с библиотекой установите **Python 3.6**, активируйте среду разработки. Затем установите **DeepPavlov**."
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": null,
 52 |    "metadata": {
 53 |     "colab": {
 54 |      "base_uri": "https://localhost:8080/",
 55 |      "height": 683
 56 |     },
 57 |     "colab_type": "code",
 58 |     "id": "qhvF6Petcd0r",
 59 |     "outputId": "ec2f36c8-adc2-415d-cc1f-24031bc8d9ed",
 60 |     "slideshow": {
 61 |      "slide_type": "fragment"
 62 |     }
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "!pip install -q deeppavlov"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {
 72 |     "slideshow": {
 73 |      "slide_type": "slide"
 74 |     }
 75 |    },
 76 |    "source": [
 77 |     "## Датасет\n",
 78 |     "\n",
 79 |     "В качестве датасета выступает файл формата **csv**, с заголовками **Question** и **Answer**. Пример вопросов и ответов ниже."
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 7,
 85 |    "metadata": {
 86 |     "slideshow": {
 87 |      "slide_type": "slide"
 88 |     }
 89 |    },
 90 |    "outputs": [
 91 |     {
 92 |      "name": "stdout",
 93 |      "output_type": "stream",
 94 |      "text": [
 95 |       "                                             Question                                             Answer\n",
 96 |       "12                        Как войти в личный кабинет?  Подробно про личный кабинет вы можете прочитат...\n",
 97 |       "8   Я ошиблась в имени ребенка. Где можно его изме...  Некоторые данные можно поменять через личный к...\n",
 98 |       "40  Как проходит первый этап вступительных испытан...  Вступительные испытания на первом этапе предст...\n",
 99 |       "42       Какие экзамены надо сдавать в девятый класс?  Вступительные испытания на первом этапе предст...\n",
100 |       "13                      Как попасть в личный кабинет?  Подробно про личный кабинет вы можете прочитат...\n"
101 |      ]
102 |     }
103 |    ],
104 |    "source": [
105 |     "import pandas as pd\n",
106 |     "df = pd.read_csv(\"http://files.deeppavlov.ai/faq/school/faq_school.csv\")\n",
107 |     "print(df.sample(frac=1)[:5].to_string())"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {
113 |     "colab_type": "text",
114 |     "id": "EVWji-oNOl2I"
115 |    },
116 |    "source": [
117 |     "## Описание моделей\n",
118 |     "\n",
119 |     "На вход модели подается вопрос, далее модель определяет наиболее близкий нашему вопрос из датасета, и возвращает соответствующий ответ. **DeepPavlov** содержит несколько моделей, которые решают проблему поиска ответов на популярные вопросы. Подробную информацию по деталям реализации моделей вы можно найти в нашей [справке](http://docs.deeppavlov.ai/en/master/skills/faq.html). Следующая команда показывает нам список моделей autoF.A.Q."
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 1,
125 |    "metadata": {},
126 |    "outputs": [
127 |     {
128 |      "data": {
129 |       "text/plain": [
130 |        "frozenset({'fasttext_avg_autofaq',\n",
131 |        "           'fasttext_tfidf_autofaq',\n",
132 |        "           'tfidf_autofaq',\n",
133 |        "           'tfidf_logreg_autofaq',\n",
134 |        "           'tfidf_logreg_en_faq'})"
135 |       ]
136 |      },
137 |      "metadata": {},
138 |      "output_type": "display_data"
139 |     }
140 |    ],
141 |    "source": [
142 |     "from IPython.display import display\n",
143 |     "from deeppavlov import configs\n",
144 |     "display(configs.faq.keys())"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "## Работа с моделями через **командную строку**\n",
152 |     "\n",
153 |     "Режим работы через командую строку позволяет установить зависимости модели (с помощью команды **install**), натренировать модель на основе датасета (**train**), и взаимодействовать с моделью (**interact**). Ниже приведены примеры всех этих команд."
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": null,
159 |    "metadata": {},
160 |    "outputs": [],
161 |    "source": [
162 |     "# установка пакетов и файлов, требуемых для работы модели\n",
163 |     "!python -m deeppavlov install tfidf_autofaq\n",
164 |     "# тренировка модели\n",
165 |     "!python -m deeppavlov train tfidf_autofaq\n",
166 |     "# взаимодействие с моделью\n",
167 |     "!python -m deeppavlov interact tfidf_autofaq"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "markdown",
172 |    "metadata": {},
173 |    "source": [
174 |     "## Работа с моделями через **Python**\n",
175 |     "\n",
176 |     "Для того чтобы работать с моделями через **Python** необходимо импортировать файл конфигурации и метод **build_model**. Далее построить модель с помощью метода **build_model**. На вход модель принимает массив с текстовыми запросами, в качестве выхода модель возвращает соответствующие ответы с оценкой уверенности."
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 3,
182 |    "metadata": {},
183 |    "outputs": [
184 |     {
185 |      "data": {
186 |       "text/plain": [
187 |        "[['Вы можете написать нам письмо на почту приемной кампании, с адреса, который указали при регистрации или обратиться к нашим сотрудниками через Telegram и предоставить информацию, которая позволит идентифицировать вашего ребенка (ФИО, дата рождения и адрес электронной почты, указанный при регистрации).'],\n",
188 |        " [0.3]]"
189 |       ]
190 |      },
191 |      "execution_count": 3,
192 |      "metadata": {},
193 |      "output_type": "execute_result"
194 |     }
195 |    ],
196 |    "source": [
197 |     "from deeppavlov import configs, build_model\n",
198 |     "faq = build_model(configs.faq.tfidf_autofaq, download = True)\n",
199 |     "faq(['мне нужен код регистрации'])"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "markdown",
204 |    "metadata": {},
205 |    "source": [
206 |     "## Скилл **SimilarityMatchingSkill**"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "markdown",
211 |    "metadata": {},
212 |    "source": [
213 |     "Кроме того библиотека **DeepPavlov** содержит в себе класс **SimilarityMatchingSkill** - упрощенный интерфейс работы с моделями autoF.A.Q. Чтобы продемонстрировать его работу, создайте объект класса **SimilarityMatchingSkill**, определив следующие параметры.\n",
214 |     "\n",
215 |     "* **data_path** - путь к csv файлу с данными\n",
216 |     "* **x_col_name** - имя колонки с вопросами в csv файле (Question, по умолчанию)\n",
217 |     "* **y_col_name** - имя колонки с ответами в csv файле (Answer, по умолчанию)\n",
218 |     "* **edit_dict** - dict с параметрами конфигурации для перезаписи\n",
219 |     "* **save_load_path** - путь куда сохранить натренированную модель\n",
220 |     "* **train** - тренировать ли модель?"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 5,
226 |    "metadata": {},
227 |    "outputs": [],
228 |    "source": [
229 |     "from deeppavlov.contrib.skills.similarity_matching_skill import SimilarityMatchingSkill\n",
230 |     "\n",
231 |     "faq_skill = SimilarityMatchingSkill(data_path = 'http://files.deeppavlov.ai/faq/school/faq_school.csv',\n",
232 |     "                              x_col_name = 'Question', \n",
233 |     "                              y_col_name = 'Answer',\n",
234 |     "                              save_load_path = './model',\n",
235 |     "                              config_type = 'tfidf_autofaq',\n",
236 |     "                              train = False)"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "markdown",
241 |    "metadata": {},
242 |    "source": [
243 |     "Обратите внимание, что навыки (skills) в **DeepPavlov** в отличие от компонент имеют унифицированный вход, где первый параметр это массив текстовых обращений, второй параметр массив истории общений, третий параметр массив состояний. В навыке нашего класса обязательным является лишь первый параметр - массив текстовых обращений. Убедитесь в то, что навык корректо отвечает на запросы."
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": 6,
249 |    "metadata": {},
250 |    "outputs": [
251 |     {
252 |      "data": {
253 |       "text/plain": [
254 |        "(['Вступительные испытания на первом этапе представляют собой выполнение письменных работ по профильным предметам.'],\n",
255 |        " [0.2])"
256 |       ]
257 |      },
258 |      "execution_count": 6,
259 |      "metadata": {},
260 |      "output_type": "execute_result"
261 |     }
262 |    ],
263 |    "source": [
264 |     "faq_skill(['что такое вступительные экзамены'], [], [])"
265 |    ]
266 |   },
267 |   {
268 |    "cell_type": "markdown",
269 |    "metadata": {},
270 |    "source": [
271 |     "## Интеграция моделей в Яндекс.Алиса"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "markdown",
276 |    "metadata": {},
277 |    "source": [
278 |     "Для того чтобы интегрировать нашу модель в навык Яндекс.Алиса. Создадим объект класса **DefaultAgent**."
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "code",
283 |    "execution_count": 7,
284 |    "metadata": {},
285 |    "outputs": [],
286 |    "source": [
287 |     "from deeppavlov.agents.default_agent.default_agent import DefaultAgent\n",
288 |     "from deeppavlov.agents.processors.highest_confidence_selector import HighestConfidenceSelector\n",
289 |     "\n",
290 |     "agent = DefaultAgent([faq_skill], skills_selector=HighestConfidenceSelector())"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "metadata": {},
296 |    "source": [
297 |     "Далее запустим сервер с указанием пути для запросов **endpoint='faq'** и порта подключения **port=5000**"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "code",
302 |    "execution_count": null,
303 |    "metadata": {},
304 |    "outputs": [
305 |     {
306 |      "name": "stdout",
307 |      "output_type": "stream",
308 |      "text": [
309 |       " * Serving Flask app \"deeppavlov.utils.alice.alice\" (lazy loading)\n",
310 |       " * Environment: production\n",
311 |       "   WARNING: Do not use the development server in a production environment.\n",
312 |       "   Use a production WSGI server instead.\n",
313 |       " * Debug mode: off\n"
314 |      ]
315 |     },
316 |     {
317 |      "name": "stderr",
318 |      "output_type": "stream",
319 |      "text": [
320 |       " * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)\n"
321 |      ]
322 |     }
323 |    ],
324 |    "source": [
325 |     "from deeppavlov.utils.alice import start_agent_server\n",
326 |     "\n",
327 |     "start_agent_server(agent, host='0.0.0.0', port=5000, endpoint='/faq')"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "markdown",
332 |    "metadata": {},
333 |    "source": [
334 |     "Обратите внимание, что Яндекс.Диалоги в качестве **Webhook URL** требует указывать сервер с внешним IP адресом и доступом по протоколу https. Для быстрого прототипирования вы можете использовать [ngrok](https://ngrok.com/). Ngrok позволит вам создавать туннель для доступа к вашему серверу с **DeepPavlov** в локальной сети, для этого запустите **ngrok http 5000** на вашем сервере с **DeepPavlov**. В ответ на это будет создано два туннеля, по одному на протоколы **http** и **https**. Скопируйте адрес туннеля для **https**, добавьте к линку эндпоинт **/faq**, итоговый линк будет **Webhook URL** для нашего Яндекс.Диалога. Далее заполните поля необходимые для сохранения черновика диалога. Сохраните черновик и перейдите на вкладку Тестирование."
335 |    ]
336 |   },
337 |   {
338 |    "cell_type": "markdown",
339 |    "metadata": {
340 |     "colab_type": "text",
341 |     "id": "yom6JD53QcXk"
342 |    },
343 |    "source": [
344 |     "## Ссылки\n",
345 |     "\n",
346 |     "### [DeepPavlov documentation](http://docs.deeppavlov.ai/en/master/skills/faq.html)\n",
347 |     "\n",
348 |     "### [DeepPavlov Blog](https://medium.com/deeppavlov)\n",
349 |     "\n",
350 |     "### [Forum](https://forum.ipavlov.ai)\n",
351 |     "\n",
352 |     "### [GitHub Repository](https://github.com/deepmipt/DeepPavlov)\n",
353 |     "\n",
354 |     "### [DeepPavlov demo page](https://demo.ipavlov.ai)"
355 |    ]
356 |   }
357 |  ],
358 |  "metadata": {
359 |   "colab": {
360 |    "collapsed_sections": [],
361 |    "name": "DP:autoFAQ.ipynb",
362 |    "provenance": [],
363 |    "version": "0.3.2"
364 |   },
365 |   "kernelspec": {
366 |    "display_name": "Python 3",
367 |    "language": "python",
368 |    "name": "python3"
369 |   },
370 |   "language_info": {
371 |    "codemirror_mode": {
372 |     "name": "ipython",
373 |     "version": 3
374 |    },
375 |    "file_extension": ".py",
376 |    "mimetype": "text/x-python",
377 |    "name": "python",
378 |    "nbconvert_exporter": "python",
379 |    "pygments_lexer": "ipython3",
380 |    "version": "3.6.8"
381 |   }
382 |  },
383 |  "nbformat": 4,
384 |  "nbformat_minor": 2
385 | }
386 | 


--------------------------------------------------------------------------------
/DP_tf.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# DeepPavlov - an Open-Source Conversational AI Framework \n",
  8 |     "---\n",
  9 |     "\n",
 10 |     "<img align=\"center\" src=\"https://deeppavlov.ai/docs/_static/ipavlov_logo.png\"/>\n",
 11 |     "\n",
 12 |     "\n",
 13 |     "The open-source conversational AI framework [DeepPavlov](https://deeppavlov.ai/) offers a free and easy-to-use solution to build dialogue systems. DeepPavlov comes with a bunch of predefined components powered by [TensorFlow](https://www.tensorflow.org) and [Keras](https://keras.io) for solving NLP-related problems. The framework allows you to fine-tune hyperparameters and test models. You can check out the models in our [demo](https://demo.deeppavlov.ai)."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "## Installation\n",
 21 |     "We support `Linux` and `Windows` platforms, `Python 3.6` and `Python 3.7`"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "!pip install deeppavlov"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "## QuickStart"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "The [DeepPavlov](https://deeppavlov.ai/) NLP pipelines are defined in the separate configuration files under the *config/faq* folder. List of models is available on\n",
 45 |     "[the doc page](http://docs.deeppavlov.ai/en/master/features/overview.html)\n",
 46 |     "\n",
 47 |     "When you are decided on the model and a configuration file, there are two ways to use it\n",
 48 |     "\n",
 49 |     "* via **Command Line Interface (CLI)**\n",
 50 |     "* via **Python**"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "### Command Line Interface (CLI)\n",
 58 |     "\n",
 59 |     "To get predictions from a model interactively through CLI, run\n",
 60 |     "\n",
 61 |     "```bash \n",
 62 |     "    python -m deeppavlov interact <config_path> [-d]\n",
 63 |     "```\n",
 64 |     "\n",
 65 |     "* `-d` downloads required data -- pretrained model files and embeddings\n",
 66 |     "  (optional).\n",
 67 |     "\n",
 68 |     "You can train it in the same simple way:\n",
 69 |     "\n",
 70 |     "```bash\n",
 71 |     "    python -m deeppavlov train <config_path> [-d]\n",
 72 |     "```\n",
 73 |     "\n",
 74 |     "To train on your own data you need to modify dataset reader path in the\n",
 75 |     "[train config doc](http://docs.deeppavlov.ai/en/master/intro/config_description.html#train-config).\n",
 76 |     "The data format is specified in the corresponding model doc page. \n",
 77 |     "\n",
 78 |     "There are even more actions you can perform with configs:\n",
 79 |     "\n",
 80 |     "```bash \n",
 81 |     "    python -m deeppavlov <action> <config_path> [-d]\n",
 82 |     "```\n",
 83 |     "\n",
 84 |     "* `<action>` can be\n",
 85 |     "    * `download` to download model's data (same as `-d`),\n",
 86 |     "    * `train` to train the model on the data specified in the config file,\n",
 87 |     "    * `evaluate` to calculate metrics on the same dataset,\n",
 88 |     "    * `interact` to interact via CLI,\n",
 89 |     "    * `riseapi` to run a REST API server (see\n",
 90 |     "    [doc](http://docs.deeppavlov.ai/en/master/integrations/rest_api.html)),\n",
 91 |     "    * `interactbot` to run as a Telegram bot (see\n",
 92 |     "    [doc](http://docs.deeppavlov.ai/en/master/integrations/telegram.html)),\n",
 93 |     "    * `interactmsbot` to run a Miscrosoft Bot Framework server (see\n",
 94 |     "    [doc](http://docs.deeppavlov.ai/en/master/integrations/ms_bot.html)),\n",
 95 |     "    * `predict` to get prediction for samples from *stdin* or from\n",
 96 |     "      *<file_path>* if `-f <file_path>` is specified.\n",
 97 |     "* `<config_path>` specifies path (or name) of model's config file\n",
 98 |     "* `-d` downloads required data\n"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "markdown",
103 |    "metadata": {},
104 |    "source": [
105 |     "### Python\n",
106 |     "\n",
107 |     "To get predictions from a model interactively through Python, run\n",
108 |     "\n",
109 |     "```python \n",
110 |     "    from deeppavlov import build_model\n",
111 |     "\n",
112 |     "    model = build_model(<config_path>, download=True)\n",
113 |     "\n",
114 |     "    # get predictions for 'input_text1', 'input_text2'\n",
115 |     "    model(['input_text1', 'input_text2'])\n",
116 |     "```\n",
117 |     "\n",
118 |     "* where `download=True` downloads required data from web -- pretrained model\n",
119 |     "  files and embeddings (optional),\n",
120 |     "* `<config_path>` is path to the chosen model's config file (e.g.\n",
121 |     "  `\"deeppavlov/configs/ner/ner_ontonotes_bert_mult.json\"`) or\n",
122 |     "  `deeppavlov.configs` attribute (e.g.\n",
123 |     "  `deeppavlov.configs.ner.ner_ontonotes_bert_mult` without quotation marks).\n",
124 |     "\n",
125 |     "You can train it in the same simple way:\n",
126 |     "\n",
127 |     "```python \n",
128 |     "    from deeppavlov import train_model \n",
129 |     "\n",
130 |     "    model = train_model(<config_path>, download=True)\n",
131 |     "```\n",
132 |     "\n",
133 |     "* `download=True` downloads pretrained model, therefore the pretrained\n",
134 |     "model will be, first, loaded and then train (optional).\n",
135 |     "\n",
136 |     "Dataset will be downloaded regardless of whether there was ``-d`` flag or\n",
137 |     "not.\n",
138 |     "\n",
139 |     "To train on your own data you need to modify dataset reader path in the\n",
140 |     "[train config doc](http://docs.deeppavlov.ai/en/master/intro/config_description.html#train-config).\n",
141 |     "The data format is specified in the corresponding model doc page. \n",
142 |     "\n",
143 |     "You can also calculate metrics on the dataset specified in your config file:\n",
144 |     "\n",
145 |     "```python\n",
146 |     "    from deeppavlov import evaluate_model \n",
147 |     "\n",
148 |     "    model = evaluate_model(<config_path>, download=True)\n",
149 |     "```"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "## Configuration Files"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "metadata": {},
162 |    "source": [
163 |     "The config file consists of four main sections: **dataset_reader**, **dataset_iterator**, **chainer**, and **train**. The **dataset_reader** defines the dataset’s location along with the dataset format. After loading, the data is split into the train, validation, and test sets according to the **dataset_iterator** settings. The **chainer** section of the configuration files consists of three subsections. The **in** and **out** sections define an input and an output to the chainer, whereas the **pipe** section defines a pipeline of the required components to interact with the models."
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": null,
169 |    "metadata": {},
170 |    "outputs": [],
171 |    "source": [
172 |     "%load https://raw.githubusercontent.com/deepmipt/DeepPavlov/master/deeppavlov/configs/faq/tfidf_logreg_en_faq.json"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "metadata": {},
178 |    "source": [
179 |     "# BERT-based Models of DeepPavlov\n",
180 |     "\n",
181 |     "---\n",
182 |     "\n",
183 |     "The release of [BERT](https://arxiv.org/abs/1810.04805) (Bidirectional Encoder Representations from Transformers) made the year 2018 an inflection point for the Natural Language Processing community. BERT is a transformer-based technique for pretraining language representations, which produces state-of-the-art results across a wide array of NLP tasks. BERT has been uploaded to TensorFlow Hub and offers seamless integration with DeepPavlov. We integrated BERT into three downstream tasks: **text classification**, **named entity recognition** (and sequence tagging in general), and **question answering**. As a result, we achieved substantial improvements in all these tasks."
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "markdown",
188 |    "metadata": {},
189 |    "source": [
190 |     "## BERT for Text Classification\n",
191 |     "\n",
192 |     "We use the DeepPavlov BERT-based text classification model to the sentiment analysis problem. It involves identifying a writer’s attitude toward a particular topic. The attitude could be *positive*, *negative*, and *neutral*."
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": null,
198 |    "metadata": {},
199 |    "outputs": [],
200 |    "source": [
201 |     "!python -m deeppavlov install insults_kaggle_bert"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": null,
207 |    "metadata": {},
208 |    "outputs": [],
209 |    "source": [
210 |     "!python -m deeppavlov interact insults_kaggle_bert -d"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": null,
216 |    "metadata": {},
217 |    "outputs": [],
218 |    "source": [
219 |     "from deeppavlov import configs, build_model\n",
220 |     "model = build_model(configs.classifiers.insults_kaggle_bert, download=True)"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": null,
226 |    "metadata": {},
227 |    "outputs": [],
228 |    "source": [
229 |     "model(['hey, how are you?', 'You are so dumb!'])"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "markdown",
234 |    "metadata": {},
235 |    "source": [
236 |     "If you want to train model on your data you need to create configuration file and set up **data_path** to folder with *train.csv*, *valid.csv*, *test.csv* and change **MODEL_PATH** where to save trained model."
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": null,
242 |    "metadata": {},
243 |    "outputs": [],
244 |    "source": [
245 |     "import json\n",
246 |     "from pprint import pprint\n",
247 |     "model_config = json.load(open(configs.classifiers.insults_kaggle_bert))\n",
248 |     "\n",
249 |     "pprint(model_config['dataset_reader'])\n",
250 |     "pprint(model_config['metadata']['variables'])"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "markdown",
255 |    "metadata": {},
256 |    "source": [
257 |     "Then, train the model\n",
258 |     "\n",
259 |     "```bash\n",
260 |     "    python -m deeppavlov train config_name\n",
261 |     "```\n",
262 |     "or in Python\n",
263 |     "```python\n",
264 |     "    from deeppavlov import train_model\n",
265 |     "    model = train_model(model_config)\n",
266 |     "```"
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "markdown",
271 |    "metadata": {},
272 |    "source": [
273 |     "## BERT for Named Entity Recognition\n",
274 |     "\n",
275 |     "Given a sequence of tokens (words, and possibly punctuation marks), provide a tag from a predefined tag set for each token in the sequence. For example, we want to extract persons' and organizations' names from the text. Then for the input text:\n",
276 |     "\n",
277 |     "    Yan Goodfellow works for Google Brain\n",
278 |     "\n",
279 |     "a NER model needs to provide the following sequence of tags:\n",
280 |     "\n",
281 |     "    B-PER I-PER    O     O   B-ORG  I-ORG\n",
282 |     "\n",
283 |     "Where *B-* and *I-* prefixes stand for the beginning and inside of the entity, while *O* stands for out of tag or no tag. Markup with the prefix scheme is called *BIO markup*. This markup is introduced for distinguishing of consequent entities with similar types."
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "code",
288 |    "execution_count": null,
289 |    "metadata": {},
290 |    "outputs": [],
291 |    "source": [
292 |     "!python -m deeppavlov install ner_ontonotes_bert_mult"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": null,
298 |    "metadata": {},
299 |    "outputs": [],
300 |    "source": [
301 |     "!python -m deeppavlov interact ner_ontonotes_bert_mult -d"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "code",
306 |    "execution_count": null,
307 |    "metadata": {},
308 |    "outputs": [],
309 |    "source": [
310 |     "from deeppavlov import configs, build_model\n",
311 |     "ner_model = build_model(configs.ner.ner_ontonotes_bert_mult, download=True)"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "code",
316 |    "execution_count": null,
317 |    "metadata": {},
318 |    "outputs": [],
319 |    "source": [
320 |     "ner_model(['Yan Goodfellow works for Google Brain'])"
321 |    ]
322 |   },
323 |   {
324 |    "cell_type": "markdown",
325 |    "metadata": {},
326 |    "source": [
327 |     "### Multilingual NER"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "code",
332 |    "execution_count": 14,
333 |    "metadata": {},
334 |    "outputs": [],
335 |    "source": [
336 |     "from deeppavlov import configs, build_model\n",
337 |     "ner_model_ml = build_model(configs.ner.ner_ontonotes_bert_mult, download=True)"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "code",
342 |    "execution_count": null,
343 |    "metadata": {},
344 |    "outputs": [],
345 |    "source": [
346 |     "ner_model_ml([\n",
347 |     "\"Meteorologist Lachlan Stone said the snowfall in Queensland was an unusual occurrence \\\n",
348 |     "  in a state with a sub-tropical to tropical climate.\",\n",
349 |     "\"Церемония награждения пройдет 27 октября в развлекательном комплексе Hollywood and \\\n",
350 |     "  Highland Center в Лос-Анджелесе (штат Калифорния, США).\", \n",
351 |     "\"Das Orchester der Philharmonie Poznań widmet sich jetzt bereits zum zweiten \\\n",
352 |     "  Mal der Musik dieses aus Deutschland vertriebenen Komponisten. Waghalter \\\n",
353 |     "  stammte aus einer jüdischen Warschauer Familie.\"])"
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "markdown",
358 |    "metadata": {},
359 |    "source": [
360 |     "## BERT for Question Answering\n",
361 |     "\n",
362 |     "Context question answering is the task of finding an answer to a question over a given context (e.g, a paragraph from Wikipedia), where the answer to each question is a segment of the context."
363 |    ]
364 |   },
365 |   {
366 |    "cell_type": "code",
367 |    "execution_count": null,
368 |    "metadata": {},
369 |    "outputs": [],
370 |    "source": [
371 |     "!python -m deeppavlov install squad_bert"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "code",
376 |    "execution_count": null,
377 |    "metadata": {},
378 |    "outputs": [],
379 |    "source": [
380 |     "!python -m deeppavlov interact squad_bert -d"
381 |    ]
382 |   },
383 |   {
384 |    "cell_type": "code",
385 |    "execution_count": null,
386 |    "metadata": {},
387 |    "outputs": [],
388 |    "source": [
389 |     "from deeppavlov import build_model, configs\n",
390 |     "model_qa = build_model(configs.squad.squad_bert, download=True)"
391 |    ]
392 |   },
393 |   {
394 |    "cell_type": "markdown",
395 |    "metadata": {},
396 |    "source": [
397 |     "### Multilingual QA"
398 |    ]
399 |   },
400 |   {
401 |    "cell_type": "code",
402 |    "execution_count": null,
403 |    "metadata": {},
404 |    "outputs": [],
405 |    "source": [
406 |     "from deeppavlov import build_model, configs\n",
407 |     "model_qa_ml = build_model(configs.squad.squad_bert_multilingual_freezed_emb, download=True)  "
408 |    ]
409 |   },
410 |   {
411 |    "cell_type": "code",
412 |    "execution_count": null,
413 |    "metadata": {},
414 |    "outputs": [],
415 |    "source": [
416 |     "context_en = \"In meteorology, precipitation is any product of the condensation of atmospheric \\\n",
417 |     "  water vapor that falls under gravity. The main forms of precipitation include drizzle, rain, \\\n",
418 |     "  sleet, snow, graupel, and hail. Precipitation forms as smaller droplets coalesce via collision \\\n",
419 |     "  with other raindrops or ice crystals within a cloud. Short, intense periods of rain in scattered locations \\\n",
420 |     "  are called showers.\"\n",
421 |     "\n",
422 |     "context_fr = \"Les précipitations désignent tous les météores qui tombent dans une atmosphère \\\n",
423 |     "  et il peut s'agir de solides ou de liquides selon la composition et la température de cette dernière. \\\n",
424 |     "  Ce terme météorologique est le plus souvent au pluriel et désigne sur la Terre les hydrométéores \\\n",
425 |     "  (cristaux de glace ou gouttelettes d'eau) qui, ayant été soumis à des processus de condensation \\\n",
426 |     "  et d'agrégation à l'intérieur des nuages, sont devenus trop lourds pour demeurer en suspension \\\n",
427 |     "  dans l'atmosphère et tombent au sol ou s'évaporent en virga avant de l'atteindre. Par extension, \\\n",
428 |     "  le terme peut également être utilisé pour des phénomènes similaires sur d'autres planètes ou lunes ayant une atmosphère.\"\n",
429 |     "\n",
430 |     "model_qa_ml([context_en, context_fr, context_fr], \n",
431 |     "      [\"Where do water droplets collide with ice crystals to form precipitation?\", \n",
432 |     "       \"Sous quelle forme peut être précipitation?\", \n",
433 |     "       \"Where the term precipitation can be used?\"])"
434 |    ]
435 |   },
436 |   {
437 |    "cell_type": "markdown",
438 |    "metadata": {},
439 |    "source": [
440 |     "# Useful Links\n",
441 |     "---\n",
442 |     "\n",
443 |     "[DeepPavlov Repository](https://github.com/deepmipt/DeepPavlov)\n",
444 |     "\n",
445 |     "[DeepPavlov Demo Page](https://demo.deeppavlov.ai)\n",
446 |     "\n",
447 |     "[DeepPavlov Documentation](https://docs.deeppavlov.ai)\n",
448 |     "\n",
449 |     "[Our Forum](https://forum.deeppavlov.ai)\n",
450 |     "\n",
451 |     "[Our Medium](https://medium.com/deeppavlov)"
452 |    ]
453 |   },
454 |   {
455 |    "cell_type": "code",
456 |    "execution_count": null,
457 |    "metadata": {},
458 |    "outputs": [],
459 |    "source": []
460 |   }
461 |  ],
462 |  "metadata": {
463 |   "kernelspec": {
464 |    "display_name": "Python 3",
465 |    "language": "python",
466 |    "name": "python3"
467 |   },
468 |   "language_info": {
469 |    "codemirror_mode": {
470 |     "name": "ipython",
471 |     "version": 3
472 |    },
473 |    "file_extension": ".py",
474 |    "mimetype": "text/x-python",
475 |    "name": "python",
476 |    "nbconvert_exporter": "python",
477 |    "pygments_lexer": "ipython3",
478 |    "version": "3.6.8"
479 |   }
480 |  },
481 |  "nbformat": 4,
482 |  "nbformat_minor": 2
483 | }
484 | 


--------------------------------------------------------------------------------
/examples/super_convergence_tutorial.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### You can also run the notebook in [COLAB](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/super_convergence_tutorial.ipynb)."
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "!pip3 install deeppavlov"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "# Super Convergence in DeepPavlov"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "markdown",
 28 |    "metadata": {},
 29 |    "source": [
 30 |     "In [the paper by Leslie N. Smith, Nicholay Topin](https://arxiv.org/abs/1708.07120) authors introduced a phenomenon called \"super-convergence\", where \n",
 31 |     "  * <font color='green'>neural networks can be trained</font> an order of magnitude <font color='green'>faster</font> than with standard training methods,\n",
 32 |     "  * there is <font color='green'>a greater boost in performance</font> relative to standard training <font color='green'>when the amount of labeled training data is limited</font>."
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {},
 38 |    "source": [
 39 |     "### Tutorial Plan:\n",
 40 |     "\n",
 41 |     "0. [What is Super Convergence?](#0.-What-is-Super-Convergence?)\n",
 42 |     "1. [DeepPavlov learning rate schedules](#1.-Learning-rate-schedules)\n",
 43 |     "     * [LRScheduledTFModel](#LRScheduledTFModel) [[source]](https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/core/models/lr_scheduled_tf_model.py)\n",
 44 |     "     * [DecayType.NO](#DecayType.NO)\n",
 45 |     "     * [DecayType.LINEAR](#DecayType/LINEAR)\n",
 46 |     "     * [DecayType.COSINE](#DecayType/COSINE)\n",
 47 |     "     * [DecayType.EXPONENTIAL](#DecayType.EXPONENTIAL)\n",
 48 |     "     * [DecayType.POLYNOMIAL](#DecayType.POLYNOMIAL)\n",
 49 |     "     * [DecayType.ONECYCLE](#DecayType.ONECYCLE)\n",
 50 |     "     * [DecayType.TRAPEZOID](#DecayType.TRAPEZOID)\n",
 51 |     "     \n",
 52 |     "\n",
 53 |     "2. [DeepPavlov learning rate search](#2.-Optimal-learning-rate-search)\n",
 54 |     "3. [DeepPavlov Super Convergence](#3.-Super-Convergence)\n",
 55 |     "\n",
 56 |     "### Useful materials\n",
 57 |     "   * Original Super Convergence Paper [\"Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates\" by Leslie N. Smith, Nicholay Topin](https://arxiv.org/abs/1708.07120)\n",
 58 |     "   * Post by Sylvian Gugger on [\"How do you find an optimal learning rate\"](https://sgugger.github.io/how-do-you-find-a-good-learning-rate.html)\n",
 59 |     "   * [1cycle policy overview](https://sgugger.github.io/the-1cycle-policy.html#the-1cycle-policy)\n",
 60 |     "   * Post by fast.ai with results on CIFAR10, [\"Training Imagenet in 3 hours for 25dollars; and CIFAR10 for 0.26dollars\"](https://www.fast.ai/2018/04/30/dawnbench-fastai/)"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "### 0. What is Super Convergence?"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "The simplest explanation of what it is:\n",
 75 |     "  - method that helps to train complex neural models faster.\n",
 76 |     "\n",
 77 |     "As an example, see how it allows to train a resnet-56 on cifar10 to the same or a better precision than the authors in their original paper but with far less iterations.\n",
 78 |     "\n",
 79 |     "By training with high learning rates you can reach a model that gets 93% accuracy in 70 epochs which is less than 7k iterations (as opposed to the 64k iterations which made roughly 360 epochs in the original paper)."
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "![cs_loss_comparison.png](img/sc_loss_comparison.png)"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "metadata": {},
 92 |    "source": [
 93 |     "One of the key elements of super-convergence is training with one learning rate cycle and a large maximum learning rate. A primary insight that allows super-convergence training is that large learning rates regularize the training, hence requiring a reduction of all other forms of regularization in order to preserve an optimal regularization balance.\n",
 94 |     "\n",
 95 |     "Experiments demonstrate super-convergence for Cifar-10/100, MNIST and Imagenet datasets, and resnet, wide-resnet, densenet, and inception architectures."
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "### 1. Learning rate schedules"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "markdown",
107 |    "metadata": {},
108 |    "source": [
109 |     "#### LRScheduledTFModel"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {},
115 |    "source": [
116 |     "`class LRScheduledTFModel` [[source]](https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/core/models/lr_scheduled_tf_model.py):\n",
117 |     "  * initializes optimizer\n",
118 |     "  * updates learning rate and momentum according to a schedule configured in config\n",
119 |     "  * can search for an optimal learning rate\n",
120 |     "  \n",
121 |     "That means that your model doesn't need to handle learning rate and momentum placeholders and initialize optimizer. Just inherit your class from `LRScheduledMOdel`:\n",
122 |     "\n",
123 |     "```python\n",
124 |     "from deeppavlov.core.models.lr_scheduled_tf_model import LRScheduledTFModel\n",
125 |     "\n",
126 |     "class MyModel(LRScheduledTFModel):\n",
127 |     "```\n",
128 |     "\n",
129 |     "Examples of wrapped in `LRScheduledTFModel` models are:\n",
130 |     "   * Goal-Oriented Bot [[source]](https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/models/go_bot/network.py) [[configs]](https://github.com/deepmipt/DeepPavlov/tree/master/deeppavlov/configs/go_bot)\n",
131 |     "   * Named Entity recognizer [[source]](https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/models/ner/network.py) [[configs]](https://github.com/deepmipt/DeepPavlov/tree/master/deeppavlov/configs/ner)\n",
132 |     "   * SQUAD model [[source]](https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/models/squad/squad.py) [[configs]](https://github.com/deepmipt/DeepPavlov/tree/master/deeppavlov/configs/squad)"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "markdown",
137 |    "metadata": {},
138 |    "source": [
139 |     "#### LRScheduledKerasModel "
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "markdown",
144 |    "metadata": {},
145 |    "source": [
146 |     "`class LRScheduledKerasModel` [[source]](https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/core/models/keras_model.py):\n",
147 |     "  * updates learning rate and momentum according to a schedule configured in config\n",
148 |     "  * can search for an optimal learning rate\n",
149 |     "  \n",
150 |     "That means that your model doesn't need to handle learning rate and momentum placeholders, just need to initialize optimizer and compile model. Just inherit your class from `LRScheduledKerasModel`:\n",
151 |     "\n",
152 |     "```python\n",
153 |     "from deeppavlov.core.models.keras_model import LRScheduledKerasModel\n",
154 |     "\n",
155 |     "class MyModel(LRScheduledKerasModel):\n",
156 |     "```\n"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "metadata": {},
162 |    "source": [
163 |     "#### Optimizer"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "metadata": {},
169 |    "source": [
170 |     "You can set optimizer by:\n",
171 |     "\n",
172 |     "```json\n",
173 |     "{\n",
174 |     "    \"class_name\": \"my_model\",\n",
175 |     "    ...\n",
176 |     "    \"optimizer\": \"tf.train:AdadeltaOptimizer\"\n",
177 |     "}\n",
178 |     "```\n",
179 |     "\n",
180 |     "If no `optimizer` is mentioned then `tf.train:AdamOptimizer` will be used."
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "markdown",
185 |    "metadata": {},
186 |    "source": [
187 |     "#### DecayType.NO"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "metadata": {},
193 |    "source": [
194 |     "```json\n",
195 |     "{\n",
196 |     "    \"class_name\": \"my_model\",\n",
197 |     "    ...\n",
198 |     "    \"learning_rate\": 0.1,\n",
199 |     "    \"learning_rate_decay\": \"no\"\n",
200 |     "}\n",
201 |     "```\n",
202 |     "\n",
203 |     "or just\n",
204 |     "\n",
205 |     "```json\n",
206 |     "{\n",
207 |     "    \"class_name\": \"my_model\",\n",
208 |     "    ...\n",
209 |     "    \"learning_rate\": 0.1\n",
210 |     "}\n",
211 |     "```\n",
212 |     "\n",
213 |     "corresponds to the following learning rate update schedule:"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "metadata": {},
219 |    "source": [
220 |     "![cs_ner_lr_no.png](img/sc_ner_lr_no.png)"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "markdown",
225 |    "metadata": {},
226 |    "source": [
227 |     "#### DecayType.LINEAR"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "markdown",
232 |    "metadata": {},
233 |    "source": [
234 |     "```json\n",
235 |     "{\n",
236 |     "    \"class_name\": \"my_model\",\n",
237 |     "    ...\n",
238 |     "    \"learning_rate\": [0.01, 0.1]\n",
239 |     "    \"learning_rate_decay\": \"linear\",\n",
240 |     "    \"learning_rate_decay_batches\": 800\n",
241 |     "}\n",
242 |     "```\n",
243 |     "corresponds to :"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "markdown",
248 |    "metadata": {},
249 |    "source": [
250 |     "![cs_ner_lr_linear.png](img/sc_ner_lr_linear.png)"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "markdown",
255 |    "metadata": {},
256 |    "source": [
257 |     "Or reverse `learning_rate` parameter to go from larger learning rate to smaller: \n",
258 |     "\n",
259 |     "```json\n",
260 |     "{\n",
261 |     "    \"class_name\": \"my_model\",\n",
262 |     "    ...\n",
263 |     "    \"learning_rate\": [0.1, 0.01]\n",
264 |     "    \"learning_rate_decay\": \"linear\",\n",
265 |     "    \"learning_rate_decay_batches\": 800\n",
266 |     "}\n",
267 |     "```"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "markdown",
272 |    "metadata": {},
273 |    "source": [
274 |     "![cs_ner_lr_linear2.png](img/sc_ner_lr_linear2.png)"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "markdown",
279 |    "metadata": {},
280 |    "source": [
281 |     "#### DecayType.COSINE"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "markdown",
286 |    "metadata": {},
287 |    "source": [
288 |     "```json\n",
289 |     "{\n",
290 |     "    \"class_name\": \"my_model\",\n",
291 |     "    ...\n",
292 |     "    \"learning_rate\": [0.1, 0.01]\n",
293 |     "    \"learning_rate_decay\": \"cosine\",\n",
294 |     "    \"learning_rate_decay_batches\": 800\n",
295 |     "}\n",
296 |     "```"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "markdown",
301 |    "metadata": {},
302 |    "source": [
303 |     "![cs_ner_lr_cosine.png](img/sc_ner_lr_cosine.png)"
304 |    ]
305 |   },
306 |   {
307 |    "cell_type": "markdown",
308 |    "metadata": {},
309 |    "source": [
310 |     "#### DecayType.EXPONENTIAL"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "markdown",
315 |    "metadata": {},
316 |    "source": [
317 |     "```json\n",
318 |     "{\n",
319 |     "    \"class_name\": \"my_model\",\n",
320 |     "    ...\n",
321 |     "    \"learning_rate\": [0.1, 0.01]\n",
322 |     "    \"learning_rate_decay\": \"exponential\",\n",
323 |     "    \"learning_rate_decay_batches\": 800\n",
324 |     "}\n",
325 |     "```\n",
326 |     "corresponds to :"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "markdown",
331 |    "metadata": {},
332 |    "source": [
333 |     "![cs_ner_lr_exponential.png](img/sc_ner_lr_exponential.png)"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "markdown",
338 |    "metadata": {},
339 |    "source": [
340 |     "#### DecayType.POLYNOMIAL"
341 |    ]
342 |   },
343 |   {
344 |    "cell_type": "markdown",
345 |    "metadata": {},
346 |    "source": [
347 |     "```json\n",
348 |     "{\n",
349 |     "    \"class_name\": \"my_model\",\n",
350 |     "    ...\n",
351 |     "    \"learning_rate\": [0.1, 0.01]\n",
352 |     "    \"learning_rate_decay\": [\"polynomial\", 1.0],\n",
353 |     "    \"learning_rate_decay_batches\": 800\n",
354 |     "}\n",
355 |     "```\n",
356 |     "corresponds to:"
357 |    ]
358 |   },
359 |   {
360 |    "cell_type": "markdown",
361 |    "metadata": {},
362 |    "source": [
363 |     "![cs_ner_lr_polynomial.png](img/sc_ner_lr_polynomial.png)"
364 |    ]
365 |   },
366 |   {
367 |    "cell_type": "markdown",
368 |    "metadata": {},
369 |    "source": [
370 |     "Polynomial decay has a parameter of \"decay power\" (which was equal to `1.0` preciously).\n",
371 |     "\n",
372 |     "Let's try decay power value of `0.1`:\n",
373 |     "\n",
374 |     "```json\n",
375 |     "{\n",
376 |     "    \"class_name\": \"my_model\",\n",
377 |     "    ...\n",
378 |     "    \"learning_rate\": [0.1, 0.01]\n",
379 |     "    \"learning_rate_decay\": [\"polynomial\", 0.1],\n",
380 |     "    \"learning_rate_decay_batches\": 800\n",
381 |     "}\n",
382 |     "```"
383 |    ]
384 |   },
385 |   {
386 |    "cell_type": "markdown",
387 |    "metadata": {},
388 |    "source": [
389 |     "![cs_ner_lr_polynomial1.png](img/sc_ner_lr_polynomial1.png)"
390 |    ]
391 |   },
392 |   {
393 |    "cell_type": "markdown",
394 |    "metadata": {},
395 |    "source": [
396 |     "And decay power value of `10`:\n",
397 |     "\n",
398 |     "```json\n",
399 |     "{\n",
400 |     "    \"class_name\": \"my_model\",\n",
401 |     "    ...\n",
402 |     "    \"learning_rate\": [0.1, 0.01]\n",
403 |     "    \"learning_rate_decay\": [\"polynomial\", 10],\n",
404 |     "    \"learning_rate_decay_batches\": 800\n",
405 |     "}\n",
406 |     "```"
407 |    ]
408 |   },
409 |   {
410 |    "cell_type": "markdown",
411 |    "metadata": {},
412 |    "source": [
413 |     "![cs_ner_lr_polynomial2.png](img/sc_ner_lr_polynomial2.png)"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "markdown",
418 |    "metadata": {},
419 |    "source": [
420 |     "#### DecayType.ONECYCLE"
421 |    ]
422 |   },
423 |   {
424 |    "cell_type": "markdown",
425 |    "metadata": {},
426 |    "source": [
427 |     "```json\n",
428 |     "{\n",
429 |     "    \"class_name\": \"my_model\",\n",
430 |     "    ...\n",
431 |     "    \"learning_rate\": [0.01, 0.1]\n",
432 |     "    \"learning_rate_decay\": \"onecycle\",\n",
433 |     "    \"learning_rate_decay_batches\": 800\n",
434 |     "}\n",
435 |     "```\n",
436 |     "corresponds to :"
437 |    ]
438 |   },
439 |   {
440 |    "cell_type": "markdown",
441 |    "metadata": {},
442 |    "source": [
443 |     "![cs_ner_lr_onecycle.png](img/sc_ner_lr_onecycle.png)"
444 |    ]
445 |   },
446 |   {
447 |    "cell_type": "markdown",
448 |    "metadata": {},
449 |    "source": [
450 |     "#### DecayType.TRAPEZOID"
451 |    ]
452 |   },
453 |   {
454 |    "cell_type": "markdown",
455 |    "metadata": {},
456 |    "source": [
457 |     "```json\n",
458 |     "{\n",
459 |     "    \"class_name\": \"my_model\",\n",
460 |     "    ...\n",
461 |     "    \"learning_rate\": [0.01, 0.1]\n",
462 |     "    \"learning_rate_decay\": \"trapezoid\",\n",
463 |     "    \"learning_rate_decay_batches\": 800\n",
464 |     "}\n",
465 |     "```\n",
466 |     "corresponds to :"
467 |    ]
468 |   },
469 |   {
470 |    "cell_type": "markdown",
471 |    "metadata": {},
472 |    "source": [
473 |     "![cs_ner_lr_trapezoid.png](img/sc_ner_lr_trapezoid.png)"
474 |    ]
475 |   },
476 |   {
477 |    "cell_type": "markdown",
478 |    "metadata": {},
479 |    "source": [
480 |     "### 2. Optimal learning rate search"
481 |    ]
482 |   },
483 |   {
484 |    "cell_type": "markdown",
485 |    "metadata": {},
486 |    "source": [
487 |     "You can also tune learning rate on data before training.\n",
488 |     "\n",
489 |     "Add `fit_on` and `fit_batch_size` in your component along with desired `learning_rate_decay` (+`learning_rate_decay_batches`), and `learning_rate` parameter will be set automatically.\n",
490 |     "\n",
491 |     "For example,\n",
492 |     "\n",
493 |     "```json\n",
494 |     "{\n",
495 |     "    \"class_name\": \"my_model\",\n",
496 |     "    ...\n",
497 |     "    \"learning_rate_decay\": \"trapezoid\",\n",
498 |     "    \"learning_rate_decay_batches\": 800,\n",
499 |     "    \n",
500 |     "    \"fit_batch_size\": 16,\n",
501 |     "    \"fit_on\": [\"x0\", \"x1\", \"x2\", \"y\"]\n",
502 |     "}\n",
503 |     "```\n",
504 |     "\n",
505 |     "will find an optimal `learning_rate` for your trapezoid update schedule.\n",
506 |     "\n",
507 |     "`DecayType.NO`, `DecayType.LINEAR`, `DecayType.POLYNOMIAL`, `DecayType.EXPONENTIAL`, `DecayType.ONECYCLE`, `DecayType.TRAPEZOID` are all supported in learning rate search mode."
508 |    ]
509 |   },
510 |   {
511 |    "cell_type": "markdown",
512 |    "metadata": {},
513 |    "source": [
514 |     "### 3. Super Convergence"
515 |    ]
516 |   },
517 |   {
518 |    "cell_type": "markdown",
519 |    "metadata": {},
520 |    "source": [
521 |     "Super Convergence is then equivalent to the following config parameters:\n",
522 |     "\n",
523 |     "```json\n",
524 |     "{\n",
525 |     "    \"class_name\": \"my_model\",\n",
526 |     "    ...\n",
527 |     "    \"learning_rate_decay\": \"onecycle\",\n",
528 |     "    \"learning_rate_decay_batches\": 1000, #hyperparameter\n",
529 |     "    \n",
530 |     "    \"fit_batch_size\": 16, #hyperparameter\n",
531 |     "    \"fit_on\": [\"x0\", \"x1\", \"x2\", \"y\"],\n",
532 |     "    \n",
533 |     "    \"momentum\": [0.95, 0.85],\n",
534 |     "    \"momentum_decay\": \"onecycle\",\n",
535 |     "    \"momentum_decay_batches\": 1000 #hyperparameter\n",
536 |     "}\n",
537 |     "```\n",
538 |     "for any optimizer. Which will result in similar to the following learning rate and momentum update schedules:"
539 |    ]
540 |   },
541 |   {
542 |    "cell_type": "markdown",
543 |    "metadata": {},
544 |    "source": [
545 |     "![cs_ner_lr_sc.png](img/sc_ner_lr_sc.png)"
546 |    ]
547 |   },
548 |   {
549 |    "cell_type": "markdown",
550 |    "metadata": {},
551 |    "source": [
552 |     "For `tf.train:AdamOptimizer` is it recommended to use trapezoid update schedule:\n",
553 |     "\n",
554 |     "```json\n",
555 |     "{\n",
556 |     "    \"class_name\": \"my_model\",\n",
557 |     "    ...\n",
558 |     "    \"optimizer\": \"tf.train:AdamOptimizer\",\n",
559 |     "    \"learning_rate_decay\": \"trapezoid\",\n",
560 |     "    \"learning_rate_decay_batches\": 1000, #hyperparameter\n",
561 |     "    \n",
562 |     "    \"fit_batch_size\": 16, #hyperparameter\n",
563 |     "    \"fit_on\": [\"x0\", \"x1\", \"x2\", \"y\"],\n",
564 |     "    \n",
565 |     "    \"momentum\": [0.95, 0.85],\n",
566 |     "    \"momentum_decay\": \"trapezoid\",\n",
567 |     "    \"momentum_decay_batches\": 1000 #hyperparameter\n",
568 |     "}\n",
569 |     "```"
570 |    ]
571 |   },
572 |   {
573 |    "cell_type": "markdown",
574 |    "metadata": {},
575 |    "source": [
576 |     "![cs_ner_lr_sc1.png](img/sc_ner_lr_sc1.png)"
577 |    ]
578 |   },
579 |   {
580 |    "cell_type": "code",
581 |    "execution_count": null,
582 |    "metadata": {},
583 |    "outputs": [],
584 |    "source": []
585 |   }
586 |  ],
587 |  "metadata": {
588 |   "kernelspec": {
589 |    "display_name": "Python 3",
590 |    "name": "python3"
591 |   },
592 |   "accelerator": "GPU",
593 |   "language_info": {
594 |    "codemirror_mode": {
595 |     "name": "ipython",
596 |     "version": 3
597 |    },
598 |    "file_extension": ".py",
599 |    "mimetype": "text/x-python",
600 |    "name": "python",
601 |    "nbconvert_exporter": "python",
602 |    "pygments_lexer": "ipython3",
603 |    "version": "3.6.6"
604 |   },
605 |   "latex_envs": {
606 |    "LaTeX_envs_menu_present": true,
607 |    "autoclose": false,
608 |    "autocomplete": true,
609 |    "bibliofile": "biblio.bib",
610 |    "cite_by": "apalike",
611 |    "current_citInitial": 1,
612 |    "eqLabelWithNumbers": true,
613 |    "eqNumInitial": 1,
614 |    "hotkeys": {
615 |     "equation": "Ctrl-E",
616 |     "itemize": "Ctrl-I"
617 |    },
618 |    "labels_anchors": false,
619 |    "latex_user_defs": false,
620 |    "report_style_numbering": false,
621 |    "user_envs_cfg": false
622 |   }
623 |  },
624 |  "nbformat": 4,
625 |  "nbformat_minor": 2
626 | }
627 | 


--------------------------------------------------------------------------------
/examples/gobot_tutorial.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "colab_type": "text",
  7 |     "id": "xYxEKPhTgRif"
  8 |    },
  9 |    "source": [
 10 |     "### You can also run the notebook in [COLAB](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/gobot_tutorial.ipynb)."
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {
 16 |     "colab_type": "text",
 17 |     "id": "-NIf_5W0gRkj"
 18 |    },
 19 |    "source": [
 20 |     "# Simple bot in DeepPavlov"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {
 26 |     "colab_type": "text",
 27 |     "id": "GjTJTYIqgRk2"
 28 |    },
 29 |    "source": [
 30 |     "This tutorial describes how to build a simple trainable dialogue system with DeepPavlov framework. It shows one of the easiest ways to create a chatbot. All you need is just a dozen of dialogs from your domain with bot responses annotated for dialogue acts. The tutorial covers the following steps:\n",
 31 |     "\n",
 32 |     "0. [Data preparation](#0.-Data-Preparation)\n",
 33 |     "1. [Train bot](#1.-Train-bot)\n",
 34 |     "2. [Interact with bot](#2.-Interact-with-bot)\n",
 35 |     "\n",
 36 |     "\n",
 37 |     "An example of the final model served as a telegram bot is:\n",
 38 |     "\n",
 39 |     "![gobot_simple_example.png](img/gobot_simple_example.png)"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": null,
 45 |    "metadata": {
 46 |     "colab": {
 47 |      "base_uri": "https://localhost:8080/",
 48 |      "height": 1000
 49 |     },
 50 |     "colab_type": "code",
 51 |     "executionInfo": {
 52 |      "elapsed": 44268,
 53 |      "status": "ok",
 54 |      "timestamp": 1568799332537,
 55 |      "user": {
 56 |       "displayName": "Mikhail Burtsev",
 57 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mD-UjGT1Q2KIGGrL9KU-xXovwU2v8j7wSsrT1Tj9Q=s64",
 58 |       "userId": "02998805542659340239"
 59 |      },
 60 |      "user_tz": -180
 61 |     },
 62 |     "id": "JeSE4a-SgRjo",
 63 |     "outputId": "bde9888c-654d-4e0f-dd4f-0dbc416aa283"
 64 |    },
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "!pip install deeppavlov\n",
 68 |     "!python -m deeppavlov install gobot_dstc2_minimal"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "metadata": {
 74 |     "colab_type": "text",
 75 |     "id": "fbv3rMFngRlH"
 76 |    },
 77 |    "source": [
 78 |     "## 0. Data Preparation"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {
 84 |     "colab_type": "text",
 85 |     "id": "hTpb4EHbgRla"
 86 |    },
 87 |    "source": [
 88 |     "In this tutorial we will build and train a simple chatbot just from 10 dialogues. \n",
 89 |     "\n",
 90 |     "Reading data:"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": null,
 96 |    "metadata": {
 97 |     "colab": {},
 98 |     "colab_type": "code",
 99 |     "id": "B5oak1V5gRlq"
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "from deeppavlov.dataset_readers.dstc2_reader import SimpleDSTC2DatasetReader\n",
104 |     "\n",
105 |     "\n",
106 |     "class AssistantDatasetReader(SimpleDSTC2DatasetReader):\n",
107 |     "    \n",
108 |     "    url = \"http://files.deeppavlov.ai/datasets/tutor_assistant_data.tar.gz\"\n",
109 |     "    \n",
110 |     "    @staticmethod\n",
111 |     "    def _data_fname(datatype):\n",
112 |     "        assert datatype in ('val', 'trn', 'tst'), \"wrong datatype name\"\n",
113 |     "        return f\"assistant-{datatype}.json\""
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {
120 |     "colab": {
121 |      "base_uri": "https://localhost:8080/",
122 |      "height": 137
123 |     },
124 |     "colab_type": "code",
125 |     "executionInfo": {
126 |      "elapsed": 595,
127 |      "status": "ok",
128 |      "timestamp": 1568799767898,
129 |      "user": {
130 |       "displayName": "Mikhail Burtsev",
131 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mD-UjGT1Q2KIGGrL9KU-xXovwU2v8j7wSsrT1Tj9Q=s64",
132 |       "userId": "02998805542659340239"
133 |      },
134 |      "user_tz": -180
135 |     },
136 |     "id": "I-GPAAWjgRmj",
137 |     "outputId": "05f22005-56c2-48c7-882a-13a9d039b146"
138 |    },
139 |    "outputs": [],
140 |    "source": [
141 |     "data = AssistantDatasetReader().read('assistant_data')"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "markdown",
146 |    "metadata": {
147 |     "colab_type": "text",
148 |     "id": "0dcewhzTgRns"
149 |    },
150 |    "source": [
151 |     "The training/validation/test data is stored in json files (`assistant-trn.json`, `assistant-val.json` and `assistant-tst.json`):"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "metadata": {
158 |     "colab": {
159 |      "base_uri": "https://localhost:8080/",
160 |      "height": 50
161 |     },
162 |     "colab_type": "code",
163 |     "executionInfo": {
164 |      "elapsed": 2259,
165 |      "status": "ok",
166 |      "timestamp": 1568799772627,
167 |      "user": {
168 |       "displayName": "Mikhail Burtsev",
169 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mD-UjGT1Q2KIGGrL9KU-xXovwU2v8j7wSsrT1Tj9Q=s64",
170 |       "userId": "02998805542659340239"
171 |      },
172 |      "user_tz": -180
173 |     },
174 |     "id": "T_q_AMkCgRnO",
175 |     "outputId": "b57a7fec-16aa-4261-ece7-965f7cdb0718"
176 |    },
177 |    "outputs": [],
178 |    "source": [
179 |     "!ls assistant_data"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {
185 |     "colab_type": "text",
186 |     "id": "5elPwVGH8sFb"
187 |    },
188 |    "source": [
189 |     "Let's take a look at the training data."
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": null,
195 |    "metadata": {
196 |     "colab": {
197 |      "base_uri": "https://localhost:8080/",
198 |      "height": 1000
199 |     },
200 |     "colab_type": "code",
201 |     "executionInfo": {
202 |      "elapsed": 1937,
203 |      "status": "ok",
204 |      "timestamp": 1568801176503,
205 |      "user": {
206 |       "displayName": "Mikhail Burtsev",
207 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mD-UjGT1Q2KIGGrL9KU-xXovwU2v8j7wSsrT1Tj9Q=s64",
208 |       "userId": "02998805542659340239"
209 |      },
210 |      "user_tz": -180
211 |     },
212 |     "id": "DOlrNl_9gRn9",
213 |     "outputId": "966e2c29-8460-471c-ac6c-6951dc8eb7d1"
214 |    },
215 |    "outputs": [],
216 |    "source": [
217 |     "!head -n 310 assistant_data/assistant-trn.json"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "markdown",
222 |    "metadata": {
223 |     "colab_type": "text",
224 |     "id": "HbgaikNS9JY0"
225 |    },
226 |    "source": [
227 |     "Create data iterator to organize data processing."
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": null,
233 |    "metadata": {
234 |     "colab": {
235 |      "base_uri": "https://localhost:8080/",
236 |      "height": 357
237 |     },
238 |     "colab_type": "code",
239 |     "executionInfo": {
240 |      "elapsed": 668,
241 |      "status": "error",
242 |      "timestamp": 1568803282114,
243 |      "user": {
244 |       "displayName": "Mikhail Burtsev",
245 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mD-UjGT1Q2KIGGrL9KU-xXovwU2v8j7wSsrT1Tj9Q=s64",
246 |       "userId": "02998805542659340239"
247 |      },
248 |      "user_tz": -180
249 |     },
250 |     "id": "9NYptoABgRol",
251 |     "outputId": "08c1e1b4-46cc-4be9-c4ab-98bc074e54a6",
252 |     "scrolled": true
253 |    },
254 |    "outputs": [],
255 |    "source": [
256 |     "from deeppavlov.dataset_iterators.dialog_iterator import DialogDatasetIterator\n",
257 |     "\n",
258 |     "iterator = DialogDatasetIterator(data)"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "markdown",
263 |    "metadata": {
264 |     "colab_type": "text",
265 |     "id": "hYh26FBDgRpL"
266 |    },
267 |    "source": [
268 |     "You can now iterate over batches of preprocessed dialogs:"
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "code",
273 |    "execution_count": null,
274 |    "metadata": {
275 |     "colab": {
276 |      "base_uri": "https://localhost:8080/",
277 |      "height": 318
278 |     },
279 |     "colab_type": "code",
280 |     "executionInfo": {
281 |      "elapsed": 762,
282 |      "status": "ok",
283 |      "timestamp": 1568799789488,
284 |      "user": {
285 |       "displayName": "Mikhail Burtsev",
286 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mD-UjGT1Q2KIGGrL9KU-xXovwU2v8j7wSsrT1Tj9Q=s64",
287 |       "userId": "02998805542659340239"
288 |      },
289 |      "user_tz": -180
290 |     },
291 |     "id": "oMLknr2mgRpk",
292 |     "outputId": "27d39825-3eac-4adb-c7ee-c3d12e8584ef"
293 |    },
294 |    "outputs": [],
295 |    "source": [
296 |     "from pprint import pprint\n",
297 |     "\n",
298 |     "for dialog in iterator.gen_batches(batch_size=1, data_type='train'):\n",
299 |     "    turns_x, turns_y = dialog\n",
300 |     "    \n",
301 |     "    print(\"User utterances:\\n----------------\\n\")\n",
302 |     "    pprint(turns_x[0], indent=4)\n",
303 |     "    print(\"\\nSystem responses:\\n-----------------\\n\")\n",
304 |     "    pprint(turns_y[0], indent=4)\n",
305 |     "    \n",
306 |     "    break\n",
307 |     "\n",
308 |     "print(\"\\n-----------------\")    \n",
309 |     "print(f\"{len(iterator.get_instances('train')[0])} dialog(s) in train.\")\n",
310 |     "print(f\"{len(iterator.get_instances('valid')[0])} dialog(s) in valid.\")\n",
311 |     "print(f\"{len(iterator.get_instances('test')[0])} dialog(s) in test.\")"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "markdown",
316 |    "metadata": {
317 |     "colab_type": "text",
318 |     "id": "PbSQDMHfgRqo"
319 |    },
320 |    "source": [
321 |     "## 1. Train bot"
322 |    ]
323 |   },
324 |   {
325 |    "cell_type": "markdown",
326 |    "metadata": {
327 |     "colab_type": "text",
328 |     "id": "TgXWO32vgRqy"
329 |    },
330 |    "source": [
331 |     "A policy module of the bot decides what action should be taken in the current dialogue state.The policy in our bot is implemented as a recurrent neural network (recurrency over user utterances) followed by a dense layer with softmax function on top. The network classifies user input into one of predefined system actions. Examples of possible actions are to say hello, to ask what is the weather or to suggest to drink tea. \n",
332 |     "\n",
333 |     "&nbsp;\n",
334 |     "![gobot_simple_policy.png](img/gobot_simple_policy.png)\n",
335 |     "&nbsp;\n",
336 |     "\n",
337 |     "All actions available for the system should be listed in a `assistant-templates.txt` file. Each action should be associated with a string of the corresponding system response.\n",
338 |     "\n",
339 |     "&nbsp;\n",
340 |     "![gobot_simple_templates.png](img/gobot_simple_templates.png)\n",
341 |     "&nbsp;\n",
342 |     "\n",
343 |     "Templates should be in the format `<act>TAB<template>`, where `<act>` is a dialogue action and `<template>` is the corresponding response."
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "markdown",
348 |    "metadata": {
349 |     "colab_type": "text",
350 |     "id": "c6Ra1TzW-WGU"
351 |    },
352 |    "source": [
353 |     "List of actions for our bot:"
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "code",
358 |    "execution_count": null,
359 |    "metadata": {
360 |     "colab": {
361 |      "base_uri": "https://localhost:8080/",
362 |      "height": 100
363 |     },
364 |     "colab_type": "code",
365 |     "executionInfo": {
366 |      "elapsed": 1948,
367 |      "status": "ok",
368 |      "timestamp": 1568799821700,
369 |      "user": {
370 |       "displayName": "Mikhail Burtsev",
371 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mD-UjGT1Q2KIGGrL9KU-xXovwU2v8j7wSsrT1Tj9Q=s64",
372 |       "userId": "02998805542659340239"
373 |      },
374 |      "user_tz": -180
375 |     },
376 |     "id": "lqg_cbfegRrJ",
377 |     "outputId": "9da7386c-b783-41fa-8280-d1c22ecb6958"
378 |    },
379 |    "outputs": [],
380 |    "source": [
381 |     "!head -n 10 assistant_data/assistant-templates.txt"
382 |    ]
383 |   },
384 |   {
385 |    "cell_type": "markdown",
386 |    "metadata": {
387 |     "colab_type": "text",
388 |     "id": "0EB74TkogRse"
389 |    },
390 |    "source": [
391 |     "In essense, the dialogue policy module solves classification task, where a set of classes is defined in `assistant-templates.txt`. So, to train the dialogue policy network you need action label for each system's turn in training dialogues. Our assistant dataset provides `\"act\"` dictionary key that contains action associated with current response. Here is an example of training data for the policy network."
392 |    ]
393 |   },
394 |   {
395 |    "cell_type": "code",
396 |    "execution_count": null,
397 |    "metadata": {
398 |     "colab": {
399 |      "base_uri": "https://localhost:8080/",
400 |      "height": 535
401 |     },
402 |     "colab_type": "code",
403 |     "executionInfo": {
404 |      "elapsed": 2172,
405 |      "status": "ok",
406 |      "timestamp": 1568799833831,
407 |      "user": {
408 |       "displayName": "Mikhail Burtsev",
409 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mD-UjGT1Q2KIGGrL9KU-xXovwU2v8j7wSsrT1Tj9Q=s64",
410 |       "userId": "02998805542659340239"
411 |      },
412 |      "user_tz": -180
413 |     },
414 |     "id": "o-Ny-LEYgRsq",
415 |     "outputId": "87f7a2b9-0d62-4ba7-bd3c-14cb1f697208"
416 |    },
417 |    "outputs": [],
418 |    "source": [
419 |     "!head -n 31 assistant_data/assistant-trn.json"
420 |    ]
421 |   },
422 |   {
423 |    "cell_type": "markdown",
424 |    "metadata": {
425 |     "colab_type": "text",
426 |     "id": "o5P9piXwgRtl"
427 |    },
428 |    "source": [
429 |     "For our bot we will use ML pipline for task-oriented conversational skill from DeepPavlov. We will train this skill with our dialogue data. \n",
430 |     "\n",
431 |     "Skills in DeepPavlov are defined by configuration files. So, we will use [minimal DSTC2 bot config](https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/configs/go_bot/gobot_dstc2_minimal.json) ([more configs](https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/configs/go_bot) are available) and change sections responsible for \n",
432 |     "- embeddings,\n",
433 |     "- response templates,\n",
434 |     "- data and model load/save paths.\n",
435 |     "\n",
436 |     "Loading bot:"
437 |    ]
438 |   },
439 |   {
440 |    "cell_type": "code",
441 |    "execution_count": null,
442 |    "metadata": {
443 |     "colab": {},
444 |     "colab_type": "code",
445 |     "id": "X59MSkmBgRt3"
446 |    },
447 |    "outputs": [],
448 |    "source": [
449 |     "from deeppavlov import configs\n",
450 |     "from deeppavlov.core.common.file import read_json\n",
451 |     "\n",
452 |     "gobot_config = read_json(configs.go_bot.gobot_dstc2_minimal)"
453 |    ]
454 |   },
455 |   {
456 |    "cell_type": "markdown",
457 |    "metadata": {
458 |     "colab_type": "text",
459 |     "id": "BVFgvwKFBKv0"
460 |    },
461 |    "source": [
462 |     "Download pre-trained GLOVe embeddings:"
463 |    ]
464 |   },
465 |   {
466 |    "cell_type": "code",
467 |    "execution_count": null,
468 |    "metadata": {
469 |     "colab": {
470 |      "base_uri": "https://localhost:8080/",
471 |      "height": 53
472 |     },
473 |     "colab_type": "code",
474 |     "executionInfo": {
475 |      "elapsed": 1691,
476 |      "status": "ok",
477 |      "timestamp": 1568800095199,
478 |      "user": {
479 |       "displayName": "Mikhail Burtsev",
480 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mD-UjGT1Q2KIGGrL9KU-xXovwU2v8j7wSsrT1Tj9Q=s64",
481 |       "userId": "02998805542659340239"
482 |      },
483 |      "user_tz": -180
484 |     },
485 |     "id": "XouQ1IBegRvR",
486 |     "outputId": "8bbeac1b-72d8-45a6-87b6-673ee71f5cb4"
487 |    },
488 |    "outputs": [],
489 |    "source": [
490 |     "from deeppavlov.download import download_resource\n",
491 |     "\n",
492 |     "download_resource(url=\"http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt\",\n",
493 |     "                  dest_paths=['assistant_bot/'])"
494 |    ]
495 |   },
496 |   {
497 |    "cell_type": "markdown",
498 |    "metadata": {
499 |     "colab_type": "text",
500 |     "id": "KFhdvuWUBz5T"
501 |    },
502 |    "source": [
503 |     "Configure bot to use downloaded embeddings:"
504 |    ]
505 |   },
506 |   {
507 |    "cell_type": "code",
508 |    "execution_count": null,
509 |    "metadata": {
510 |     "colab": {},
511 |     "colab_type": "code",
512 |     "id": "FElG1xfjgRvq"
513 |    },
514 |    "outputs": [],
515 |    "source": [
516 |     "gobot_config['chainer']['pipe'][-1]['embedder'] = {\n",
517 |     "    \"class_name\": \"glove\",\n",
518 |     "    \"load_path\": \"assistant_bot/glove.6B.100d.txt\"\n",
519 |     "}"
520 |    ]
521 |   },
522 |   {
523 |    "cell_type": "markdown",
524 |    "metadata": {
525 |     "colab_type": "text",
526 |     "id": "KU0o9uM5gRui"
527 |    },
528 |    "source": [
529 |     "Configure bot to use templates:"
530 |    ]
531 |   },
532 |   {
533 |    "cell_type": "code",
534 |    "execution_count": null,
535 |    "metadata": {
536 |     "colab": {},
537 |     "colab_type": "code",
538 |     "id": "yAACg0IAgRuq"
539 |    },
540 |    "outputs": [],
541 |    "source": [
542 |     "gobot_config['chainer']['pipe'][-1]['nlg_manager']['template_path'] = 'assistant_data/assistant-templates.txt'\n",
543 |     "gobot_config['chainer']['pipe'][-1]['nlg_manager']['api_call_action'] = None"
544 |    ]
545 |   },
546 |   {
547 |    "cell_type": "markdown",
548 |    "metadata": {
549 |     "colab_type": "text",
550 |     "id": "JV27LFatgRwE"
551 |    },
552 |    "source": [
553 |     "Specify train/valid/test data path and path to save the final bot model:"
554 |    ]
555 |   },
556 |   {
557 |    "cell_type": "code",
558 |    "execution_count": null,
559 |    "metadata": {
560 |     "colab": {},
561 |     "colab_type": "code",
562 |     "id": "pqhscSbhgRwK"
563 |    },
564 |    "outputs": [],
565 |    "source": [
566 |     "gobot_config['dataset_reader']['class_name'] = '__main__:AssistantDatasetReader'\n",
567 |     "gobot_config['metadata']['variables']['DATA_PATH'] = 'assistant_data'\n",
568 |     "\n",
569 |     "gobot_config['metadata']['variables']['MODEL_PATH'] = 'assistant_bot'"
570 |    ]
571 |   },
572 |   {
573 |    "cell_type": "markdown",
574 |    "metadata": {
575 |     "colab_type": "text",
576 |     "id": "44c_Y8HsgR4l"
577 |    },
578 |    "source": [
579 |     "The whole dialogue system pipeline looks like this:\n",
580 |     "    \n",
581 |     "![gobot_simple_pipeline.png](img/gobot_simple_pipeline.png)"
582 |    ]
583 |   },
584 |   {
585 |    "cell_type": "markdown",
586 |    "metadata": {
587 |     "colab_type": "text",
588 |     "id": "kirToS-DgR4v"
589 |    },
590 |    "source": [
591 |     "Train policy network:"
592 |    ]
593 |   },
594 |   {
595 |    "cell_type": "code",
596 |    "execution_count": null,
597 |    "metadata": {
598 |     "colab": {
599 |      "base_uri": "https://localhost:8080/",
600 |      "height": 792
601 |     },
602 |     "colab_type": "code",
603 |     "executionInfo": {
604 |      "elapsed": 40132,
605 |      "status": "error",
606 |      "timestamp": 1568800144573,
607 |      "user": {
608 |       "displayName": "Mikhail Burtsev",
609 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mD-UjGT1Q2KIGGrL9KU-xXovwU2v8j7wSsrT1Tj9Q=s64",
610 |       "userId": "02998805542659340239"
611 |      },
612 |      "user_tz": -180
613 |     },
614 |     "id": "ZRcmJBcvgR5D",
615 |     "outputId": "d1ad0b56-d45a-4ae6-c7dc-cc88931dfed6"
616 |    },
617 |    "outputs": [],
618 |    "source": [
619 |     "from deeppavlov import train_model\n",
620 |     "\n",
621 |     "gobot_config['train']['batch_size'] = 4 # set batch size\n",
622 |     "gobot_config['train']['max_batches'] = 30 # maximum number of training batches\n",
623 |     "gobot_config['train']['val_every_n_batches'] = 30 # evaluate on full 'valid' split every 30 epochs\n",
624 |     "gobot_config['train']['log_every_n_batches'] = 5 # evaluate on full 'train' split every 5 batches\n",
625 |     "\n",
626 |     "train_model(gobot_config);"
627 |    ]
628 |   },
629 |   {
630 |    "cell_type": "markdown",
631 |    "metadata": {
632 |     "colab_type": "text",
633 |     "id": "BMYLonE_gR_Q"
634 |    },
635 |    "source": [
636 |     "Training on the dataset takes up to 5 minutes depending on gpu/cpu. See [config doc page](http://docs.deeppavlov.ai/en/master/intro/configuration.html) for advanced configuration of the training process."
637 |    ]
638 |   },
639 |   {
640 |    "cell_type": "markdown",
641 |    "metadata": {
642 |     "colab_type": "text",
643 |     "id": "8CNlZyfSgSAi"
644 |    },
645 |    "source": [
646 |     "# 2. Interact with bot"
647 |    ]
648 |   },
649 |   {
650 |    "cell_type": "code",
651 |    "execution_count": null,
652 |    "metadata": {
653 |     "colab": {},
654 |     "colab_type": "code",
655 |     "id": "kaUTLCl_gSAm"
656 |    },
657 |    "outputs": [],
658 |    "source": [
659 |     "from deeppavlov import build_model\n",
660 |     "\n",
661 |     "bot = build_model(gobot_config)"
662 |    ]
663 |   },
664 |   {
665 |    "cell_type": "code",
666 |    "execution_count": null,
667 |    "metadata": {
668 |     "colab": {},
669 |     "colab_type": "code",
670 |     "id": "1UTvHL01gSA5"
671 |    },
672 |    "outputs": [],
673 |    "source": [
674 |     "bot([[{\"text\": \"good evening, bot\"}]])"
675 |    ]
676 |   },
677 |   {
678 |    "cell_type": "code",
679 |    "execution_count": null,
680 |    "metadata": {
681 |     "colab": {},
682 |     "colab_type": "code",
683 |     "id": "YY_BdF-egSBT"
684 |    },
685 |    "outputs": [],
686 |    "source": [
687 |     "bot([[{\"text\": \"the weather is clooudy and gloooomy\"}]])"
688 |    ]
689 |   },
690 |   {
691 |    "cell_type": "code",
692 |    "execution_count": null,
693 |    "metadata": {
694 |     "colab": {},
695 |     "colab_type": "code",
696 |     "id": "-Xf1gCdmgSBr"
697 |    },
698 |    "outputs": [],
699 |    "source": [
700 |     "bot([[{\"text\": \"nice idea, thanks!\"}]])"
701 |    ]
702 |   },
703 |   {
704 |    "cell_type": "code",
705 |    "execution_count": null,
706 |    "metadata": {
707 |     "colab": {},
708 |     "colab_type": "code",
709 |     "id": "Ad_GDanAgSCi"
710 |    },
711 |    "outputs": [],
712 |    "source": [
713 |     "bot.reset()"
714 |    ]
715 |   },
716 |   {
717 |    "cell_type": "code",
718 |    "execution_count": null,
719 |    "metadata": {
720 |     "colab": {},
721 |     "colab_type": "code",
722 |     "id": "r5312Df1gSC0"
723 |    },
724 |    "outputs": [],
725 |    "source": [
726 |     "bot([[{\"text\": \"hi bot\"}]])"
727 |    ]
728 |   },
729 |   {
730 |    "cell_type": "code",
731 |    "execution_count": null,
732 |    "metadata": {
733 |     "colab": {},
734 |     "colab_type": "code",
735 |     "id": "jXQw_11jgSDM"
736 |    },
737 |    "outputs": [],
738 |    "source": [
739 |     "bot([[{\"text\": \"looks ok, the sun is bright and yesterday's rain stopped already\"}]])"
740 |    ]
741 |   },
742 |   {
743 |    "cell_type": "code",
744 |    "execution_count": null,
745 |    "metadata": {
746 |     "colab": {},
747 |     "colab_type": "code",
748 |     "id": "-SKJicTCgSDe"
749 |    },
750 |    "outputs": [],
751 |    "source": [
752 |     "bot([[{\"text\": \"i dont wanna\"}]])"
753 |    ]
754 |   },
755 |   {
756 |    "cell_type": "code",
757 |    "execution_count": null,
758 |    "metadata": {},
759 |    "outputs": [],
760 |    "source": []
761 |   }
762 |  ],
763 |  "metadata": {
764 |   "colab": {
765 |    "name": "gobot_tutorial_simple(MB).ipynb",
766 |    "provenance": [],
767 |    "version": "0.3.2"
768 |   },
769 |   "kernelspec": {
770 |    "display_name": "Python 3",
771 |    "name": "python3"
772 |   },
773 |   "accelerator": "GPU",
774 |   "language_info": {
775 |    "codemirror_mode": {
776 |     "name": "ipython",
777 |     "version": 3
778 |    },
779 |    "file_extension": ".py",
780 |    "mimetype": "text/x-python",
781 |    "name": "python",
782 |    "nbconvert_exporter": "python",
783 |    "pygments_lexer": "ipython3",
784 |    "version": "3.7.4"
785 |   }
786 |  },
787 |  "nbformat": 4,
788 |  "nbformat_minor": 1
789 | }
790 | 


--------------------------------------------------------------------------------
/DP_ODQA.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Open-domain question answering with DeepPavlov\n",
  8 |     "\n",
  9 |     "\n",
 10 |     "The architecture of the DeepPavlov ODQA skill is modular and consists of two components: a **ranker** and a **reader**. In order to answer any question, the **ranker** first retrieves a few relevant articles from the article collection, and then the **reader** scans them carefully to identify the answer. The **ranker** is based on DrQA [1] proposed by Facebook Research. Specifically, the DrQA approach uses unigram-bigram hashing and TF-IDF matching designed to efficiently return a subset of relevant articles based on a question. The **reader** is based on R-NET [2] proposed by Microsoft Research Asia and its implementation by Wenxuan Zhou. The R-NET architecture is an end-to-end neural network model that aims to answer questions based on a given article. R-NET first matches the question and the article via gated attention-based recurrent networks to obtain a question-aware article representation. Then the self-matching attention mechanism refines the representation by matching the article against itself, which effectively encodes information from the whole article. Finally, the pointer networks locate the positions of answers in the article. The scheme below shows DeepPavlov ODQA system architecture.\n",
 11 |     "\n",
 12 |     "DeepPavlov’s ODQA system has two Wikipedia-based models. The first one is based on the English Wikipedia dump from 2018-02-11 (5,180,368 articles) and the second one is based on the Russian Wikipedia dump from 2018-04-01 (1,463,888 articles).\n",
 13 |     "\n",
 14 |     "[1] [Chen, Danqi, et al. \"Reading wikipedia to answer open-domain questions.\" arXiv preprint arXiv:1704.00051 (2017)](https://arxiv.org/pdf/1704.00051.pdf)\n",
 15 |     "\n",
 16 |     "[2] [R-NET: Machine reading comprehension with self-matching networks](https://www.microsoft.com/en-us/research/wp-content/uploads/2017/05/r-net.pdf)"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "<img src=\"odqa.png\">\n",
 24 |     "\n",
 25 |     "<center>Picture 1. The DeepPavlov-based ODQA system architecture</center>"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "# Model Requirements\n",
 33 |     "\n",
 34 |     "The DeepPavlov ODQA system has two Wikipedia-based models. The English Wikipedia model requires 35 GB of local storage, whereas the Russian version takes up about 20 GB. The Wikipedia dumps can be rebuilt by steps described in the [documentation](http://docs.deeppavlov.ai/en/0.1.6/components/tfidf_ranking.html#available-data-and-pretrained-models). Both models require about 24 GB of RAM. It is possible to run them on a 16 GB machine, but the swap size should be at least 8 GB.\n",
 35 |     " \n",
 36 |     "But first, install DeepPavlov and all the model's requirements."
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": null,
 42 |    "metadata": {
 43 |     "collapsed": true
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "!pip install -q deeppavlov\n",
 48 |     "!python -m deeppavlov install en_odqa_infer_wiki"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "# Model Description\n",
 56 |     "\n",
 57 |     "The architecture of the ODQA skill is modular and consists of two components, a **ranker** and a **reader**. In order to answer any question, the **reader** first retrieves **top_n** relevant articles from the document collection, and then the **reader** scans them carefully to identify the answer. The detailed description of the ODQA models can be found in the [DeepPavlov documentation](http://docs.deeppavlov.ai/en/0.1.6/skills/odqa.html)."
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "%load https://github.com/deepmipt/DeepPavlov/blob/0.1.6/deeppavlov/configs/odqa/en_odqa_infer_wiki.json"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "# Interacting with the model\n",
 74 |     "\n",
 75 |     "**As it was mentioned, the Wikipedia-based models have significant storage and RAM requirements, therefore it's impossible to interact with them on Colab, however you can do so localy (of course when the requirements are satisfied). Alternatively, you can check out our [demo](http://demo.ipavlov.ai/).**\n",
 76 |     "\n",
 77 |     "Make sure that you can navigate the configuration files by using Autocomplete (Tab key) with **configs** module."
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "raw",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "from deeppavlov import configs\n",
 85 |     "from deeppavlov.core.commands.infer import build_model\n",
 86 |     "\n",
 87 |     "odqa = build_model(configs.odqa.en_odqa_infer_wiki, download = True)\n",
 88 |     "answers = odqa([\n",
 89 |     "                \"Where did guinea pigs originate?\", \n",
 90 |     "                \"When did the Lynmouth floods happen?\",\n",
 91 |     "                \"When is the Bastille Day?\"\n",
 92 |     "                ])"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "metadata": {},
 98 |    "source": [
 99 |     "# Training the model\n",
100 |     "\n",
101 |     "You can train a model by running the framework with **train** parameter, wherein the model will be trained on the document collection defined in the **dataset_reader** section of the configuration file. The **dataset_reader** section of the ranker’s configuration defines the source of the articles. The source can be of the following **dataset_format-**:\n",
102 |     "\n",
103 |     "wiki — the Wikipedia dump,\n",
104 |     "txt — the path to the separated text files,\n",
105 |     "json — JSON files, which should be formatted as a list with dicts that contain the *title* and *doc* keywords.\n",
106 |     "\n",
107 |     "\n",
108 |     "* *wiki* - The Wikipedia dump\n",
109 |     "* *txt* - each document in separate txt file\n",
110 |     "* *json* - JSON files should be formatted as list with dicts which contain 'title' and 'doc' keywords.\n",
111 |     "\n",
112 |     "As a training corpus, I will use the PloS sentence corpus. It consists of 300 computational biology articles, each of them stored in a separate *txt* file. For simplicity, we will use the same configuration files that is used for the Wikipedia-based ODQA system; however, we strongly encourage you to create custom configuration files for your own models."
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {
119 |     "collapsed": true
120 |    },
121 |    "outputs": [],
122 |    "source": [
123 |     "!wget -q http://archive.ics.uci.edu/ml/machine-learning-databases/00311/SentenceCorpus.zip\n",
124 |     "!unzip SentenceCorpus.zip"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "In order to fit a model on new data, first, change the **data_path** parameter of the **dataset_reader** section. Then change the **dataset_format** to *txt*. Finally, train the model."
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": null,
137 |    "metadata": {
138 |     "collapsed": true
139 |    },
140 |    "outputs": [],
141 |    "source": [
142 |     "from deeppavlov import configs\n",
143 |     "from deeppavlov.core.common.file import read_json\n",
144 |     "from deeppavlov import configs, train_model\n",
145 |     "\n",
146 |     "model_config = read_json(configs.doc_retrieval.en_ranker_tfidf_wiki)\n",
147 |     "model_config[\"dataset_reader\"][\"data_path\"] = \"/content/SentenceCorpus/unlabeled_articles/plos_unlabeled\"\n",
148 |     "model_config[\"dataset_reader\"][\"dataset_format\"] = \"txt\"\n",
149 |     "doc_retrieval = train_model(model_config)"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "Examine the ranker output."
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": null,
162 |    "metadata": {
163 |     "collapsed": true
164 |    },
165 |    "outputs": [],
166 |    "source": [
167 |     "doc_retrieval(['cerebellum'])"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "markdown",
172 |    "metadata": {},
173 |    "source": [
174 |     "Everything is done to run the ODQA component, make sure that the **download = False** otherwise the pretrained Wikipedia dump will overwrite your model."
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 4,
180 |    "metadata": {
181 |     "collapsed": true
182 |    },
183 |    "outputs": [
184 |     {
185 |      "name": "stderr",
186 |      "output_type": "stream",
187 |      "text": [
188 |       "2019-01-29 18:20:21.166 DEBUG in 'urllib3.connectionpool'['connectionpool'] at line 208: Starting new HTTP connection (1): files.deeppavlov.ai\n",
189 |       "2019-01-29 18:20:21.319 DEBUG in 'urllib3.connectionpool'['connectionpool'] at line 396: http://files.deeppavlov.ai:80 \"GET /deeppavlov_data/multi_squad_model_noans_1.1.tar.gz.md5 HTTP/1.1\" 200 461\n",
190 |       "2019-01-29 18:20:21.323 DEBUG in 'urllib3.connectionpool'['connectionpool'] at line 208: Starting new HTTP connection (1): files.deeppavlov.ai\n",
191 |       "2019-01-29 18:20:21.334 DEBUG in 'urllib3.connectionpool'['connectionpool'] at line 396: http://files.deeppavlov.ai:80 \"GET /deeppavlov_data/multi_squad_model_noans_1.1.tar.gz HTTP/1.1\" 200 264599752\n",
192 |       "2019-01-29 18:20:21.336 INFO in 'deeppavlov.core.data.utils'['utils'] at line 64: Downloading from http://files.deeppavlov.ai/deeppavlov_data/multi_squad_model_noans_1.1.tar.gz to /home/com/.deeppavlov/multi_squad_model_noans_1.1.tar.gz\n",
193 |       "100%|██████████| 265M/265M [00:14<00:00, 18.0MB/s]\n",
194 |       "2019-01-29 18:20:36.80 INFO in 'deeppavlov.core.data.utils'['utils'] at line 202: Extracting /home/com/.deeppavlov/multi_squad_model_noans_1.1.tar.gz archive into /home/com/.deeppavlov/models\n",
195 |       "[nltk_data] Downloading package punkt to /home/com/nltk_data...\n",
196 |       "[nltk_data]   Package punkt is already up-to-date!\n",
197 |       "[nltk_data] Downloading package stopwords to /home/com/nltk_data...\n",
198 |       "[nltk_data]   Package stopwords is already up-to-date!\n",
199 |       "[nltk_data] Downloading package perluniprops to /home/com/nltk_data...\n",
200 |       "[nltk_data]   Package perluniprops is already up-to-date!\n",
201 |       "[nltk_data] Downloading package nonbreaking_prefixes to\n",
202 |       "[nltk_data]     /home/com/nltk_data...\n",
203 |       "[nltk_data]   Package nonbreaking_prefixes is already up-to-date!\n",
204 |       "2019-01-29 18:20:47.161 INFO in 'deeppavlov.models.preprocessors.squad_preprocessor'['squad_preprocessor'] at line 311: SquadVocabEmbedder: loading saved tokens vocab from /home/com/.deeppavlov/models/multi_squad_model_noans/emb/vocab_embedder.pckl\n",
205 |       "2019-01-29 18:20:49.234 INFO in 'deeppavlov.models.preprocessors.squad_preprocessor'['squad_preprocessor'] at line 311: SquadVocabEmbedder: loading saved chars vocab from /home/com/.deeppavlov/models/multi_squad_model_noans/emb/char_vocab_embedder.pckl\n",
206 |       "Using TensorFlow backend.\n",
207 |       "2019-01-29 18:20:53.859 INFO in 'deeppavlov.core.layers.tf_layers'['tf_layers'] at line 614: \n",
208 |       "Warning! tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell is used. It is okay for inference mode, but if you train your model with this cell it could NOT be used with tf.contrib.cudnn_rnn.CudnnGRUCell later. \n",
209 |       "2019-01-29 18:21:00.90 WARNING in 'tensorflow'['tf_logging'] at line 125: From /home/com/Shared/DeepPavlov/deeppavlov/core/layers/tf_layers.py:808: calling reverse_sequence (from tensorflow.python.ops.array_ops) with seq_dim is deprecated and will be removed in a future version.\n",
210 |       "Instructions for updating:\n",
211 |       "seq_dim is deprecated, use seq_axis instead\n",
212 |       "2019-01-29 18:21:00.112 WARNING in 'tensorflow'['tf_logging'] at line 125: From /home/com/miniconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:454: calling reverse_sequence (from tensorflow.python.ops.array_ops) with batch_dim is deprecated and will be removed in a future version.\n",
213 |       "Instructions for updating:\n",
214 |       "batch_dim is deprecated, use batch_axis instead\n",
215 |       "2019-01-29 18:21:00.115 INFO in 'deeppavlov.core.layers.tf_layers'['tf_layers'] at line 614: \n",
216 |       "Warning! tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell is used. It is okay for inference mode, but if you train your model with this cell it could NOT be used with tf.contrib.cudnn_rnn.CudnnGRUCell later. \n",
217 |       "2019-01-29 18:21:00.368 INFO in 'deeppavlov.core.layers.tf_layers'['tf_layers'] at line 614: \n",
218 |       "Warning! tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell is used. It is okay for inference mode, but if you train your model with this cell it could NOT be used with tf.contrib.cudnn_rnn.CudnnGRUCell later. \n",
219 |       "2019-01-29 18:21:00.487 INFO in 'deeppavlov.core.layers.tf_layers'['tf_layers'] at line 614: \n",
220 |       "Warning! tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell is used. It is okay for inference mode, but if you train your model with this cell it could NOT be used with tf.contrib.cudnn_rnn.CudnnGRUCell later. \n",
221 |       "2019-01-29 18:21:03.583 WARNING in 'tensorflow'['tf_logging'] at line 125: From /home/com/Shared/DeepPavlov/deeppavlov/models/squad/squad.py:211: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n",
222 |       "Instructions for updating:\n",
223 |       "\n",
224 |       "Future major versions of TensorFlow will allow gradients to flow\n",
225 |       "into the labels input on backprop by default.\n",
226 |       "\n",
227 |       "See @{tf.nn.softmax_cross_entropy_with_logits_v2}.\n",
228 |       "\n",
229 |       "2019-01-29 18:21:15.649 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 47: [loading model from /home/com/.deeppavlov/models/multi_squad_model_noans/model]\n",
230 |       "2019-01-29 18:21:15.780 INFO in 'tensorflow'['tf_logging'] at line 115: Restoring parameters from /home/com/.deeppavlov/models/multi_squad_model_noans/model\n",
231 |       "2019-01-29 18:21:19.584 ERROR in 'deeppavlov.core.common.params'['params'] at line 106: Exception in <class 'deeppavlov.models.vectorizers.hashing_tfidf_vectorizer.HashingTfIdfVectorizer'>\n",
232 |       "Traceback (most recent call last):\n",
233 |       "  File \"/home/com/Shared/DeepPavlov/deeppavlov/core/common/params.py\", line 100, in from_params\n",
234 |       "    component = cls(**dict(config_params, **kwargs))\n",
235 |       "  File \"/home/com/Shared/DeepPavlov/deeppavlov/models/vectorizers/hashing_tfidf_vectorizer.py\", line 80, in __init__\n",
236 |       "    self.tfidf_matrix, opts = self.load()\n",
237 |       "  File \"/home/com/Shared/DeepPavlov/deeppavlov/models/vectorizers/hashing_tfidf_vectorizer.py\", line 263, in load\n",
238 |       "    raise FileNotFoundError(\"HashingTfIdfVectorizer path doesn't exist!\")\n",
239 |       "FileNotFoundError: HashingTfIdfVectorizer path doesn't exist!\n"
240 |      ]
241 |     },
242 |     {
243 |      "ename": "FileNotFoundError",
244 |      "evalue": "HashingTfIdfVectorizer path doesn't exist!",
245 |      "output_type": "error",
246 |      "traceback": [
247 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
248 |       "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
249 |       "\u001b[0;32m<ipython-input-4-4fc2944858d3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0msquad\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbuild_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfigs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msquad\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmulti_squad_noans_infer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdownload\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0;31m# Do not download the ODQA models, we've just trained it\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0modqa\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbuild_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfigs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0modqa\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0men_odqa_infer_wiki\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdownload\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0manswers\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0modqa\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"what is tuberculosis?\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"how should I take antibiotics?\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
250 |       "\u001b[0;32m~/Shared/DeepPavlov/deeppavlov/core/commands/infer.py\u001b[0m in \u001b[0;36mbuild_model\u001b[0;34m(config, mode, load_trained, download, serialized)\u001b[0m\n\u001b[1;32m     59\u001b[0m             \u001b[0mcomponent_serialized\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m         \u001b[0mcomponent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfrom_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcomponent_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mserialized\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcomponent_serialized\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     62\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     63\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;34m'in'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcomponent_config\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
251 |       "\u001b[0;32m~/Shared/DeepPavlov/deeppavlov/core/common/params.py\u001b[0m in \u001b[0;36mfrom_params\u001b[0;34m(params, mode, serialized, **kwargs)\u001b[0m\n\u001b[1;32m     78\u001b[0m         \u001b[0m_refs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     79\u001b[0m         \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparse_config\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpand_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig_params\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'config_path'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 80\u001b[0;31m         \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbuild_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mserialized\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mserialized\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     81\u001b[0m         \u001b[0m_refs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     82\u001b[0m         \u001b[0m_refs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrefs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
252 |       "\u001b[0;32m~/Shared/DeepPavlov/deeppavlov/core/commands/infer.py\u001b[0m in \u001b[0;36mbuild_model\u001b[0;34m(config, mode, load_trained, download, serialized)\u001b[0m\n\u001b[1;32m     59\u001b[0m             \u001b[0mcomponent_serialized\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m         \u001b[0mcomponent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfrom_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcomponent_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mserialized\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcomponent_serialized\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     62\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     63\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;34m'in'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcomponent_config\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
253 |       "\u001b[0;32m~/Shared/DeepPavlov/deeppavlov/core/common/params.py\u001b[0m in \u001b[0;36mfrom_params\u001b[0;34m(params, mode, serialized, **kwargs)\u001b[0m\n\u001b[1;32m     98\u001b[0m             \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'mode'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m         \u001b[0mcomponent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig_params\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    101\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    102\u001b[0m             \u001b[0m_refs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconfig_params\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcomponent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
254 |       "\u001b[0;32m~/Shared/DeepPavlov/deeppavlov/models/vectorizers/hashing_tfidf_vectorizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, tokenizer, hash_size, doc_index, save_path, load_path, **kwargs)\u001b[0m\n\u001b[1;32m     78\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     79\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'mode'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'infer'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'infer'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 80\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtfidf_matrix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mopts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     81\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mngram_range\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mopts\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'ngram_range'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     82\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhash_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mopts\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'hash_size'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
255 |       "\u001b[0;32m~/Shared/DeepPavlov/deeppavlov/models/vectorizers/hashing_tfidf_vectorizer.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    261\u001b[0m         \"\"\"\n\u001b[1;32m    262\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_path\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 263\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mFileNotFoundError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"HashingTfIdfVectorizer path doesn't exist!\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    264\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    265\u001b[0m         \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Loading tfidf matrix from {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
256 |       "\u001b[0;31mFileNotFoundError\u001b[0m: HashingTfIdfVectorizer path doesn't exist!"
257 |      ]
258 |     }
259 |    ],
260 |    "source": [
261 |     "from deeppavlov import configs\n",
262 |     "from deeppavlov.core.commands.infer import build_model\n",
263 |     "\n",
264 |     "# Download all the SQuAD models\n",
265 |     "squad = build_model(configs.squad.multi_squad_noans_infer, download = True)\n",
266 |     "# Do not download the ODQA models, we've just trained it\n",
267 |     "odqa = build_model(configs.odqa.en_odqa_infer_wiki, download = False)\n",
268 |     "answers = odqa([\"what is tuberculosis?\", \"how should I take antibiotics?\"])"
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "markdown",
273 |    "metadata": {},
274 |    "source": [
275 |     "# Useful links\n",
276 |     "\n",
277 |     "[DeepPavlov repository](https://github.com/deepmipt/DeepPavlov)\n",
278 |     "\n",
279 |     "[DeepPavlov demo page](https://demo.ipavlov.ai)\n",
280 |     "\n",
281 |     "[DeepPavlov documentation](https://docs.deeppavlov.ai)"
282 |    ]
283 |   }
284 |  ],
285 |  "metadata": {
286 |   "kernelspec": {
287 |    "display_name": "Python 3",
288 |    "language": "python",
289 |    "name": "python3"
290 |   },
291 |   "language_info": {
292 |    "codemirror_mode": {
293 |     "name": "ipython",
294 |     "version": 3
295 |    },
296 |    "file_extension": ".py",
297 |    "mimetype": "text/x-python",
298 |    "name": "python",
299 |    "nbconvert_exporter": "python",
300 |    "pygments_lexer": "ipython3",
301 |    "version": "3.6.8"
302 |   }
303 |  },
304 |  "nbformat": 4,
305 |  "nbformat_minor": 2
306 | }
307 | 


--------------------------------------------------------------------------------
/examples/DeepPavlov_MTL_Tutorial.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "YeHC94sJ5LtB"
  7 |       },
  8 |       "source": [
  9 |         "If you have not already installed DeepPavlov, you should run"
 10 |       ]
 11 |     },
 12 |     {
 13 |       "cell_type": "code",
 14 |       "execution_count": null,
 15 |       "metadata": {
 16 |         "id": "DTmWGZBU5LtG"
 17 |       },
 18 |       "outputs": [],
 19 |       "source": [
 20 |         "!pip install deeppavlov>=1.1.1"
 21 |       ]
 22 |     },
 23 |     {
 24 |       "cell_type": "markdown",
 25 |       "metadata": {
 26 |         "id": "Sk-y97ru5LtH"
 27 |       },
 28 |       "source": [
 29 |         "Multitask models are supported in DeepPavlov starting from the version 1.1.1.\n",
 30 |         "\n",
 31 |         "We will see what the multitask configs in DeepPavlov look like, on the example of config multitask/multitask_example.json.\n",
 32 |         "\n",
 33 |         "## Dataset reader\n",
 34 |         "\n",
 35 |         "As a dataset reader, we use the `multitask_reader` class. This class must have a parameter tasks, which is a dictionary `{task name: parameters for the task}`. The order of the tasks in this dictionary must be exactly the same as in the later stages of the config.\n",
 36 |         "Any parameter for any task, if it does not exist in that dictionary, is drawn from another parameter - task_defaults. This parameter contains the default dictionary for any task and this dictionary can also be empty.\n",
 37 |         "The dataset_reader, path, train, validation, and test fields must exist for all tasks - either as default fields or as fields that are explicitly given in the dictionary.\n",
 38 |         "```\n",
 39 |         "{\n",
 40 |         "    \"dataset_reader\": {\n",
 41 |         "   \t \"class_name\": \"multitask_reader\",\n",
 42 |         "   \t \"task_defaults\": {\n",
 43 |         "   \t\t \"class_name\": \"huggingface_dataset_reader\",\n",
 44 |         "   \t\t \"path\": \"glue\",\n",
 45 |         "   \t\t \"train\": \"train\",\n",
 46 |         "   \t\t \"valid\": \"validation\",\n",
 47 |         "   \t\t \"test\": \"test\"\n",
 48 |         "   \t },\n",
 49 |         "   \t \"tasks\": {\n",
 50 |         "   \t\t \"cola\": {\n",
 51 |         "   \t\t\t \"name\": \"cola\"\n",
 52 |         "   \t\t },\n",
 53 |         "   \t\t \"rte\": {\n",
 54 |         "   \t\t\t \"name\": \"rte\"\n",
 55 |         "   \t\t },\n",
 56 |         "   \t\t \"stsb\": {\n",
 57 |         "   \t\t\t \"name\": \"stsb\"\n",
 58 |         "   \t\t },\n",
 59 |         "   \t\t \"copa\": {\n",
 60 |         "   \t\t\t \"path\": \"super_glue\",\n",
 61 |         "   \t\t\t \"name\": \"copa\"\n",
 62 |         "   \t\t },\n",
 63 |         "   \t\t \"conll\": {\n",
 64 |         "   \t\t\t \"class_name\": \"conll2003_reader\",\n",
 65 |         "   \t\t\t \"use_task_defaults\": false,\n",
 66 |         "   \t\t\t \"data_path\": \"{DOWNLOADS_PATH}/conll2003/\",\n",
 67 |         "   \t\t\t \"dataset_name\": \"conll2003\",\n",
 68 |         "   \t\t\t \"provide_pos\": false\n",
 69 |         "   \t\t },\n",
 70 |         "   \t\t \"squad\": {\n",
 71 |         "   \t\t\t \"class_name\": \"squad_dataset_reader\",\n",
 72 |         "   \t\t\t \"dataset\": \"squad\",\n",
 73 |         "   \t\t\t \"url\": \"http://files.deeppavlov.ai/datasets/squad-v1.1.tar.gz\",\n",
 74 |         "   \t\t\t \"data_path\": \"{DOWNLOADS_PATH}/squad_ru_clean/\"\n",
 75 |         "   \t\t }\n",
 76 |         "   \t }\n",
 77 |         "    },\n",
 78 |         "```\n",
 79 |         "\n",
 80 |         "## Dataset iterator\n",
 81 |         "\n",
 82 |         "As a dataset iterator, we use the `multitask_iterator` class. In this class, we also pass the dictionary tasks, which contain an iterator class name and parameters(if they are set) for all tasks analogously to the `multitask_reader`.\n",
 83 |         "We also set in the same class number of gradient accumulation steps, training epochs, and batch size(these parameters need to be also in the trainer).\n",
 84 |         "We also pass into the `multitask_iterator` sampling mode, which defines for every task a probability that the samples will be drawn from its set of samples. We support uniform sampling (the same sampling probability for all tasks), plain sampling(sampling probability is proportional to the sample number), and annealed sampling.\n",
 85 |         "\n",
 86 |         "```\n",
 87 |         "\"dataset_iterator\": {\n",
 88 |         "   \t \"class_name\": \"multitask_iterator\",\n",
 89 |         "   \t \"num_train_epochs\": \"{NUM_TRAIN_EPOCHS}\",\n",
 90 |         "   \t \"gradient_accumulation_steps\": \"{GRADIENT_ACC_STEPS}\",\n",
 91 |         "   \t \"seed\": 42,\n",
 92 |         "   \t \"task_defaults\": {\n",
 93 |         "   \t\t \"class_name\": \"huggingface_dataset_iterator\",\n",
 94 |         "   \t\t \"label\": \"label\",\n",
 95 |         "   \t\t \"use_label_name\": false,\n",
 96 |         "   \t\t \"seed\": 42\n",
 97 |         "   \t },\n",
 98 |         "   \t \"tasks\": {\n",
 99 |         "   \t\t \"cola\": {\n",
100 |         "   \t\t\t \"features\": [\"sentence\"]\n",
101 |         "   \t\t },\n",
102 |         "   \t\t \"rte\": {\n",
103 |         "   \t\t\t \"features\": [\"sentence1\", \"sentence2\"]\n",
104 |         "   \t\t },\n",
105 |         "   \t\t \"stsb\": {\n",
106 |         "   \t\t\t \"features\": [\"sentence1\", \"sentence2\"]\n",
107 |         "   \t\t },\n",
108 |         "   \t\t \"copa\": {\n",
109 |         "   \t\t\t \"features\": [\"contexts\", \"choices\"]\n",
110 |         "   \t\t },\n",
111 |         "   \t\t \"conll\": {\n",
112 |         "   \t\t\t \"class_name\": \"basic_classification_iterator\",\n",
113 |         "   \t\t\t \"seed\": 42,\n",
114 |         "   \t\t\t \"use_task_defaults\": false\n",
115 |         "   \t\t },\n",
116 |         "   \t\t \"squad\": {\n",
117 |         "   \t\t\t \"class_name\": \"squad_iterator\",\n",
118 |         "   \t\t\t \"seed\": 1337,\n",
119 |         "   \t\t\t \"shuffle\": true\n",
120 |         "   \t\t }\n",
121 |         "   \t }\n",
122 |         "    },\n",
123 |         "\n",
124 |         "```\n",
125 |         "## Chainer\n",
126 |         "\n",
127 |         "The chainer utilizes elements for every task separately.\n",
128 |         "\n",
129 |         "However, to streamline the multi-task preprocessing, we have introduced the optional `multitask_pipeline_preprocessor` class. For this class, one should set the vocab_file for the tokenizer and either the default preprocessor class name or the list of preprocessor names(not the ones used in configs, but the ones defined in the library). The user can also set whether to do lowercase and whether to print the first example for the debugging purpose.\n",
130 |         "\n",
131 |         "```\n",
132 |         "\t\"chainer\": {\n",
133 |         "   \t \"in\": [\"x_cola\", \"x_rte\", \"x_stsb\", \"x_copa\", \"x_conll\", \"x_squad\"],\n",
134 |         "   \t \"in_y\": [\"y_cola\", \"y_rte\", \"y_stsb\", \"y_copa\", \"y_conll\", \"y_squad\"],\n",
135 |         "   \t \"pipe\": [{\n",
136 |         "   \t\t\t \"class_name\": \"multitask_input_splitter\",\n",
137 |         "                            \t\"keys_to_extract\": [0,1],\n",
138 |         "   \t\t\t \"in\": [\"x_squad\"],\n",
139 |         "   \t\t\t \"out\": [\"question_raw_squad\", \"context_raw_squad\"]\n",
140 |         "   \t\t },\n",
141 |         "   \t\t {\n",
142 |         "   \t\t\t \"class_name\": \"multitask_input_splitter\",\n",
143 |         "                            \t\"keys_to_extract\": [0,1],\n",
144 |         "   \t\t\t \"in\": [\"y_squad\"],\n",
145 |         "   \t\t\t \"out\": [\"ans_raw_squad\", \"ans_raw_start_squad\"]\n",
146 |         "   \t\t },\n",
147 |         "   \t\t {\n",
148 |         "   \t\t\t \"class_name\": \"torch_squad_transformers_preprocessor\",\n",
149 |         "   \t\t\t \"add_token_type_ids\": true,\n",
150 |         "   \t\t\t \"vocab_file\": \"{BACKBONE}\",\n",
151 |         "   \t\t\t \"do_lower_case\": true,\n",
152 |         "   \t\t\t \"max_seq_length\": 384,\n",
153 |         "   \t\t\t \"in\": [\n",
154 |         "   \t\t\t\t \"question_raw_squad\",\n",
155 |         "   \t\t\t\t \"context_raw_squad\"\n",
156 |         "   \t\t\t ],\n",
157 |         "   \t\t\t \"out\": [\n",
158 |         "   \t\t\t\t \"bert_features_squad\",\n",
159 |         "   \t\t\t\t \"subtokens_squad\",\n",
160 |         "   \t\t\t\t \"split_context_squad\"\n",
161 |         "   \t\t\t ]\n",
162 |         "   \t\t },\n",
163 |         "   \t\t {\n",
164 |         "   \t\t\t \"class_name\": \"squad_bert_mapping\",\n",
165 |         "   \t\t\t \"do_lower_case\": true,\n",
166 |         "   \t\t\t \"in\": [\n",
167 |         "   \t\t\t\t \"split_context_squad\",\n",
168 |         "   \t\t\t\t \"bert_features_squad\",\n",
169 |         "   \t\t\t\t \"subtokens_squad\"\n",
170 |         "   \t\t\t ],\n",
171 |         "   \t\t\t \"out\": [\n",
172 |         "   \t\t\t\t \"subtok2chars_squad\",\n",
173 |         "   \t\t\t\t \"char2subtoks_squad\"\n",
174 |         "   \t\t\t ]\n",
175 |         "   \t\t },\n",
176 |         "   \t\t {\n",
177 |         "   \t\t\t \"class_name\": \"squad_bert_ans_preprocessor\",\n",
178 |         "   \t\t\t \"do_lower_case\": true,\n",
179 |         "   \t\t\t \"in\": [\n",
180 |         "   \t\t\t\t \"ans_raw_squad\",\n",
181 |         "   \t\t\t\t \"ans_raw_start_squad\",\n",
182 |         "   \t\t\t\t \"char2subtoks_squad\"\n",
183 |         "   \t\t\t ],\n",
184 |         "   \t\t\t \"out\": [\n",
185 |         "   \t\t\t\t \"ans_squad\",\n",
186 |         "   \t\t\t\t \"ans_start_squad\",\n",
187 |         "   \t\t\t\t \"ans_end_squad\"\n",
188 |         "   \t\t\t ]\n",
189 |         "   \t\t },\n",
190 |         "   \t\t {\n",
191 |         "   \t\t\t \"class_name\": \"multitask_pipeline_preprocessor\",\n",
192 |         "   \t\t\t \"possible_keys_to_extract\": [0, 1],\n",
193 |         "   \t\t\t \"preprocessors\": [\n",
194 |         "   \t\t\t\t \"TorchTransformersPreprocessor\",\n",
195 |         "   \t\t\t\t \"TorchTransformersPreprocessor\",\n",
196 |         "   \t\t\t\t \"TorchTransformersPreprocessor\",\n",
197 |         "   \t\t\t\t \"TorchTransformersMultiplechoicePreprocessor\",\n",
198 |         "   \t\t\t\t \"TorchTransformersNerPreprocessor\"\n",
199 |         "   \t\t\t ],\n",
200 |         "   \t\t\t \"do_lower_case\": true,\n",
201 |         "   \t\t\t \"n_task\": 5,\n",
202 |         "   \t\t\t \"vocab_file\": \"{BACKBONE}\",\n",
203 |         "   \t\t\t \"max_seq_length\": 200,\n",
204 |         "   \t\t\t \"max_subword_length\": 15,\n",
205 |         "   \t\t\t \"token_masking_prob\": 0.0,\n",
206 |         "   \t\t\t \"return_features\": true,\n",
207 |         "   \t\t\t \"in\": [\"x_cola\", \"x_rte\", \"x_stsb\", \"x_copa\", \"x_conll\"],\n",
208 |         "   \t\t\t \"out\": [\n",
209 |         "   \t\t\t\t \"bert_features_cola\",\n",
210 |         "   \t\t\t\t \"bert_features_rte\",\n",
211 |         "   \t\t\t\t \"bert_features_stsb\",\n",
212 |         "   \t\t\t\t \"bert_features_copa\",\n",
213 |         "   \t\t\t\t \"bert_features_conll\"\n",
214 |         "   \t\t\t ]\n",
215 |         "   \t\t },\n",
216 |         "   \t\t {\n",
217 |         "   \t\t\t \"id\": \"vocab_conll\",\n",
218 |         "   \t\t\t \"class_name\": \"simple_vocab\",\n",
219 |         "   \t\t\t \"unk_token\": [\"O\"],\n",
220 |         "   \t\t\t \"pad_with_zeros\": true,\n",
221 |         "   \t\t\t \"save_path\": \"{MODELS_PATH}/tag.dict\",\n",
222 |         "   \t\t\t \"load_path\": \"{MODELS_PATH}/tag.dict\",\n",
223 |         "   \t\t\t \"fit_on\": [\"y_conll\"],\n",
224 |         "   \t\t\t \"in\": [\"y_conll\"],\n",
225 |         "   \t\t\t \"out\": [\"y_ids_conll\"]\n",
226 |         "   \t\t },\n",
227 |         "```\n",
228 |         "\n",
229 |         "## Multitask transformer\n",
230 |         "\n",
231 |         "As a class for multi-task training, we use the `multitask_transformer` class. The backbone model for multi-task training is defined in this class - it is advisable to make it the same as used for the tokenization in the previous components.\n",
232 |         "In this class, one should give as a `tasks` parameter a dictionary that has exactly the same order of tasks as in the reader, iterator, and `in_x` and `in_y` components in the chainer.\n",
233 |         "For every task, a number of options and the task_type needs to be set.\n",
234 |         "You give `in` ( bert_features, the same order as tasks have) and `in_y` ( y for every task, also the same order) and you obtain probabilities if return_probas=True or labels\n",
235 |         " ids if return_probas=False. ( Apart from the regression task, where always scores are returned(sts-b in config) and ner task, where always label ids for every token are returned(conll in config).\n",
236 |         "\n",
237 |         "```\n",
238 |         "   \t \t{\n",
239 |         "   \t\t\t \"id\": \"multitask_transformer\",\n",
240 |         "   \t\t\t \"class_name\": \"multitask_transformer\",\n",
241 |         "   \t\t\t \"optimizer_parameters\": {\n",
242 |         "   \t\t\t\t \"lr\": 2e-5\n",
243 |         "   \t\t\t },\n",
244 |         "   \t\t\t \"gradient_accumulation_steps\": \"{GRADIENT_ACC_STEPS}\",\n",
245 |         "   \t\t\t \"learning_rate_drop_patience\": 2,\n",
246 |         "   \t\t\t \"learning_rate_drop_div\": 2.0,\n",
247 |         "   \t\t\t \"return_probas\": true,\n",
248 |         "   \t\t\t \"backbone_model\": \"{BACKBONE}\",\n",
249 |         "   \t\t\t \"save_path\": \"{MODEL_PATH}\",\n",
250 |         "   \t\t\t \"load_path\": \"{MODEL_PATH}\",\n",
251 |         "   \t\t\t \"tasks\": {\n",
252 |         "   \t\t\t\t \"cola\": {\n",
253 |         "   \t\t\t\t\t \"type\": \"classification\",\n",
254 |         "   \t\t\t\t\t \"options\": 2\n",
255 |         "   \t\t\t\t },\n",
256 |         "   \t\t\t\t \"rte\": {\n",
257 |         "   \t\t\t\t\t \"type\": \"classification\",\n",
258 |         "   \t\t\t\t\t \"options\": 2\n",
259 |         "   \t\t\t\t },\n",
260 |         "   \t\t\t\t \"stsb\": {\n",
261 |         "   \t\t\t\t\t \"type\": \"regression\",\n",
262 |         "   \t\t\t\t\t \"options\": 1\n",
263 |         "   \t\t\t\t },\n",
264 |         "   \t\t\t\t \"copa\": {\n",
265 |         "   \t\t\t\t\t \"type\": \"multiple_choice\",\n",
266 |         "   \t\t\t\t\t \"options\": 2\n",
267 |         "   \t\t\t\t },\n",
268 |         "   \t\t\t\t \"conll\": {\n",
269 |         "   \t\t\t\t\t \"type\": \"sequence_labeling\",\n",
270 |         "   \t\t\t\t\t \"options\": \"#vocab_conll.len\"\n",
271 |         "   \t\t\t\t },\n",
272 |         "   \t\t\t\t \"squad\":{\"type\":\"question_answering\",\n",
273 |         "   \t\t\t\t \"options\":2}\n",
274 |         "   \t\t\t },\n",
275 |         "   \t\t\t \"in\": [\n",
276 |         "   \t\t\t\t \"bert_features_cola\",\n",
277 |         "   \t\t\t\t \"bert_features_rte\",\n",
278 |         "   \t\t\t\t \"bert_features_stsb\",\n",
279 |         "   \t\t\t\t \"bert_features_copa\",\n",
280 |         "   \t\t\t\t \"bert_features_conll\",\n",
281 |         "   \t\t\t\t \"bert_features_squad\"\n",
282 |         "   \t\t\t ],\n",
283 |         "   \t\t\t \"in_y\": [\"y_cola\", \"y_rte\", \"y_stsb\", \"y_copa\", \"y_ids_conll\", \"ans_squad\"],\n",
284 |         "   \t\t\t \"out\": [\n",
285 |         "   \t\t\t\t \"y_cola_pred_probas\",\n",
286 |         "   \t\t\t\t \"y_rte_pred_probas\",\n",
287 |         "   \t\t\t\t \"y_stsb_pred\",\n",
288 |         "   \t\t\t\t \"y_copa_pred_probas\",\n",
289 |         "   \t\t\t\t \"y_conll_pred_ids\",\n",
290 |         "   \t\t\t\t \"results_squad\"\n",
291 |         "   \t\t\t ]\n",
292 |         "   \t\t },\n",
293 |         "```\n",
294 |         "## Multitask metrics\n",
295 |         "After the multitask_transformer, almost all other components are the same as the single-task setting or as mentioned before…\n",
296 |         "\n",
297 |         "```\n",
298 |         "   \t \t{\n",
299 |         "   \t\t\t \"class_name\": \"multitask_input_splitter\",\n",
300 |         "   \t\t\t \"in\": [\"results_squad\"],\n",
301 |         "                            \t\"keys_to_extract\": [0,1,2,3,4],\n",
302 |         "   \t\t\t \"out\": [\"ans_start_predicted_squad\",\n",
303 |         "   \t\t\t\t \"ans_end_predicted_squad\",\n",
304 |         "   \t\t\t\t \"logits_squad\",\n",
305 |         "   \t\t\t\t \"scores_squad\",\n",
306 |         "   \t\t\t\t \"inds_squad\"\n",
307 |         "   \t\t\t ]\n",
308 |         "   \t\t },\n",
309 |         "   \t\t {\n",
310 |         "   \t\t\t \"class_name\": \"squad_bert_ans_postprocessor\",\n",
311 |         "   \t\t\t \"in\": [\n",
312 |         "   \t\t\t\t \"ans_start_predicted_squad\",\n",
313 |         "   \t\t\t\t \"ans_end_predicted_squad\",\n",
314 |         "   \t\t\t\t \"split_context_squad\",\n",
315 |         "   \t\t\t\t \"subtok2chars_squad\",\n",
316 |         "   \t\t\t\t \"subtokens_squad\",\n",
317 |         "   \t\t\t\t \"inds_squad\"\n",
318 |         "   \t\t\t ],\n",
319 |         "   \t\t\t \"out\": [\n",
320 |         "   \t\t\t\t \"ans_predicted_squad\",\n",
321 |         "   \t\t\t\t \"ans_start_predicted_squad\",\n",
322 |         "   \t\t\t\t \"ans_end_predicted_squad\"\n",
323 |         "   \t\t\t ]\n",
324 |         "   \t\t },\n",
325 |         "   \t\t {\n",
326 |         "   \t\t\t \"in\": [\"y_cola_pred_probas\"],\n",
327 |         "   \t\t\t \"out\": [\"y_cola_pred_ids\"],\n",
328 |         "   \t\t\t \"class_name\": \"proba2labels\",\n",
329 |         "   \t\t\t \"max_proba\": true\n",
330 |         "   \t\t },\n",
331 |         "   \t\t {\n",
332 |         "   \t\t\t \"in\": [\"y_rte_pred_probas\"],\n",
333 |         "   \t\t\t \"out\": [\"y_rte_pred_ids\"],\n",
334 |         "   \t\t\t \"class_name\": \"proba2labels\",\n",
335 |         "   \t\t\t \"max_proba\": true\n",
336 |         "   \t\t },\n",
337 |         "   \t\t {\n",
338 |         "   \t\t\t \"in\": [\"y_copa_pred_probas\"],\n",
339 |         "   \t\t\t \"out\": [\"y_copa_pred_ids\"],\n",
340 |         "   \t\t\t \"class_name\": \"proba2labels\",\n",
341 |         "   \t\t\t \"max_proba\": true\n",
342 |         "   \t\t },\n",
343 |         "   \t\t {\n",
344 |         "   \t\t\t \"in\": [\"y_conll_pred_ids\"],\n",
345 |         "   \t\t\t \"out\": [\"y_conll_pred_labels\"],\n",
346 |         "   \t\t\t \"ref\": \"vocab_conll\"\n",
347 |         "   \t\t }\n",
348 |         "   \t ],\n",
349 |         "   \t \"out\": [\"y_cola_pred_ids\", \"y_rte_pred_ids\", \"y_stsb_pred\", \"y_copa_pred_ids\", \"y_conll_pred_labels\"]\n",
350 |         "    },\n",
351 |         "    \"train\": {\n",
352 |         "   \t \"epochs\": \"{NUM_TRAIN_EPOCHS}\",\n",
353 |         "   \t \"batch_size\": 32,\n",
354 |         "\n",
355 |         "```\n",
356 |         "\n",
357 |         "\n",
358 |         "…apart from the metrics multitask_accuracy, multitask_f1_macro and multitask_f1_weighted, that calculate the corresponding metrics(accuracy, f1-macro and f1-weighted) for any task and then average them. As in any DeepPavlov config, the early stopping is performed for the first metric in the metric list.\n",
359 |         "```\n",
360 |         "    \t\"metrics\": [{\n",
361 |         "   \t\t\t \"name\": \"multitask_accuracy\",\n",
362 |         "   \t\t\t \"inputs\": [\"y_rte\", \"y_cola\", \"y_copa\", \"y_rte_pred_ids\", \"y_cola_pred_ids\", \"y_copa_pred_ids\"]\n",
363 |         "   \t\t },\n",
364 |         "```\n",
365 |         "However, one can also calculate the single-task metrics.\n",
366 |         "```\n",
367 |         "   \t \t{\n",
368 |         "   \t\t\t \"name\": \"ner_f1\",\n",
369 |         "   \t\t\t \"inputs\": [\"y_conll\", \"y_conll_pred_labels\"]\n",
370 |         "   \t\t },\n",
371 |         "   \t\t {\n",
372 |         "   \t\t\t \"name\": \"ner_token_f1\",\n",
373 |         "   \t\t\t \"inputs\": [\"y_conll\", \"y_conll_pred_labels\"]\n",
374 |         "   \t\t },\n",
375 |         "   \t\t {\n",
376 |         "   \t\t\t \"name\": \"accuracy\",\n",
377 |         "   \t\t\t \"alias\": \"accuracy_cola\",\n",
378 |         "   \t\t\t \"inputs\": [\"y_cola\", \"y_cola_pred_ids\"]\n",
379 |         "   \t\t },\n",
380 |         "   \t\t {\n",
381 |         "   \t\t\t \"name\": \"accuracy\",\n",
382 |         "   \t\t\t \"alias\": \"accuracy_rte\",\n",
383 |         "   \t\t\t \"inputs\": [\"y_rte\", \"y_rte_pred_ids\"]\n",
384 |         "   \t\t },\n",
385 |         "   \t\t {\n",
386 |         "   \t\t\t \"name\": \"accuracy\",\n",
387 |         "   \t\t\t \"alias\": \"accuracy_copa\",\n",
388 |         "   \t\t\t \"inputs\": [\"y_copa\", \"y_copa_pred_ids\"]\n",
389 |         "   \t\t },\n",
390 |         "   \t\t {\n",
391 |         "   \t\t\t \"name\": \"pearson_correlation\",\n",
392 |         "   \t\t\t \"alias\": \"pearson_stsb\",\n",
393 |         "   \t\t\t \"inputs\": [\"y_stsb\", \"y_stsb_pred\"]\n",
394 |         "   \t\t },\n",
395 |         "   \t\t {\n",
396 |         "   \t\t\t \"name\": \"spearman_correlation\",\n",
397 |         "   \t\t\t \"alias\": \"spearman_stsb\",\n",
398 |         "   \t\t\t \"inputs\": [\"y_stsb\", \"y_stsb_pred\"]\n",
399 |         "   \t\t },\n",
400 |         "   \t\t {\n",
401 |         "   \t\t\t \"name\": \"squad_v1_f1\",\n",
402 |         "   \t\t\t \"inputs\": [\n",
403 |         "   \t\t\t\t \"ans_squad\",\n",
404 |         "   \t\t\t\t \"ans_predicted_squad\"\n",
405 |         "   \t\t\t ]\n",
406 |         "   \t\t },\n",
407 |         "   \t\t {\n",
408 |         "   \t\t\t \"name\": \"squad_v1_em\",\n",
409 |         "   \t\t\t \"inputs\": [\n",
410 |         "   \t\t\t\t \"ans_squad\",\n",
411 |         "   \t\t\t\t \"ans_predicted_squad\"\n",
412 |         "   \t\t\t ]\n",
413 |         "   \t\t }\n",
414 |         "   \t ],\n",
415 |         "   \t \"validation_patience\": 3,\n",
416 |         "   \t \"val_every_n_epochs\": 1,\n",
417 |         "   \t \"log_every_n_epochs\": 1,\n",
418 |         "   \t \"show_examples\": false,\n",
419 |         "   \t \"evaluation_targets\": [\"valid\"],\n",
420 |         "   \t \"class_name\": \"torch_trainer\"\n",
421 |         "    },\n",
422 |         "    \"metadata\": {\n",
423 |         "   \t \"variables\": {\n",
424 |         "   \t\t \"ROOT_PATH\": \"~/.deeppavlov\",\n",
425 |         "   \t\t \"MODELS_PATH\": \"{ROOT_PATH}/models/multitask_example\",\n",
426 |         "   \t\t \"DOWNLOADS_PATH\": \"{ROOT_PATH}/downloads\",\n",
427 |         "   \t\t \"BACKBONE\": \"distilbert-base-uncased\",\n",
428 |         "   \t\t \"MODEL_PATH\": \"{MODELS_PATH}/{BACKBONE}\",\n",
429 |         "   \t\t \"NUM_TRAIN_EPOCHS\": 5,\n",
430 |         "   \t\t \"GRADIENT_ACC_STEPS\": 1\n",
431 |         "   \t },\n",
432 |         "   \t \"download\": [{\n",
433 |         "   \t\t \"url\": \"http://files.deeppavlov.ai/deeppavlov_data/multitask/multitask_example_v2.tar.gz\",\n",
434 |         "   \t\t \"subdir\": \"{MODELS_PATH}\"\n",
435 |         "   \t }]\n",
436 |         "    }\n",
437 |         "}\n",
438 |         "```"
439 |       ]
440 |     },
441 |     {
442 |       "cell_type": "markdown",
443 |       "source": [
444 |         "## MTL config inference"
445 |       ],
446 |       "metadata": {
447 |         "id": "GIMNHHb-nTd5"
448 |       }
449 |     },
450 |     {
451 |       "cell_type": "markdown",
452 |       "metadata": {
453 |         "id": "3LOrgLOF5LtI"
454 |       },
455 |       "source": [
456 |         "For inferring the multitask config in DeepPavlov, one firstly needs to build the model.\n",
457 |         "If you want to infer our pretrained config, you need to build the model"
458 |       ]
459 |     },
460 |     {
461 |       "cell_type": "code",
462 |       "execution_count": null,
463 |       "metadata": {
464 |         "id": "mCCLyYtb5LtL"
465 |       },
466 |       "outputs": [],
467 |       "source": [
468 |         "from deeppavlov import build_model, configs\n",
469 |         "model = build_model('multitask_example', download=True, install=True)\n",
470 |         "\n",
471 |         "\n",
472 |         "# If you use your config from scratch, it should look like\n",
473 |         "# model = build_model('path/to/your/config.json')"
474 |       ]
475 |     },
476 |     {
477 |       "cell_type": "markdown",
478 |       "metadata": {
479 |         "id": "hFEjdg3-5LtM"
480 |       },
481 |       "source": [
482 |         "Then, for inferring the config for N tasks, one needs to define the list of N lists of variables,\n",
483 |         "where every list is the list of examples to the certain task.\n",
484 |         "\n",
485 |         "Mind that the order of lists must be exactly the same as the order of tasks in config.\n",
486 |         "\n",
487 |         "If the same phrase needs to be classified for many tasks, it is cached.\n",
488 |         "That speeds the computation up compared to using different phrases.\n",
489 |         "If one does not hand over arguments for any task, one can just pass an empty list.\n",
490 |         "\n",
491 |         "\n",
492 |         "Here is how one can make the list of x."
493 |       ]
494 |     },
495 |     {
496 |       "cell_type": "code",
497 |       "execution_count": null,
498 |       "metadata": {
499 |         "id": "Szxs-Qu75LtO"
500 |       },
501 |       "outputs": [],
502 |       "source": [
503 |         "tasks =['cola','rte','stsb','copa','conll']\n",
504 |         "# the same order as config\n",
505 |         "x=dict()\n",
506 |         "for task in tasks:\n",
507 |         "    if task=='rte':  # Sentence pair classification/regression\n",
508 |         "       # Example can be a tuple\n",
509 |         "        x[task]=[('pair 1 phrase 1', 'pair 1 phrase 2'),\n",
510 |         "                 ('pair 2 phrase 1', 'pair 2 phrase 2')]\n",
511 |         "    elif task=='cola': # Single sentence classification/regression\n",
512 |         "       # Example can be a string\n",
513 |         "        x[task]=['phrase1']\n",
514 |         "    elif task=='conll': # NER\n",
515 |         "       # For NER, examples are strings\n",
516 |         "        x[task]=['first second'] # NER\n",
517 |         "    elif task=='stsb': # Single sentence regression.\n",
518 |         "       #Examples for any task can be empty, like in that case\n",
519 |         "        x[task]=[]\n",
520 |         "    elif task=='copa':\n",
521 |         "        x[task]=[('context in pair 1', ['choice 1 in pair 1', 'choice 2 in pair 1']),\n",
522 |         "                          ('context in pair 2', ['choice 1 in pair 2', 'choice 2 in pair 2'])]\n",
523 |         "       # Illustrating multiple choice task\n",
524 |         "\n",
525 |         "    else:\n",
526 |         "        x[task]=['test phrase']\n",
527 |         "list_of_x = [x[task] for task in tasks]"
528 |       ]
529 |     },
530 |     {
531 |       "cell_type": "markdown",
532 |       "source": [
533 |         "To infer the model, one need to pass the concatenation of list of x and list of y.\n",
534 |         "\n",
535 |         "List of y has the same structure as the list of x, but any list for y can be empty."
536 |       ],
537 |       "metadata": {
538 |         "id": "xXuvzUsR5ZgI"
539 |       }
540 |     },
541 |     {
542 |       "cell_type": "code",
543 |       "execution_count": null,
544 |       "metadata": {
545 |         "id": "ZLlGgXQq5LtP"
546 |       },
547 |       "outputs": [],
548 |       "source": [
549 |         "list_of_y = [[] for _ in tasks]\n",
550 |         "args = list_of_x + list_of_y"
551 |       ]
552 |     },
553 |     {
554 |       "cell_type": "markdown",
555 |       "metadata": {
556 |         "id": "avPFMvwK5LtQ"
557 |       },
558 |       "source": [
559 |         "Then we perform inference as for usual DeepPavlov models"
560 |       ]
561 |     },
562 |     {
563 |       "cell_type": "code",
564 |       "execution_count": null,
565 |       "metadata": {
566 |         "id": "wUA-uS2W5LtQ"
567 |       },
568 |       "outputs": [],
569 |       "source": [
570 |         "outputs = model(*args)\n",
571 |         "print(outputs)"
572 |       ]
573 |     }
574 |   ],
575 |   "metadata": {
576 |     "kernelspec": {
577 |       "display_name": "Python 3",
578 |       "language": "python",
579 |       "name": "python3"
580 |     },
581 |     "language_info": {
582 |       "codemirror_mode": {
583 |         "name": "ipython",
584 |         "version": 3
585 |       },
586 |       "file_extension": ".py",
587 |       "mimetype": "text/x-python",
588 |       "name": "python",
589 |       "nbconvert_exporter": "python",
590 |       "pygments_lexer": "ipython3",
591 |       "version": "3.6.9"
592 |     },
593 |     "colab": {
594 |       "provenance": []
595 |     }
596 |   },
597 |   "nbformat": 4,
598 |   "nbformat_minor": 0
599 | }


--------------------------------------------------------------------------------