├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature-request.md ├── .gitignore ├── .readthedocs.yml ├── CNAME ├── Jenkinsfile ├── LICENSE ├── MANIFEST.in ├── NLP_OSS_DeepPavlov_ACL_Demo_final.pdf ├── README.md ├── _config.yml ├── _layouts └── default.html ├── deeppavlov ├── __init__.py ├── __main__.py ├── _meta.py ├── configs │ ├── __init__.py │ ├── classifiers │ │ ├── boolqa_rubert.json │ │ ├── few_shot_roberta.json │ │ ├── glue │ │ │ ├── glue_cola_roberta.json │ │ │ ├── glue_mnli_cased_bert_torch.json │ │ │ ├── glue_mnli_mm_cased_bert_torch.json │ │ │ ├── glue_mnli_roberta.json │ │ │ ├── glue_mrpc_roberta.json │ │ │ ├── glue_qnli_roberta.json │ │ │ ├── glue_qqp_roberta.json │ │ │ ├── glue_rte_cased_bert_torch.json │ │ │ ├── glue_rte_roberta_mnli.json │ │ │ ├── glue_sst2_roberta.json │ │ │ ├── glue_stsb_roberta.json │ │ │ └── glue_wnli_roberta.json │ │ ├── insults_kaggle_bert.json │ │ ├── paraphraser_convers_distilrubert_2L.json │ │ ├── paraphraser_convers_distilrubert_6L.json │ │ ├── paraphraser_rubert.json │ │ ├── query_pr.json │ │ ├── rusentiment_bert.json │ │ ├── rusentiment_convers_bert.json │ │ ├── rusentiment_convers_distilrubert_2L.json │ │ ├── rusentiment_convers_distilrubert_6L.json │ │ ├── sentiment_sst_conv_bert.json │ │ ├── sentiment_twitter.json │ │ ├── superglue │ │ │ ├── superglue_boolq_roberta_mnli.json │ │ │ ├── superglue_copa_roberta.json │ │ │ ├── superglue_record_roberta.json │ │ │ └── superglue_wic_bert.json │ │ └── topics_distilbert_base_uncased.json │ ├── doc_retrieval │ │ ├── en_ranker_pop_wiki.json │ │ ├── en_ranker_tfidf_wiki.json │ │ └── ru_ranker_tfidf_wiki.json │ ├── embedder │ │ ├── bert_embedder.json │ │ └── bert_sentence_embedder.json │ ├── entity_extraction │ │ ├── entity_detection_en.json │ │ ├── entity_detection_ru.json │ │ ├── entity_extraction_en.json │ │ ├── entity_extraction_ru.json │ │ ├── entity_linking_en.json │ │ └── entity_linking_ru.json │ ├── faq │ │ └── fasttext_logreg.json │ ├── kbqa │ │ ├── kbqa_cq_en.json │ │ ├── kbqa_cq_ru.json │ │ └── wiki_parser.json │ ├── morpho_syntax_parser │ │ ├── morpho_ru_syntagrus_bert.json │ │ ├── ru_syntagrus_joint_parsing.json │ │ └── syntax_ru_syntagrus_bert.json │ ├── multitask │ │ ├── mt_glue.json │ │ └── multitask_example.json │ ├── ner │ │ ├── ner_bert_base.json │ │ ├── ner_case_agnostic_mdistilbert.json │ │ ├── ner_collection3_bert.json │ │ ├── ner_conll2003_bert.json │ │ ├── ner_conll2003_deberta_crf.json │ │ ├── ner_ontonotes_bert.json │ │ ├── ner_ontonotes_bert_mult.json │ │ ├── ner_ontonotes_deberta_crf.json │ │ ├── ner_rus_bert.json │ │ ├── ner_rus_bert_probas.json │ │ ├── ner_rus_convers_distilrubert_2L.json │ │ └── ner_rus_convers_distilrubert_6L.json │ ├── odqa │ │ ├── en_odqa_infer_wiki.json │ │ ├── en_odqa_pop_infer_wiki.json │ │ └── ru_odqa_infer_wiki.json │ ├── ranking │ │ ├── path_ranking_nll_roberta_en.json │ │ ├── ranking_ubuntu_v2_torch_bert_uncased.json │ │ ├── rel_ranking_nll_bert_ru.json │ │ └── rel_ranking_roberta_en.json │ ├── regressors │ │ └── translation_ranker.json │ ├── relation_extraction │ │ ├── re_docred.json │ │ └── re_rured.json │ ├── russian_super_glue │ │ ├── russian_superglue_danetqa_rubert.json │ │ ├── russian_superglue_lidirus_rubert.json │ │ ├── russian_superglue_muserc_rubert.json │ │ ├── russian_superglue_parus_rubert.json │ │ ├── russian_superglue_rcb_rubert.json │ │ ├── russian_superglue_rucos_rubert.json │ │ ├── russian_superglue_russe_rubert.json │ │ ├── russian_superglue_rwsd_rubert.json │ │ └── russian_superglue_terra_rubert.json 
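Each JSON file under `deeppavlov/configs` above is a complete, self-contained pipeline definition that can be referenced by its bare file name. A minimal usage sketch, not part of the repository itself: `insults_kaggle_bert` is one of the classifier configs listed above, and the exact labels returned depend on the model.

```python
from deeppavlov import build_model

# install=True pulls the config's per-component requirements;
# download=True fetches the pretrained weights and data declared
# in the config's metadata.download section.
model = build_model('insults_kaggle_bert', install=True, download=True)

print(model(['you are stupid!', 'have a great day']))
```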
│ ├── sentence_segmentation │ │ └── sentseg_dailydialog_bert.json │ ├── spelling_correction │ │ ├── brillmoore_wikitypos_en.json │ │ └── levenshtein_corrector_ru.json │ └── squad │ │ ├── qa_multisberquad_bert.json │ │ ├── qa_nq_psgcls_bert.json │ │ ├── qa_squad2_bert.json │ │ ├── squad_bert.json │ │ ├── squad_ru_bert.json │ │ ├── squad_ru_convers_distilrubert_2L.json │ │ └── squad_ru_convers_distilrubert_6L.json ├── core │ ├── __init__.py │ ├── commands │ │ ├── __init__.py │ │ ├── infer.py │ │ ├── train.py │ │ └── utils.py │ ├── common │ │ ├── __init__.py │ │ ├── aliases.py │ │ ├── base.py │ │ ├── chainer.py │ │ ├── cross_validation.py │ │ ├── errors.py │ │ ├── file.py │ │ ├── log.py │ │ ├── log_events.py │ │ ├── metrics_registry.json │ │ ├── metrics_registry.py │ │ ├── params.py │ │ ├── params_search.py │ │ ├── paths.py │ │ ├── prints.py │ │ ├── registry.json │ │ ├── registry.py │ │ └── requirements_registry.json │ ├── data │ │ ├── __init__.py │ │ ├── data_fitting_iterator.py │ │ ├── data_learning_iterator.py │ │ ├── dataset_reader.py │ │ ├── simple_vocab.py │ │ └── utils.py │ ├── models │ │ ├── __init__.py │ │ ├── component.py │ │ ├── estimator.py │ │ ├── nn_model.py │ │ ├── serializable.py │ │ └── torch_model.py │ └── trainers │ │ ├── __init__.py │ │ ├── fit_trainer.py │ │ ├── nn_trainer.py │ │ ├── torch_trainer.py │ │ └── utils.py ├── dataset_iterators │ ├── __init__.py │ ├── basic_classification_iterator.py │ ├── huggingface_dataset_iterator.py │ ├── morphotagger_iterator.py │ ├── multitask_iterator.py │ ├── siamese_iterator.py │ ├── sqlite_iterator.py │ ├── squad_iterator.py │ └── typos_iterator.py ├── dataset_readers │ ├── __init__.py │ ├── basic_classification_reader.py │ ├── boolqa_reader.py │ ├── conll2003_reader.py │ ├── docred_reader.py │ ├── faq_reader.py │ ├── huggingface_dataset_reader.py │ ├── imdb_reader.py │ ├── line_reader.py │ ├── morphotagging_dataset_reader.py │ ├── multitask_reader.py │ ├── odqa_reader.py │ ├── paraphraser_reader.py │ ├── rel_ranking_reader.py │ ├── rured_reader.py │ ├── sq_reader.py │ ├── squad_dataset_reader.py │ ├── typos_reader.py │ └── ubuntu_v2_reader.py ├── deep.py ├── download.py ├── metrics │ ├── __init__.py │ ├── accuracy.py │ ├── bleu.py │ ├── correlation.py │ ├── elmo_metrics.py │ ├── fmeasure.py │ ├── google_bleu.py │ ├── log_loss.py │ ├── mse.py │ ├── recall_at_k.py │ ├── record_metrics.py │ ├── roc_auc_score.py │ └── squad_metrics.py ├── models │ ├── __init__.py │ ├── api_requester │ │ ├── __init__.py │ │ ├── api_requester.py │ │ └── api_router.py │ ├── classifiers │ │ ├── __init__.py │ │ ├── cos_sim_classifier.py │ │ ├── dnnc_proba2labels.py │ │ ├── proba2labels.py │ │ ├── re_bert.py │ │ ├── torch_classification_model.py │ │ ├── torch_nets.py │ │ └── utils.py │ ├── doc_retrieval │ │ ├── __init__.py │ │ ├── bpr.py │ │ ├── logit_ranker.py │ │ ├── pop_ranker.py │ │ ├── tfidf_ranker.py │ │ └── utils.py │ ├── embedders │ │ ├── __init__.py │ │ ├── abstract_embedder.py │ │ ├── fasttext_embedder.py │ │ ├── tfidf_weighted_embedder.py │ │ └── transformers_embedder.py │ ├── entity_extraction │ │ ├── __init__.py │ │ ├── entity_detection_parser.py │ │ ├── entity_linking.py │ │ ├── find_word.py │ │ └── ner_chunker.py │ ├── kbqa │ │ ├── __init__.py │ │ ├── query_generator.py │ │ ├── query_generator_base.py │ │ ├── rel_ranking_infer.py │ │ ├── ru_adj_to_noun.py │ │ ├── sentence_answer.py │ │ ├── template_matcher.py │ │ ├── tree_to_sparql.py │ │ ├── type_define.py │ │ ├── utils.py │ │ └── wiki_parser.py │ ├── morpho_syntax_parser │ │ ├── __init__.py │ 
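The `core` and `dataset_*` packages above implement the three top-level blocks every config file is assembled from: a `dataset_reader`, a `dataset_iterator`, and a `chainer` of registered components. Because `build_model` accepts a plain dict as well as a config path, a pipeline can also be sketched inline. An illustrative example only, not a shipped config (`str_lower` is a registered preprocessor from `deeppavlov/models/preprocessors/str_lower.py`):

```python
from deeppavlov import build_model

# A chainer with a single registered component: data flows from the
# chainer's "in" names through each pipe element to its "out" names.
config = {
    'chainer': {
        'in': ['text'],
        'pipe': [
            {'class_name': 'str_lower', 'in': ['text'], 'out': ['text_lower']}
        ],
        'out': ['text_lower'],
    }
}

lower = build_model(config)
print(lower(['Hello World']))  # -> ['hello world']
```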
│ ├── dependency_decoding.py │ │ ├── joint.py │ │ ├── spacy_lemmatizer.py │ │ └── syntax_parsing.py │ ├── preprocessors │ │ ├── __init__.py │ │ ├── dirty_comments_preprocessor.py │ │ ├── dnnc_preprocessor.py │ │ ├── mask.py │ │ ├── multitask_preprocessor.py │ │ ├── ner_preprocessor.py │ │ ├── odqa_preprocessors.py │ │ ├── one_hotter.py │ │ ├── re_preprocessor.py │ │ ├── response_base_loader.py │ │ ├── sanitizer.py │ │ ├── sentseg_preprocessor.py │ │ ├── squad_preprocessor.py │ │ ├── str_lower.py │ │ ├── str_token_reverser.py │ │ ├── str_utf8_encoder.py │ │ ├── torch_transformers_preprocessor.py │ │ └── transformers_preprocessor.py │ ├── ranking │ │ ├── __init__.py │ │ └── metrics.py │ ├── relation_extraction │ │ ├── __init__.py │ │ ├── losses.py │ │ └── relation_extraction_bert.py │ ├── sklearn │ │ ├── __init__.py │ │ └── sklearn_component.py │ ├── spelling_correction │ │ ├── __init__.py │ │ ├── brillmoore │ │ │ ├── __init__.py │ │ │ └── error_model.py │ │ ├── electors │ │ │ ├── __init__.py │ │ │ ├── kenlm_elector.py │ │ │ └── top1_elector.py │ │ └── levenshtein │ │ │ ├── __init__.py │ │ │ ├── levenshtein_searcher.py │ │ │ ├── searcher_component.py │ │ │ └── tabled_trie.py │ ├── tokenizers │ │ ├── __init__.py │ │ ├── lazy_tokenizer.py │ │ ├── nltk_moses_tokenizer.py │ │ ├── nltk_tokenizer.py │ │ ├── spacy_tokenizer.py │ │ ├── split_tokenizer.py │ │ └── utils.py │ ├── torch_bert │ │ ├── __init__.py │ │ ├── crf.py │ │ ├── multitask_transformer.py │ │ ├── torch_bert_ranker.py │ │ ├── torch_transformers_classifier.py │ │ ├── torch_transformers_el_ranker.py │ │ ├── torch_transformers_multiplechoice.py │ │ ├── torch_transformers_nll_ranking.py │ │ ├── torch_transformers_sequence_tagger.py │ │ ├── torch_transformers_squad.py │ │ └── torch_transformers_syntax_parser.py │ └── vectorizers │ │ ├── __init__.py │ │ └── hashing_tfidf_vectorizer.py ├── paramsearch.py ├── requirements │ ├── datasets.txt │ ├── dependency_decoding.txt │ ├── en_core_web_sm.txt │ ├── faiss.txt │ ├── fasttext.txt │ ├── hdt.txt │ ├── kenlm.txt │ ├── lxml.txt │ ├── opt_einsum.txt │ ├── protobuf.txt │ ├── pytorch.txt │ ├── rapidfuzz.txt │ ├── razdel.txt │ ├── ru_core_news_sm.txt │ ├── sacremoses.txt │ ├── sentencepiece.txt │ ├── slovnet.txt │ ├── sortedcontainers.txt │ ├── torchcrf.txt │ ├── transformers.txt │ ├── udapi.txt │ └── whapi.txt ├── settings.py ├── utils │ ├── __init__.py │ ├── benchmarks │ │ ├── __init__.py │ │ └── benchmarks.py │ ├── connector │ │ ├── __init__.py │ │ └── dialog_logger.py │ ├── pip_wrapper │ │ ├── __init__.py │ │ └── pip_wrapper.py │ ├── server │ │ ├── __init__.py │ │ ├── metrics.py │ │ └── server.py │ ├── settings │ │ ├── __init__.py │ │ ├── dialog_logger_config.json │ │ ├── log_config.json │ │ └── server_config.json │ └── socket │ │ ├── __init__.py │ │ └── socket.py └── vocabs │ ├── __init__.py │ ├── typos.py │ └── wiki_sqlite.py ├── docs ├── Makefile ├── _static │ ├── aws_ec2 │ │ ├── 01_login_to_aws.png │ │ ├── 02_choose_ubuntu.png │ │ ├── 03_select_instance_type.png │ │ ├── 04_add_storage.png │ │ ├── 05_review_instance.png │ │ ├── 06_go_to_running_instances.png │ │ ├── 07_wait_init.png │ │ ├── 08_01_set_sec_group.png │ │ ├── 08_02_set_inbound.png │ │ ├── 09_01_select_connect.png │ │ └── 09_02_connection_info.png │ ├── deeppavlov.css │ ├── deeppavlov.png │ ├── deeppavlov_logo.png │ ├── dp_agnt_diag.png │ ├── gobot_diagram.png │ ├── ipavlov_footer.png │ ├── kvret_diagram.png │ ├── my_blocks.css │ ├── social │ │ ├── Medium_Monogram.svg │ │ ├── Twitter_Social_Icon_Circle_Color.svg │ │ ├── telegram.png 
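`deeppavlov/utils/server` and `deeppavlov/utils/socket` above expose any built pipeline over REST or a raw socket, configured by `deeppavlov/utils/settings/server_config.json`. A hedged sketch of launching the documented `riseapi` entry point and querying it; the payload keys must match the chosen config's `chainer.in` names, and using `x` here is an assumption that holds for the NER configs:

```python
import subprocess

import requests

# Equivalent to the documented CLI:
#   python -m deeppavlov riseapi ner_ontonotes_bert -p 5000 -d
# (-d downloads the model files on the first run).
server = subprocess.Popen(
    ['python', '-m', 'deeppavlov', 'riseapi', 'ner_ontonotes_bert', '-p', '5000', '-d'])

# Once the server reports it is up, POST a batch to the /model endpoint.
response = requests.post('http://127.0.0.1:5000/model',
                         json={'x': ['Bob Ross lived in Florida']})
print(response.json())

server.terminate()
```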
│ │ └── youtube_social_circle_red.png │ └── tree.png ├── _templates │ └── footer.html ├── apiref │ ├── core.rst │ ├── core │ │ ├── commands.rst │ │ ├── common.rst │ │ ├── data.rst │ │ ├── models.rst │ │ └── trainers.rst │ ├── dataset_iterators.rst │ ├── dataset_readers.rst │ ├── metrics.rst │ ├── models.rst │ ├── models │ │ ├── api_requester.rst │ │ ├── classifiers.rst │ │ ├── doc_retrieval.rst │ │ ├── embedders.rst │ │ ├── entity_extraction.rst │ │ ├── kbqa.rst │ │ ├── preprocessors.rst │ │ ├── relation_extraction.rst │ │ ├── sklearn.rst │ │ ├── spelling_correction.rst │ │ ├── tokenizers.rst │ │ ├── torch_bert.rst │ │ └── vectorizers.rst │ └── vocabs.rst ├── conf.py ├── devguides │ ├── contribution_guide.rst │ └── registry.rst ├── features │ ├── hypersearch.rst │ ├── models │ │ ├── KBQA.ipynb │ │ ├── NER.ipynb │ │ ├── ODQA.ipynb │ │ ├── SQuAD.ipynb │ │ ├── bert.rst │ │ ├── classification.ipynb │ │ ├── entity_extraction.ipynb │ │ ├── few_shot_classification.ipynb │ │ ├── morpho_tagger.ipynb │ │ ├── multitask_bert.rst │ │ ├── neural_ranking.ipynb │ │ ├── popularity_ranking.rst │ │ ├── relation_extraction.ipynb │ │ ├── spelling_correction.ipynb │ │ ├── superglue.rst │ │ ├── syntax_parser.ipynb │ │ └── tfidf_ranking.ipynb │ ├── overview.rst │ └── pretrained_vectors.rst ├── index.rst ├── integrations │ ├── aws_ec2.rst │ ├── rest_api.rst │ ├── settings.rst │ └── socket_api.rst ├── internships │ └── internships.rst └── intro │ ├── configuration.rst │ ├── installation.rst │ ├── overview.rst │ ├── python.ipynb │ └── quick_start.rst ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── test_configs │ └── doc_retrieval │ │ ├── en_ranker_pop_wiki_test.json │ │ ├── en_ranker_tfidf_wiki_test.json │ │ └── ru_ranker_tfidf_wiki_test.json └── test_quick_start.py └── utils ├── Docker ├── Dockerfile ├── README.md ├── cmd.sh └── docker-compose.yml ├── __init__.py └── prepare ├── __init__.py ├── hashes.py ├── optimize_ipynb.py ├── registry.py └── upload.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report on a bug you encountered 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | Want to contribute to DeepPavlov? Please read the [contributing guideline](http://docs.deeppavlov.ai/en/master/devguides/contribution_guide.html) first. 11 | 12 | Please enter all the information below, otherwise your issue may be closed without a warning. 
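The snippet below collects everything the fields below ask for in one go (a convenience sketch; running `pip show deeppavlov` in a shell works equally well):

```python
import platform
import sys

import deeppavlov

# Prints the version, Python and OS details requested by this template.
print('DeepPavlov version:', deeppavlov.__version__)
print('Python version:', sys.version)
print('Operating system:', platform.platform())
```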
13 | 14 | 15 | **DeepPavlov version** (you can look it up by running `pip show deeppavlov`): 16 | 17 | **Python version**: 18 | 19 | **Operating system** (ubuntu linux, windows, ...): 20 | 21 | **Issue**: 22 | 23 | 24 | **Content or a name of a configuration file**: 25 | ``` 26 | 27 | ``` 28 | 29 | 30 | **Command that led to error**: 31 | ``` 32 | 33 | ``` 34 | 35 | **Error (including full traceback)**: 36 | ``` 37 | 38 | ``` 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Ask a question 4 | url: https://forum.deeppavlov.ai/ 5 | about: If you have a different question, please ask it in the forum https://forum.deeppavlov.ai 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest a feature to improve the DeepPavlov library 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | Want to contribute to DeepPavlov? Please read the [contributing guideline](http://docs.deeppavlov.ai/en/master/devguides/contribution_guide.html) first. 11 | 12 | 13 | **What problem are we trying to solve?**: 14 | ``` 15 | 16 | ``` 17 | 18 | **How can we solve it?**: 19 | ``` 20 | 21 | ``` 22 | 23 | **Are there other issues that block this solution?**: 24 | ``` 25 | 26 | ``` 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | #IDEA 104 | .idea/ 105 | 106 | #Atom IDE 107 | .ftpconfig 108 | 109 | #vscode IDE 110 | .vscode 111 | 112 | # Vim 113 | *.vim 114 | *.vimrc 115 | 116 | #GIT 117 | .git/ 118 | 119 | #Default usr dir 120 | download/ 121 | 122 | #project test 123 | /test/ 124 | .pytest_cache 125 | 126 | # project data 127 | /data/ 128 | 129 | # local dockerfiles 130 | /Dockerfile 131 | /entrypoint.sh 132 | /.dockerignore 133 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | version: 2 3 | 4 | build: 5 | os: "ubuntu-20.04" 6 | tools: 7 | python: "3.10" 8 | formats: [] 9 | 10 | python: 11 | install: 12 | - method: pip 13 | path: . 14 | extra_requirements: 15 | - docs 16 | -------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | deeppavlov.ai -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | node('cuda-module') { 2 | timestamps { 3 | try { 4 | stage('Clean') { 5 | sh "rm -rf .[^.] 
.??* *" 6 | } 7 | stage('Checkout') { 8 | checkout scm 9 | } 10 | stage('Setup') { 11 | env.TFHUB_CACHE_DIR="tfhub_cache" 12 | sh """ 13 | EPOCH=\$(date +%s) docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG build 14 | """ 15 | } 16 | stage('Tests') { 17 | sh """ 18 | docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py36 py37 19 | docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1 20 | docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py38 py39 21 | docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1 22 | docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py310 py311 23 | docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1 || exit 0 24 | """ 25 | currentBuild.result = 'SUCCESS' 26 | } 27 | } 28 | catch(e) { 29 | currentBuild.result = 'FAILURE' 30 | throw e 31 | } 32 | finally { 33 | sh """ 34 | docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG rm -f 35 | docker network rm \$(echo $BUILD_TAG | awk '{print tolower(\$0)}')_default 36 | """ 37 | emailext to: "\${DEFAULT_RECIPIENTS}", 38 | subject: "${env.JOB_NAME} - Build # ${currentBuild.number} - ${currentBuild.result}!", 39 | body: '${BRANCH_NAME} - ${BUILD_URL}', 40 | attachLog: true 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include requirements.txt 4 | include deeppavlov/requirements/*.txt 5 | recursive-include deeppavlov *.json 6 | recursive-include deeppavlov *.md 7 | -------------------------------------------------------------------------------- /NLP_OSS_DeepPavlov_ACL_Demo_final.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/NLP_OSS_DeepPavlov_ACL_Demo_final.pdf -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-leap-day 2 | google_analytics: UA-139843736-5 3 | include: 4 | - _static 5 | -------------------------------------------------------------------------------- /deeppavlov/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | import sys 16 | from pathlib import Path 17 | from typing import Union 18 | from ._meta import __author__, __description__, __email__, __keywords__, __license__, __version__ 19 | from .configs import configs 20 | from .core.commands.infer import build_model 21 | from .core.commands.train import train_evaluate_model_from_config 22 | from .core.common.base import Element, Model 23 | from .core.common.chainer import Chainer 24 | from .core.common.log import init_logger 25 | from .download import deep_download 26 | 27 | 28 | # TODO: make better 29 | def train_model(config: Union[str, Path, dict], install: bool = False, 30 | download: bool = False, recursive: bool = False) -> Chainer: 31 | train_evaluate_model_from_config(config, install=install, download=download, recursive=recursive) 32 | return build_model(config, load_trained=True) 33 | 34 | 35 | def evaluate_model(config: Union[str, Path, dict], install: bool = False, 36 | download: bool = False, recursive: bool = False) -> dict: 37 | return train_evaluate_model_from_config(config, to_train=False, install=install, 38 | download=download, recursive=recursive) 39 | 40 | 41 | # check version 42 | assert sys.hexversion >= 0x3060000, 'Does not work in python3.5 or lower' 43 | 44 | # resolve conflicts with previous DeepPavlov installations versioned up to 0.0.9 45 | dot_dp_path = Path('~/.deeppavlov').expanduser().resolve() 46 | if dot_dp_path.is_file(): 47 | dot_dp_path.unlink() 48 | 49 | # initiate logging 50 | init_logger() 51 | -------------------------------------------------------------------------------- /deeppavlov/__main__.py: -------------------------------------------------------------------------------- 1 | if __name__ == '__main__': 2 | from .deep import main 3 | 4 | main() 5 | -------------------------------------------------------------------------------- /deeppavlov/_meta.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.7.0' 2 | __author__ = 'Neural Networks and Deep Learning lab, MIPT' 3 | __description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
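The functions defined in `deeppavlov/__init__.py` above are the library's main Python entry points. A minimal sketch of the intended flow (config names resolve as in `deeppavlov/configs`; checkpoints go to the paths set in the config's `metadata.variables`):

```python
from deeppavlov import build_model, evaluate_model, train_model

# Fine-tune a config end to end, then reload the trained weights
# for inference (train_model above does exactly these two steps).
model = train_model('insults_kaggle_bert', install=True, download=True)
print(model(['some comment to score']))

# Score an already-trained model on the config's evaluation_targets.
metrics = evaluate_model('insults_kaggle_bert')
print(metrics)
```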
4 | __keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot'] 5 | __license__ = 'Apache License, Version 2.0' 6 | __email__ = 'info@deeppavlov.ai' 7 | -------------------------------------------------------------------------------- /deeppavlov/configs/classifiers/boolqa_rubert.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "class_name": "boolqa_reader", 4 | "data_path": "{DOWNLOADS_PATH}/boolqa_data", 5 | "language": "ru" 6 | }, 7 | "dataset_iterator": { 8 | "class_name": "basic_classification_iterator", 9 | "seed": 243 10 | }, 11 | "chainer": { 12 | "in": ["text_a", "text_b"], 13 | "in_y": ["y"], 14 | "pipe": [ 15 | { 16 | "class_name": "torch_transformers_preprocessor", 17 | "vocab_file": "{TRANSFORMER}", 18 | "do_lower_case": false, 19 | "max_seq_length": 128, 20 | "in": ["text_a", "text_b"], 21 | "out": ["bert_features"] 22 | }, 23 | { 24 | "class_name": "torch_transformers_classifier", 25 | "n_classes": 2, 26 | "pretrained_bert": "{TRANSFORMER}", 27 | "save_path": "{MODELS_PATH}/boolqa_rubert/model_rubert", 28 | "load_path": "{MODELS_PATH}/boolqa_rubert/model_rubert", 29 | "optimizer": "AdamW", 30 | "optimizer_parameters": {"lr": 2e-05}, 31 | "learning_rate_drop_patience": 3, 32 | "learning_rate_drop_div": 2.0, 33 | "in": ["bert_features"], 34 | "in_y": ["y"], 35 | "out": ["predictions"] 36 | } 37 | ], 38 | "out": ["predictions"] 39 | }, 40 | "train": { 41 | "epochs": 50, 42 | "batch_size": 32, 43 | "train_metrics": ["f1", "acc"], 44 | "metrics": ["f1", "acc"], 45 | "validation_patience": 5, 46 | "val_every_n_epochs": 1, 47 | "log_every_n_epochs": 1, 48 | "evaluation_targets": ["valid", "train"], 49 | "show_examples": false, 50 | "class_name": "torch_trainer" 51 | }, 52 | "metadata": { 53 | "variables": { 54 | "ROOT_PATH": "~/.deeppavlov", 55 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 56 | "MODELS_PATH": "{ROOT_PATH}/models", 57 | "TRANSFORMER": "DeepPavlov/rubert-base-cased" 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /deeppavlov/configs/classifiers/few_shot_roberta.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["texts", "dataset"], 4 | "in_y": ["y_true"], 5 | "pipe": [ 6 | { 7 | "class_name": "dnnc_pair_generator", 8 | "in": ["texts", "dataset"], 9 | "out": ["x", "x_support", "x_populated", "y_support"], 10 | "bidirectional": true 11 | }, 12 | { 13 | "class_name": "torch_transformers_preprocessor", 14 | "in": ["x_populated", "x_support"], 15 | "out": ["bert_features"], 16 | "vocab_file": "{BASE_MODEL}", 17 | "do_lower_case": true, 18 | "max_seq_length": 128 19 | }, 20 | { 21 | "class_name": "torch_transformers_classifier", 22 | "main": true, 23 | "in": ["bert_features"], 24 | "out": ["simmilarity_scores"], 25 | "n_classes": 2, 26 | "return_probas": true, 27 | "pretrained_bert": "{BASE_MODEL}", 28 | "save_path": "{MODEL_PATH}/model", 29 | "load_path": "{MODEL_PATH}/model", 30 | "is_binary": "{BINARY_CLASSIFICATION}" 31 | }, 32 | { 33 | "class_name": "dnnc_proba2labels", 34 | "is_binary": "{BINARY_CLASSIFICATION}", 35 | "in": ["simmilarity_scores", "x", "x_populated", "x_support", "y_support"], 36 | "out": ["y_pred"], 37 | "confidence_threshold": 0.0 38 | } 39 | ], 40 | "out": ["y_pred"] 41 | }, 42 | "metadata": { 43 | "variables": { 44 | "ROOT_PATH": "~/.deeppavlov", 45 | "MODEL_PATH": "{ROOT_PATH}/models/fewshot/roberta_nli_mrpc_1_10", 46 | "BINARY_CLASSIFICATION": true, 
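`few_shot_roberta.json` above takes two inputs at inference time: the utterances to classify and a support set of labelled examples to compare them against. A hedged sketch; the support-set layout as `[text, label]` pairs, one set per query, is an assumption here, so check the few-shot classification docs for the exact structure:

```python
from deeppavlov import build_model

model = build_model('few_shot_roberta', download=True)

# Labelled examples the DNNC pair generator populates pairs from.
support = [
    ['please set an alarm for mid day', 'alarm_set'],
    ['will it rain tomorrow', 'weather_query'],
]
texts = ['wake me up at noon']

# The chainer's two inputs ("texts", "dataset") map to two call arguments.
print(model(texts, [support] * len(texts)))
```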
47 | "BASE_MODEL": "roberta-base" 48 | }, 49 | "download": [ 50 | { 51 | "url": "http://files.deeppavlov.ai/v1/classifiers/fewshot/roberta_nli_mrpc_1_10.tar.gz", 52 | "subdir": "{MODEL_PATH}" 53 | } 54 | ] 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /deeppavlov/configs/classifiers/glue/glue_stsb_roberta.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "class_name": "huggingface_dataset_reader", 4 | "path": "{COMPETITION}", 5 | "name": "{TASK}", 6 | "train": "train", 7 | "valid": "validation", 8 | "test": "test" 9 | }, 10 | "dataset_iterator": { 11 | "class_name": "huggingface_dataset_iterator", 12 | "features": ["sentence1", "sentence2"], 13 | "label": "label", 14 | "use_label_name": false, 15 | "seed": 42 16 | }, 17 | "chainer": { 18 | "in": ["sentence1", "sentence2"], 19 | "in_y": ["y"], 20 | "pipe": [ 21 | { 22 | "class_name": "torch_transformers_preprocessor", 23 | "vocab_file": "{BASE_MODEL}", 24 | "do_lower_case": false, 25 | "max_seq_length": 64, 26 | "in": ["sentence1", "sentence2"], 27 | "out": ["bert_features"] 28 | }, 29 | { 30 | "class_name": "torch_transformers_classifier", 31 | "n_classes": 1, 32 | "return_probas": false, 33 | "pretrained_bert": "{BASE_MODEL}", 34 | "save_path": "{MODEL_PATH}/model", 35 | "load_path": "{MODEL_PATH}/model", 36 | "optimizer": "AdamW", 37 | "optimizer_parameters": { 38 | "lr": 2e-05 39 | }, 40 | "learning_rate_drop_patience": 3, 41 | "learning_rate_drop_div": 2.0, 42 | "in": ["bert_features"], 43 | "in_y": ["y"], 44 | "out": ["y_pred"] 45 | } 46 | ], 47 | "out": ["y_pred"] 48 | }, 49 | "train": { 50 | "batch_size": 32, 51 | "metrics": [ 52 | "pearson_correlation", 53 | "spearman_correlation" 54 | ], 55 | "validation_patience": 10, 56 | "val_every_n_epochs": 1, 57 | "log_every_n_epochs": 1, 58 | "show_examples": false, 59 | "evaluation_targets": ["train", "valid"], 60 | "class_name": "torch_trainer", 61 | "tensorboard_log_dir": "{MODEL_PATH}/", 62 | "pytest_max_batches": 2 63 | }, 64 | "metadata": { 65 | "variables": { 66 | "BASE_MODEL": "roberta-large", 67 | "ROOT_PATH": "~/.deeppavlov", 68 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 69 | "MODELS_PATH": "{ROOT_PATH}/models", 70 | "COMPETITION": "glue", 71 | "TASK": "stsb", 72 | "MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}" 73 | }, 74 | "download": [ 75 | { 76 | "url": "http://files.deeppavlov.ai/v1/glue/glue_stsb_roberta.tar.gz", 77 | "subdir": "{MODEL_PATH}" 78 | } 79 | ] 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /deeppavlov/configs/classifiers/paraphraser_rubert.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "class_name": "paraphraser_reader", 4 | "data_path": "{DOWNLOADS_PATH}/paraphraser_data", 5 | "do_lower_case": false 6 | }, 7 | "dataset_iterator": { 8 | "class_name": "siamese_iterator", 9 | "seed": 243, 10 | "len_valid": 500 11 | }, 12 | "chainer": { 13 | "in": ["text_a", "text_b"], 14 | "in_y": ["y"], 15 | "pipe": [ 16 | { 17 | "class_name": "torch_transformers_preprocessor", 18 | "vocab_file": "{TRANSFORMER}", 19 | "do_lower_case": false, 20 | "max_seq_length": 64, 21 | "in": ["text_a", "text_b"], 22 | "out": ["bert_features"] 23 | }, 24 | { 25 | "class_name": "torch_transformers_classifier", 26 | "n_classes": 2, 27 | "pretrained_bert": "{TRANSFORMER}", 28 | "save_path": "{MODEL_PATH}/model", 29 | "load_path": 
"{MODEL_PATH}/model", 30 | "optimizer": "AdamW", 31 | "optimizer_parameters": {"lr": 2e-05}, 32 | "learning_rate_drop_patience": 3, 33 | "learning_rate_drop_div": 2.0, 34 | "in": ["bert_features"], 35 | "in_y": ["y"], 36 | "out": ["predictions"] 37 | } 38 | ], 39 | "out": ["predictions"] 40 | }, 41 | "train": { 42 | "batch_size": 64, 43 | "pytest_max_batches": 2, 44 | "train_metrics": ["f1", "acc"], 45 | "metrics": ["f1", "acc"], 46 | "validation_patience": 7, 47 | "val_every_n_batches": 50, 48 | "log_every_n_batches": 50, 49 | "evaluation_targets": ["valid", "test"], 50 | "class_name": "torch_trainer" 51 | }, 52 | "metadata": { 53 | "variables": { 54 | "ROOT_PATH": "~/.deeppavlov", 55 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 56 | "MODELS_PATH": "{ROOT_PATH}/models", 57 | "MODEL_PATH": "{MODELS_PATH}/classifiers/paraphraser_rubert_torch", 58 | "TRANSFORMER": "DeepPavlov/rubert-base-cased" 59 | }, 60 | "download": [ 61 | { 62 | "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip", 63 | "subdir": "{DOWNLOADS_PATH}/paraphraser_data" 64 | }, 65 | { 66 | "url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip", 67 | "subdir": "{DOWNLOADS_PATH}/paraphraser_data" 68 | }, 69 | { 70 | "url": "http://files.deeppavlov.ai/v1/classifiers/paraphraser_rubert/paraphraser_rubert_v1.tar.gz", 71 | "subdir": "{MODEL_PATH}" 72 | } 73 | ] 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /deeppavlov/configs/doc_retrieval/en_ranker_pop_wiki.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "class_name": "odqa_reader", 4 | "data_path": "{DOWNLOADS_PATH}/odqa/enwiki", 5 | "save_path": "{DOWNLOADS_PATH}/odqa/enwiki.db", 6 | "dataset_format": "wiki" 7 | }, 8 | "dataset_iterator": { 9 | "class_name": "sqlite_iterator", 10 | "shuffle": false, 11 | "load_path": "{DOWNLOADS_PATH}/odqa/enwiki_l100.db" 12 | }, 13 | "chainer": { 14 | "in": ["docs"], 15 | "in_y": ["doc_ids", "doc_nums"], 16 | "out": ["pop_doc_ids"], 17 | "pipe": [ 18 | { 19 | "class_name": "hashing_tfidf_vectorizer", 20 | "id": "vectorizer", 21 | "fit_on": ["docs", "doc_ids", "doc_nums"], 22 | "save_path": "{MODELS_PATH}/odqa/enwiki_tfidf_matrix_par_lite.npz", 23 | "load_path": "{MODELS_PATH}/odqa/enwiki_tfidf_matrix_par_lite.npz", 24 | "tokenizer": { 25 | "class_name": "stream_spacy_tokenizer", 26 | "lemmas": true, 27 | "lowercase": true, 28 | "filter_stopwords": true, 29 | "ngram_range": [1, 3] 30 | } 31 | }, 32 | { 33 | "class_name": "tfidf_ranker", 34 | "top_n": 100, 35 | "in": ["docs"], 36 | "out": ["tfidf_doc_ids", "tfidf_doc_scores"], 37 | "vectorizer": "#vectorizer" 38 | }, 39 | { 40 | "class_name": "pop_ranker", 41 | "pop_dict_path": "{DOWNLOADS_PATH}/odqa/enwiki_popularities.json", 42 | "load_path": "{MODELS_PATH}/odqa/logreg_3features_v2.joblib", 43 | "top_n": 100, 44 | "in": ["tfidf_doc_ids", "tfidf_doc_scores"], 45 | "out": ["pop_doc_ids", "pop_doc_scores"] 46 | } 47 | ] 48 | }, 49 | "train": { 50 | "batch_size": 10000, 51 | "evaluation_targets": [], 52 | "class_name": "fit_trainer" 53 | }, 54 | "metadata": { 55 | "variables": { 56 | "ROOT_PATH": "~/.deeppavlov", 57 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 58 | "MODELS_PATH": "{ROOT_PATH}/models" 59 | }, 60 | "download": [ 61 | { 62 | "url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/enwiki_l100.tar.gz", 63 | "subdir": "{DOWNLOADS_PATH}/odqa" 64 | }, 65 | { 66 | "url": 
"http://files.deeppavlov.ai/deeppavlov_data/odqa/enwiki_tfidf_matrix_par_lite.tar.gz", 67 | "subdir": "{MODELS_PATH}/odqa" 68 | }, 69 | { 70 | "url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/enwiki_popularities.tar.gz", 71 | "subdir": "{DOWNLOADS_PATH}/odqa" 72 | }, 73 | { 74 | "url": "http://files.deeppavlov.ai/deeppavlov_data/ranking/logreg_3features_v2.joblib", 75 | "subdir": "{MODELS_PATH}/odqa" 76 | } 77 | ] 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /deeppavlov/configs/doc_retrieval/en_ranker_tfidf_wiki.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "class_name": "odqa_reader", 4 | "data_path": "{DOWNLOADS_PATH}/odqa/enwiki", 5 | "save_path": "{DOWNLOADS_PATH}/odqa/enwiki.db", 6 | "dataset_format": "wiki" 7 | }, 8 | "dataset_iterator": { 9 | "class_name": "sqlite_iterator", 10 | "shuffle": false, 11 | "load_path": "{DOWNLOADS_PATH}/odqa/enwiki_l100.db" 12 | }, 13 | "chainer": { 14 | "in": ["docs"], 15 | "in_y": ["doc_ids", "doc_nums"], 16 | "out": ["tfidf_doc_ids"], 17 | "pipe": [ 18 | { 19 | "class_name": "hashing_tfidf_vectorizer", 20 | "id": "vectorizer", 21 | "fit_on": ["docs", "doc_ids", "doc_nums"], 22 | "save_path": "{MODELS_PATH}/odqa/enwiki_tfidf_matrix_par_lite.npz", 23 | "load_path": "{MODELS_PATH}/odqa/enwiki_tfidf_matrix_par_lite.npz", 24 | "tokenizer": { 25 | "class_name": "stream_spacy_tokenizer", 26 | "lemmas": true, 27 | "lowercase": true, 28 | "filter_stopwords": true, 29 | "ngram_range": [1, 3] 30 | } 31 | }, 32 | { 33 | "class_name": "tfidf_ranker", 34 | "top_n": 100, 35 | "in": ["docs"], 36 | "out": ["tfidf_doc_ids", "tfidf_doc_scores"], 37 | "vectorizer": "#vectorizer" 38 | } 39 | ] 40 | }, 41 | "train": { 42 | "batch_size": 10000, 43 | "evaluation_targets": [], 44 | "class_name": "fit_trainer" 45 | }, 46 | "metadata": { 47 | "variables": { 48 | "ROOT_PATH": "~/.deeppavlov", 49 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 50 | "MODELS_PATH": "{ROOT_PATH}/models" 51 | }, 52 | "download": [ 53 | { 54 | "url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/enwiki_l100.tar.gz", 55 | "subdir": "{DOWNLOADS_PATH}/odqa" 56 | }, 57 | { 58 | "url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/enwiki_tfidf_matrix_par_lite.tar.gz", 59 | "subdir": "{MODELS_PATH}/odqa" 60 | } 61 | ] 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /deeppavlov/configs/doc_retrieval/ru_ranker_tfidf_wiki.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "class_name": "odqa_reader", 4 | "data_path": "{DOWNLOADS_PATH}/odqa/ruwiki", 5 | "save_path": "{DOWNLOADS_PATH}/odqa/ruwiki_par_page_compr.db", 6 | "dataset_format": "wiki" 7 | }, 8 | "dataset_iterator": { 9 | "class_name": "sqlite_iterator", 10 | "shuffle": false, 11 | "load_path": "{DOWNLOADS_PATH}/odqa/ruwiki_par_page_compr.db" 12 | }, 13 | "chainer": { 14 | "in": ["docs"], 15 | "in_y": ["doc_ids", "doc_nums"], 16 | "out": ["tfidf_doc_ids"], 17 | "pipe": [ 18 | { 19 | "class_name": "hashing_tfidf_vectorizer", 20 | "id": "vectorizer", 21 | "fit_on": ["docs", "doc_ids", "doc_nums"], 22 | "save_path": "{MODELS_PATH}/odqa/ruwiki_tfidf_matrix_compr.npz", 23 | "load_path": "{MODELS_PATH}/odqa/ruwiki_tfidf_matrix_compr.npz", 24 | "tokenizer": { 25 | "class_name": "stream_spacy_tokenizer", 26 | "spacy_model": "ru_core_news_sm", 27 | "lemmas": true, 28 | "lowercase": true, 29 | 
"filter_stopwords": true, 30 | "ngram_range": [1, 3] 31 | } 32 | }, 33 | { 34 | "class_name": "tfidf_ranker", 35 | "top_n": 100, 36 | "in": ["docs"], 37 | "out": ["tfidf_doc_ids", "tfidf_doc_scores"], 38 | "vectorizer": "#vectorizer" 39 | } 40 | ] 41 | }, 42 | "train": { 43 | "batch_size": 10000, 44 | "evaluation_targets": [], 45 | "class_name": "fit_trainer" 46 | }, 47 | "metadata": { 48 | "variables": { 49 | "ROOT_PATH": "~/.deeppavlov", 50 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 51 | "MODELS_PATH": "{ROOT_PATH}/models" 52 | }, 53 | "download": [ 54 | { 55 | "url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/ruwiki_par_page_compr.tar.gz", 56 | "subdir": "{DOWNLOADS_PATH}/odqa" 57 | }, 58 | { 59 | "url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/ruwiki_tfidf_matrix_compr.tar.gz", 60 | "subdir": "{MODELS_PATH}/odqa" 61 | } 62 | ] 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /deeppavlov/configs/embedder/bert_embedder.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["texts"], 4 | "pipe": [ 5 | { 6 | "class_name": "transformers_bert_preprocessor", 7 | "vocab_file": "{BERT_PATH}/vocab.txt", 8 | "do_lower_case": false, 9 | "max_seq_length": 512, 10 | "in": ["texts"], 11 | "out": ["tokens", "subword_tokens", "subword_tok_ids", "startofword_markers", "attention_mask"] 12 | }, 13 | { 14 | "class_name": "transformers_bert_embedder", 15 | "bert_config_path": "{BERT_PATH}/bert_config.json", 16 | "load_path": "{BERT_PATH}", 17 | "truncate": true, 18 | "in": ["subword_tok_ids", "startofword_markers", "attention_mask"], 19 | "out": ["word_emb", "subword_emb", "max_emb", "mean_emb", "pooler_output"] 20 | } 21 | ], 22 | "out": ["tokens", "word_emb", "subword_tokens", "subword_emb", "max_emb", "mean_emb", "pooler_output"] 23 | }, 24 | "train": {}, 25 | "metadata": { 26 | "variables": { 27 | "ROOT_PATH": "~/.deeppavlov", 28 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 29 | "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12_pt" 30 | }, 31 | "labels": {}, 32 | "download": [ 33 | { 34 | "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12_pt.tar.gz", 35 | "subdir": "{DOWNLOADS_PATH}/bert_models" 36 | } 37 | ] 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /deeppavlov/configs/embedder/bert_sentence_embedder.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["texts"], 4 | "pipe": [ 5 | { 6 | "class_name": "transformers_bert_preprocessor", 7 | "vocab_file": "{BERT_PATH}/vocab.txt", 8 | "do_lower_case": false, 9 | "max_seq_length": 512, 10 | "in": ["texts"], 11 | "out": ["tokens", "subword_tokens", "subword_tok_ids", "startofword_markers", "attention_mask"] 12 | }, 13 | { 14 | "class_name": "transformers_bert_embedder", 15 | "bert_config_path": "{BERT_PATH}/config.json", 16 | "load_path": "{BERT_PATH}", 17 | "truncate": false, 18 | "in": ["subword_tok_ids", "startofword_markers", "attention_mask"], 19 | "out": ["word_emb", "subword_emb", "max_emb", "mean_emb", "pooler_output"] 20 | } 21 | ], 22 | "out": ["max_emb", "mean_emb", "pooler_output"] 23 | }, 24 | "train": {}, 25 | "metadata": { 26 | "variables": { 27 | "ROOT_PATH": "~/.deeppavlov", 28 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 29 | "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/sentence_multi_cased_L-12_H-768_A-12_pt_v1" 30 | }, 31 | "labels": 
{}, 32 | "download": [ 33 | { 34 | "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/sentence_multi_cased_L-12_H-768_A-12_pt_v1.tar.gz", 35 | "subdir": "{DOWNLOADS_PATH}/bert_models" 36 | } 37 | ] 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /deeppavlov/configs/entity_extraction/entity_detection_en.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["x"], 4 | "pipe": [ 5 | { 6 | "class_name": "ner_chunker", 7 | "batch_size": 16, 8 | "max_seq_len" : 300, 9 | "vocab_file": "{TRANSFORMER}", 10 | "in": ["x"], 11 | "out": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"] 12 | }, 13 | { 14 | "thres_proba": 0.6, 15 | "o_tag": "O", 16 | "tags_file": "{NER_PATH}/tag.dict", 17 | "class_name": "entity_detection_parser", 18 | "id": "edp" 19 | }, 20 | { 21 | "class_name": "ner_chunk_model", 22 | "ner": { 23 | "config_path": "{CONFIGS_PATH}/ner/ner_ontonotes_bert.json", 24 | "overwrite": { 25 | "chainer.out": ["x_tokens", "tokens_offsets", "y_pred", "probas"] 26 | } 27 | }, 28 | "ner_parser": "#edp", 29 | "in": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"], 30 | "out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"] 31 | } 32 | ], 33 | "out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"] 34 | }, 35 | "metadata": { 36 | "variables": { 37 | "ROOT_PATH": "~/.deeppavlov", 38 | "MODELS_PATH": "{ROOT_PATH}/models", 39 | "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", 40 | "TRANSFORMER": "bert-base-cased", 41 | "NER_PATH": "{MODELS_PATH}/ner_ontonotes_bert_torch_crf" 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /deeppavlov/configs/entity_extraction/entity_detection_ru.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["x"], 4 | "pipe": [ 5 | { 6 | "class_name": "ner_chunker", 7 | "batch_size": 16, 8 | "max_seq_len" : 300, 9 | "vocab_file": "{TRANSFORMER}", 10 | "in": ["x"], 11 | "out": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"] 12 | }, 13 | { 14 | "thres_proba": 0.05, 15 | "o_tag": "O", 16 | "tags_file": "{NER_PATH}/tag.dict", 17 | "class_name": "entity_detection_parser", 18 | "id": "edp" 19 | }, 20 | { 21 | "class_name": "ner_chunk_model", 22 | "ner": {"config_path": "{CONFIGS_PATH}/ner/ner_rus_bert_probas.json"}, 23 | "ner_parser": "#edp", 24 | "in": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"], 25 | "out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"] 26 | } 27 | ], 28 | "out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"] 29 | }, 30 | "metadata": { 31 | "variables": { 32 | "ROOT_PATH": "~/.deeppavlov", 33 | "MODELS_PATH": "{ROOT_PATH}/models", 34 | "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", 35 | "TRANSFORMER": "DeepPavlov/rubert-base-cased", 36 | "NER_PATH": "{MODELS_PATH}/wiki_ner_rus_bert" 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /deeppavlov/configs/entity_extraction/entity_extraction_en.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["x"], 4 | "pipe": [ 5 | { 6 | 
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_detection_en.json", 7 | "in": ["x"], 8 | "out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"] 9 | }, 10 | { 11 | "config_path": "{CONFIGS_PATH}/entity_extraction/entity_linking_en.json", 12 | "in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"], 13 | "out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"] 14 | } 15 | ], 16 | "out": ["entity_substr", "tags", "entity_offsets", "entity_ids", "entity_conf", "entity_pages", "entity_labels"] 17 | }, 18 | "metadata": { 19 | "variables": { 20 | "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /deeppavlov/configs/entity_extraction/entity_extraction_ru.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["x"], 4 | "pipe": [ 5 | { 6 | "config_path": "{CONFIGS_PATH}/entity_extraction/entity_detection_ru.json", 7 | "in": ["x"], 8 | "out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"] 9 | }, 10 | { 11 | "config_path": "{CONFIGS_PATH}/entity_extraction/entity_linking_ru.json", 12 | "in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"], 13 | "out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"] 14 | } 15 | ], 16 | "out": ["entity_substr", "tags", "entity_offsets", "entity_ids", "entity_conf", "entity_pages", "entity_labels"] 17 | }, 18 | "metadata": { 19 | "variables": { 20 | "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /deeppavlov/configs/entity_extraction/entity_linking_en.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"], 4 | "pipe": [ 5 | { 6 | "class_name": "torch_transformers_entity_ranker_infer", 7 | "id": "entity_descr_ranking", 8 | "pretrained_bert": "{TRANSFORMER}", 9 | "encoder_weights_path": "{MODELS_PATH}/entity_linking_eng/encoder.pth.tar", 10 | "bilinear_weights_path": "{MODELS_PATH}/entity_linking_eng/bilinear.pth.tar", 11 | "special_token_id": 30522, 12 | "emb_size": 512, 13 | "block_size": 8 14 | }, 15 | { 16 | "class_name": "entity_linker", 17 | "in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"], 18 | "out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"], 19 | "load_path": "{DOWNLOADS_PATH}/entity_linking_eng", 20 | "entities_database_filename": "el_eng_v2.db", 21 | "entity_ranker": "#entity_descr_ranking", 22 | "rank_in_runtime": true, 23 | "num_entities_for_bert_ranking": 20, 24 | "include_mention": false, 25 | "num_entities_to_return": 3, 26 | "lemmatize": true, 27 | "use_descriptions": true, 28 | "use_connections": true, 29 | "use_tags": true, 30 | "full_paragraph": true, 31 | "return_confidences": true, 32 | "lang": "en" 33 | } 34 | ], 35 | "out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"] 36 | }, 37 | "metadata": { 38 | "variables": { 39 | "ROOT_PATH": "~/.deeppavlov", 40 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 41 | "MODELS_PATH": "{ROOT_PATH}/models", 42 | "TRANSFORMER": "prajjwal1/bert-small" 43 | }, 44 | "download": [ 45 | { 46 | 
"url": "http://files.deeppavlov.ai/kbqa/downloads/el_db_eng_v2.tar.gz", 47 | "subdir": "{DOWNLOADS_PATH}/entity_linking_eng" 48 | }, 49 | { 50 | "url": "http://files.deeppavlov.ai/deeppavlov_data/entity_linking/el_ranker_eng.tar.gz", 51 | "subdir": "{MODELS_PATH}/entity_linking_eng" 52 | } 53 | ] 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /deeppavlov/configs/kbqa/wiki_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["parser_info", "query"], 4 | "pipe": [ 5 | { 6 | "class_name": "wiki_parser", 7 | "in": ["parser_info", "query"], 8 | "out": ["wiki_parser_output"], 9 | "wiki_filename": "{DOWNLOADS_PATH}/wikidata/wikidata_compr.pickle", 10 | "file_format": "pickle", 11 | "lang": "@en" 12 | } 13 | ], 14 | "out": ["wiki_parser_output"] 15 | }, 16 | "metadata": { 17 | "variables": { 18 | "ROOT_PATH": "~/.deeppavlov", 19 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 20 | "MODELS_PATH": "{ROOT_PATH}/models", 21 | "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" 22 | }, 23 | "download": [ 24 | { 25 | "url": "http://files.deeppavlov.ai/kbqa/wikidata/wikidata_compr.pickle", 26 | "subdir": "{DOWNLOADS_PATH}/wikidata" 27 | } 28 | ] 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /deeppavlov/configs/morpho_syntax_parser/ru_syntagrus_joint_parsing.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["x_words"], 4 | "pipe": [ 5 | { 6 | "id": "main", 7 | "class_name": "joint_tagger_parser", 8 | "tagger": { 9 | "config_path": "{CONFIGS_PATH}/morpho_syntax_parser/morpho_ru_syntagrus_bert.json", 10 | "overwrite": {"chainer.pipe.6.return_string": false} 11 | }, 12 | "parser": { 13 | "config_path": "{CONFIGS_PATH}/morpho_syntax_parser/syntax_ru_syntagrus_bert.json", 14 | "overwrite": {"chainer.pipe.6.return_string": false} 15 | }, 16 | "in": ["x_words"], 17 | "out": ["y_parsed"] 18 | } 19 | ], 20 | "out": ["y_parsed"] 21 | }, 22 | "metadata": { 23 | "variables": { 24 | "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /deeppavlov/configs/ner/ner_bert_base.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["x"], 4 | "in_y": ["y"], 5 | "pipe": [ 6 | { 7 | "class_name": "torch_transformers_ner_preprocessor", 8 | "vocab_file": "{BASE_MODEL}", 9 | "in": ["x"], 10 | "out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask", "tokens_offsets"] 11 | }, 12 | { 13 | "id": "tag_vocab", 14 | "class_name": "simple_vocab", 15 | "unk_token": ["O"], 16 | "save_path": "{MODEL_PATH}/tag.dict", 17 | "load_path": "{MODEL_PATH}/tag.dict", 18 | "fit_on": ["y"], 19 | "in": ["y"], 20 | "out": ["y_ind"] 21 | }, 22 | { 23 | "class_name": "torch_transformers_sequence_tagger", 24 | "n_tags": "#tag_vocab.len", 25 | "pretrained_bert": "{BASE_MODEL}", 26 | "save_path": "{MODEL_PATH}/model", 27 | "load_path": "{MODEL_PATH}/model", 28 | "in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"], 29 | "in_y": ["y_ind"], 30 | "out": ["y_pred_ind", "probas"] 31 | }, 32 | { 33 | "ref": "tag_vocab", 34 | "in": ["y_pred_ind"], 35 | "out": ["y_pred"] 36 | } 37 | ], 38 | "out": ["x_tokens", "y_pred"] 39 | }, 40 | "metadata": { 41 | "variables": { 42 | "BASE_MODEL": 
"bert-base-multilingual-cased", 43 | "ROOT_PATH": "~/.deeppavlov", 44 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 45 | "MODELS_PATH": "{ROOT_PATH}/models", 46 | "MODEL_PATH": "{MODELS_PATH}/ner/{BASE_MODEL}" 47 | }, 48 | "download": [ 49 | { 50 | "url": "http://files.deeppavlov.ai/v1/ner/ner_bert_base.tar.gz", 51 | "subdir": "{MODEL_PATH}" 52 | } 53 | ] 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /deeppavlov/configs/odqa/en_odqa_infer_wiki.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["question_raw"], 4 | "out": ["answer", "answer_score", "answer_place"], 5 | "pipe": [ 6 | { 7 | "config_path": "{CONFIGS_PATH}/doc_retrieval/en_ranker_tfidf_wiki.json", 8 | "in": ["question_raw"], 9 | "out": ["tfidf_doc_ids"] 10 | }, 11 | { 12 | "class_name": "bpr", 13 | "load_path": "{MODELS_PATH}/bpr/eng", 14 | "query_encoder_file": "query_encoder_en.pth.tar", 15 | "bpr_index": "bpr_finetuned_nq_adv.idx", 16 | "pretrained_model": "bert-base-uncased", 17 | "top_n": 100, 18 | "in": ["question_raw"], 19 | "out": ["bpr_doc_ids"] 20 | }, 21 | { 22 | "class_name": "concat_lists", 23 | "in": ["tfidf_doc_ids", "bpr_doc_ids"], 24 | "out": ["doc_ids"] 25 | }, 26 | { 27 | "class_name": "wiki_sqlite_vocab", 28 | "in": ["doc_ids"], 29 | "out": ["doc_text"], 30 | "join_docs": false, 31 | "shuffle": false, 32 | "load_path": "{DOWNLOADS_PATH}/odqa/enwiki_l100.db" 33 | }, 34 | { 35 | "class_name": "string_multiplier", 36 | "in": ["question_raw", "doc_text"], 37 | "out":["questions"] 38 | }, 39 | { 40 | "class_name": "logit_ranker", 41 | "batch_size": 64, 42 | "squad_model": {"config_path": "{CONFIGS_PATH}/squad/qa_nq_psgcls_bert.json"}, 43 | "sort_noans": true, 44 | "in": ["doc_text", "questions"], 45 | "out": ["answer", "answer_score", "answer_place"] 46 | } 47 | ] 48 | }, 49 | "metadata": { 50 | "variables": { 51 | "ROOT_PATH": "~/.deeppavlov", 52 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 53 | "MODELS_PATH": "{ROOT_PATH}/models", 54 | "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" 55 | }, 56 | "download": [ 57 | { 58 | "url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/bpr_encoder_index_eng.tar.gz", 59 | "subdir": "{MODELS_PATH}/bpr/eng" 60 | } 61 | ] 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /deeppavlov/configs/odqa/en_odqa_pop_infer_wiki.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["question_raw"], 4 | "out": ["answer", "answer_score", "answer_place"], 5 | "pipe": [ 6 | { 7 | "config_path": "{CONFIGS_PATH}/doc_retrieval/en_ranker_pop_wiki.json", 8 | "in": ["question_raw"], 9 | "out": ["tfidf_doc_ids"] 10 | }, 11 | { 12 | "class_name": "bpr", 13 | "load_path": "{MODELS_PATH}/bpr/eng", 14 | "query_encoder_file": "query_encoder_en.pth.tar", 15 | "bpr_index": "bpr_finetuned_nq_adv.idx", 16 | "pretrained_model": "bert-base-uncased", 17 | "top_n": 100, 18 | "in": ["question_raw"], 19 | "out": ["bpr_doc_ids"] 20 | }, 21 | { 22 | "class_name": "concat_lists", 23 | "in": ["tfidf_doc_ids", "bpr_doc_ids"], 24 | "out": ["doc_ids"] 25 | }, 26 | { 27 | "class_name": "wiki_sqlite_vocab", 28 | "in": ["doc_ids"], 29 | "out": ["doc_text"], 30 | "join_docs": false, 31 | "shuffle": false, 32 | "load_path": "{DOWNLOADS_PATH}/odqa/enwiki_l100.db" 33 | }, 34 | { 35 | "class_name": "string_multiplier", 36 | "in": ["question_raw", "doc_text"], 37 | "out":["questions"] 38 | }, 39 
| { 40 | "class_name": "logit_ranker", 41 | "batch_size": 64, 42 | "squad_model": {"config_path": "{CONFIGS_PATH}/squad/qa_nq_psgcls_bert.json"}, 43 | "sort_noans": true, 44 | "in": ["doc_text", "questions"], 45 | "out": ["answer", "answer_score", "answer_place"] 46 | } 47 | ] 48 | }, 49 | "metadata": { 50 | "variables": { 51 | "ROOT_PATH": "~/.deeppavlov", 52 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 53 | "MODELS_PATH": "{ROOT_PATH}/models", 54 | "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" 55 | }, 56 | "download": [ 57 | { 58 | "url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/bpr_encoder_index_eng.tar.gz", 59 | "subdir": "{MODELS_PATH}/bpr/eng" 60 | } 61 | ] 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /deeppavlov/configs/odqa/ru_odqa_infer_wiki.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["question_raw"], 4 | "out": ["best_answer"], 5 | "pipe": [ 6 | { 7 | "config_path": "{CONFIGS_PATH}/doc_retrieval/ru_ranker_tfidf_wiki.json", 8 | "in": ["question_raw"], 9 | "out": ["tfidf_doc_ids"] 10 | }, 11 | { 12 | "class_name": "wiki_sqlite_vocab", 13 | "in": ["tfidf_doc_ids"], 14 | "out": ["tfidf_doc_text"], 15 | "join_docs": false, 16 | "shuffle": false, 17 | "load_path": "{DOWNLOADS_PATH}/odqa/ruwiki_par_page_compr.db" 18 | }, 19 | { 20 | "class_name": "string_multiplier", 21 | "in": ["question_raw", "tfidf_doc_text"], 22 | "out":["questions"] 23 | }, 24 | { 25 | "class_name": "logit_ranker", 26 | "batch_size": 64, 27 | "squad_model": {"config_path": "{CONFIGS_PATH}/squad/qa_multisberquad_bert.json"}, 28 | "sort_noans": true, 29 | "in": ["tfidf_doc_text", "questions"], 30 | "out": ["best_answer", "best_answer_score"] 31 | } 32 | ] 33 | }, 34 | "metadata": { 35 | "variables": { 36 | "ROOT_PATH": "~/.deeppavlov", 37 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 38 | "MODELS_PATH": "{ROOT_PATH}/models", 39 | "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" 40 | }, 41 | "download": [ 42 | ] 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /deeppavlov/configs/ranking/path_ranking_nll_roberta_en.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["question", "rels"], 4 | "pipe": [ 5 | { 6 | "class_name": "path_ranking_preprocessor", 7 | "vocab_file": "{TRANSFORMER}", 8 | "do_lower_case": false, 9 | "additional_special_tokens": ["", "", "", "", "", "", ""], 10 | "max_seq_length": 96, 11 | "in": ["question", "rels"], 12 | "out": ["bert_features"] 13 | }, 14 | { 15 | "class_name": "torch_transformers_nll_ranker", 16 | "in": ["bert_features"], 17 | "out": ["model_output"], 18 | "return_probas": true, 19 | "save_path": "{MODEL_PATH}/model", 20 | "load_path": "{MODEL_PATH}/model", 21 | "encoder_save_path": "{MODEL_PATH}/encoder", 22 | "linear_save_path": "{MODEL_PATH}/linear", 23 | "pretrained_bert": "{TRANSFORMER}", 24 | "learning_rate_drop_patience": 5, 25 | "learning_rate_drop_div": 1.5, 26 | "optimizer_parameters": {"lr": 1e-5, "weight_decay": 0.01, "eps": 1e-6} 27 | } 28 | ], 29 | "out": ["model_output"] 30 | }, 31 | "metadata": { 32 | "variables": { 33 | "TRANSFORMER": "haisongzhang/roberta-tiny-cased", 34 | "MODEL_PATH": "~/.deeppavlov/models/classifiers/path_ranking_nll_roberta_lcquad2" 35 | }, 36 | "download": [ 37 | { 38 | "url": "http://files.deeppavlov.ai/kbqa/models/path_ranking_nll_roberta_lcquad2.tar.gz", 39 | "subdir": "{MODEL_PATH}" 40 | } 
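The ODQA configs above chain sparse (TF-IDF) and dense (BPR) retrieval over a Wikipedia dump with a reading-comprehension model. A minimal sketch for the English pipeline; note that the first `download=True` run fetches a multi-gigabyte Wikipedia index:

```python
from deeppavlov import build_model

odqa = build_model('en_odqa_infer_wiki', download=True)

# One answer, score and rank per question: the "answer",
# "answer_score" and "answer_place" outputs of the chainer above.
answers, scores, places = odqa(['Where did guinea pigs originate?'])
print(answers, scores, places)
```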
41 | ] 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /deeppavlov/configs/ranking/ranking_ubuntu_v2_torch_bert_uncased.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "class_name": "ubuntu_v2_reader", 4 | "data_path": "{DOWNLOADS_PATH}/ubuntu_v2_data" 5 | }, 6 | "dataset_iterator": { 7 | "class_name": "siamese_iterator", 8 | "seed": 243 9 | }, 10 | "chainer": { 11 | "in": [ 12 | "x" 13 | ], 14 | "in_y": [ 15 | "y" 16 | ], 17 | "pipe": [ 18 | { 19 | "class_name": "torch_bert_ranker_preprocessor", 20 | "vocab_file": "bert-base-uncased", 21 | "do_lower_case": true, 22 | "max_seq_length": 128, 23 | "in": [ 24 | "x" 25 | ], 26 | "out": [ 27 | "bert_features" 28 | ] 29 | }, 30 | { 31 | "class_name": "torch_bert_ranker", 32 | "pretrained_bert": "bert-base-uncased", 33 | "save_path": "{MODEL_PATH}/model", 34 | "load_path": "{MODEL_PATH}/model", 35 | "optimizer": "AdamW", 36 | "optimizer_parameters": { 37 | "lr": 2e-5, 38 | "weight_decay": 1e-2, 39 | "betas": [ 40 | 0.9, 41 | 0.999 42 | ], 43 | "eps": 1e-6 44 | }, 45 | "clip_norm": 1.0, 46 | "in": [ 47 | "bert_features" 48 | ], 49 | "in_y": [ 50 | "y" 51 | ], 52 | "out": [ 53 | "predictions" 54 | ] 55 | } 56 | ], 57 | "out": [ 58 | "predictions" 59 | ] 60 | }, 61 | "train": { 62 | "batch_size": 32, 63 | "pytest_max_batches": 2, 64 | "train_metrics": [], 65 | "metrics": [ 66 | "r@1", 67 | "r@2", 68 | "r@5" 69 | ], 70 | "validation_patience": 1, 71 | "val_every_n_epochs": 1, 72 | "log_every_n_epochs": 1, 73 | "evaluation_targets": [ 74 | "valid", 75 | "test" 76 | ], 77 | "class_name": "torch_trainer" 78 | }, 79 | "metadata": { 80 | "variables": { 81 | "ROOT_PATH": "~/.deeppavlov", 82 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 83 | "MODELS_PATH": "{ROOT_PATH}/models", 84 | "MODEL_PATH": "{MODELS_PATH}/ubuntu_v2_uncased_torch_bert_model" 85 | }, 86 | "download": [ 87 | { 88 | "url": "http://files.deeppavlov.ai/datasets/ubuntu_v2_data.tar.gz", 89 | "subdir": "{DOWNLOADS_PATH}/ubuntu_v2_data" 90 | }, 91 | { 92 | "url": "http://files.deeppavlov.ai/deeppavlov_data/ubuntu_v2_uncased_torch_bert_model_v2.tar.gz", 93 | "subdir": "{MODELS_PATH}" 94 | } 95 | ] 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /deeppavlov/configs/ranking/rel_ranking_nll_bert_ru.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["question", "rels"], 4 | "pipe": [ 5 | { 6 | "class_name": "path_ranking_preprocessor", 7 | "vocab_file": "{TRANSFORMER}", 8 | "do_lower_case": false, 9 | "max_seq_length": 96, 10 | "in": ["question", "rels"], 11 | "out": ["bert_features"] 12 | }, 13 | { 14 | "class_name": "torch_transformers_nll_ranker", 15 | "in": ["bert_features"], 16 | "out": ["model_output"], 17 | "return_probas": true, 18 | "save_path": "{MODEL_PATH}/model", 19 | "load_path": "{MODEL_PATH}/model", 20 | "encoder_save_path": "{MODEL_PATH}/encoder", 21 | "linear_save_path": "{MODEL_PATH}/linear", 22 | "pretrained_bert": "{TRANSFORMER}", 23 | "learning_rate_drop_patience": 4, 24 | "learning_rate_drop_div": 1.5, 25 | "optimizer_parameters": {"lr": 1e-5, "weight_decay": 0.01, "eps": 1e-6} 26 | } 27 | ], 28 | "out": ["model_output"] 29 | }, 30 | "metadata": { 31 | "variables": { 32 | "ROOT_PATH": "~/.deeppavlov", 33 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 34 | "MODELS_PATH": "{ROOT_PATH}/models", 35 | "TRANSFORMER": 
"DeepPavlov/rubert-base-cased", 36 | "MODEL_PATH": "{MODELS_PATH}/classifiers/rel_ranking_nll_bert_ru" 37 | }, 38 | "download": [ 39 | { 40 | "url": "http://files.deeppavlov.ai/kbqa/models/rel_ranking_nll_bert_ru.tar.gz", 41 | "subdir": "{MODEL_PATH}" 42 | } 43 | ] 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /deeppavlov/configs/spelling_correction/brillmoore_wikitypos_en.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "class_name": "typos_wikipedia_reader", 4 | "data_path": "{DOWNLOADS_PATH}" 5 | }, 6 | "dataset_iterator": { 7 | "class_name": "typos_iterator", 8 | "test_ratio": 0.05 9 | }, 10 | "chainer":{ 11 | "in": ["x"], 12 | "in_y": ["y"], 13 | "pipe": [ 14 | { 15 | "class_name": "str_lower", 16 | "id": "lower", 17 | "in": ["x"], 18 | "out": ["x_lower"] 19 | }, 20 | { 21 | "class_name": "nltk_moses_tokenizer", 22 | "id": "tokenizer", 23 | "in": ["x_lower"], 24 | "out": ["x_tokens"] 25 | }, 26 | { 27 | "ref": "tokenizer", 28 | "in": ["y"], 29 | "out": ["y_tokens"] 30 | }, 31 | { 32 | "fit_on": ["x_tokens", "y_tokens"], 33 | "in": ["x_tokens"], 34 | "out": ["tokens_candidates"], 35 | "class_name": "spelling_error_model", 36 | "window": 1, 37 | "candidates_count": 4, 38 | "dictionary": { 39 | "class_name": "wikitionary_100K_vocab", 40 | "data_dir": "{DOWNLOADS_PATH}/vocabs" 41 | }, 42 | "save_path": "{MODELS_PATH}/error_model/error_model.tsv" 43 | }, 44 | { 45 | "class_name": "kenlm_elector", 46 | "in": ["tokens_candidates"], 47 | "out": ["y_predicted_tokens"], 48 | "load_path": "{DOWNLOADS_PATH}/language_models/en_wiki_no_punkt.arpa.binary" 49 | }, 50 | { 51 | "ref": "tokenizer", 52 | "in": ["y_predicted_tokens"], 53 | "out": ["y_predicted"] 54 | } 55 | ], 56 | "out": ["y_predicted"] 57 | }, 58 | "train": { 59 | "evaluation_targets": ["test"], 60 | "class_name": "fit_trainer" 61 | }, 62 | "metadata": { 63 | "variables": { 64 | "ROOT_PATH": "~/.deeppavlov", 65 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 66 | "MODELS_PATH": "{ROOT_PATH}/models" 67 | }, 68 | "download": [ 69 | { 70 | "url": "http://files.deeppavlov.ai/deeppavlov_data/error_model.tar.gz", 71 | "subdir": "{MODELS_PATH}" 72 | }, 73 | { 74 | "url": "http://files.deeppavlov.ai/lang_models/en_wiki_no_punkt.arpa.binary.gz", 75 | "subdir": "{DOWNLOADS_PATH}/language_models" 76 | }, 77 | { 78 | "url": "http://files.deeppavlov.ai/datasets/wiktionary/wikipedia_100K_vocab.tar.gz", 79 | "subdir": "{DOWNLOADS_PATH}/vocabs" 80 | } 81 | ] 82 | } 83 | } -------------------------------------------------------------------------------- /deeppavlov/configs/spelling_correction/levenshtein_corrector_ru.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer":{ 3 | "in": ["x"], 4 | "pipe": [ 5 | { 6 | "class_name": "str_lower", 7 | "id": "lower", 8 | "in": ["x"], 9 | "out": ["x_lower"] 10 | }, 11 | { 12 | "class_name": "nltk_moses_tokenizer", 13 | "id": "tokenizer", 14 | "in": ["x_lower"], 15 | "out": ["x_tokens"] 16 | }, 17 | { 18 | "id": "vocab", 19 | "class_name": "simple_vocab", 20 | "save_path": "{DOWNLOADS_PATH}/vocabs/russian_words_vocab.dict", 21 | "load_path": "{DOWNLOADS_PATH}/vocabs/russian_words_vocab.dict" 22 | }, 23 | { 24 | "in": ["x_tokens"], 25 | "out": ["tokens_candidates"], 26 | "class_name": "spelling_levenshtein", 27 | "words": "#vocab.keys()" 28 | }, 29 | { 30 | "class_name": "kenlm_elector", 31 | "in": ["tokens_candidates"], 32 | "out": 
["y_predicted_tokens"], 33 | "load_path": "{DOWNLOADS_PATH}/language_models/ru_wiyalen_no_punkt.arpa.binary" 34 | }, 35 | { 36 | "ref": "tokenizer", 37 | "in": ["y_predicted_tokens"], 38 | "out": ["y_predicted"] 39 | } 40 | ], 41 | "out": ["y_predicted"] 42 | }, 43 | "metadata": { 44 | "variables": { 45 | "ROOT_PATH": "~/.deeppavlov", 46 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 47 | "MODELS_PATH": "{ROOT_PATH}/models" 48 | }, 49 | "download": [ 50 | { 51 | "url": "http://files.deeppavlov.ai/deeppavlov_data/vocabs/russian_words_vocab.dict.gz", 52 | "subdir": "{DOWNLOADS_PATH}/vocabs" 53 | }, 54 | { 55 | "url": "http://files.deeppavlov.ai/lang_models/ru_wiyalen_no_punkt.arpa.binary.gz", 56 | "subdir": "{DOWNLOADS_PATH}/language_models" 57 | } 58 | ] 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /deeppavlov/configs/squad/qa_nq_psgcls_bert.json: -------------------------------------------------------------------------------- 1 | { 2 | "chainer": { 3 | "in": ["context_raw", "question_raw"], 4 | "pipe": [ 5 | { 6 | "class_name": "torch_squad_transformers_preprocessor", 7 | "vocab_file": "{TRANSFORMER}", 8 | "do_lower_case": "{LOWERCASE}", 9 | "max_seq_length": 384, 10 | "in": ["question_raw", "context_raw"], 11 | "out": ["bert_features", "subtokens", "split_context"] 12 | }, 13 | { 14 | "class_name": "squad_bert_mapping", 15 | "do_lower_case": "{LOWERCASE}", 16 | "in": ["split_context", "bert_features", "subtokens"], 17 | "out": ["subtok2chars", "char2subtoks"] 18 | }, 19 | { 20 | "class_name": "torch_transformers_squad", 21 | "pretrained_bert": "{TRANSFORMER}", 22 | "save_path": "{MODEL_PATH}/model", 23 | "load_path": "{MODEL_PATH}/model", 24 | "torch_seed": 1, 25 | "optimizer": "AdamW", 26 | "optimizer_parameters": { 27 | "lr": 2e-05, 28 | "weight_decay": 0.01, 29 | "betas": [0.9, 0.999], 30 | "eps": 1e-06 31 | }, 32 | "random_seed": 1, 33 | "psg_cls": true, 34 | "learning_rate_drop_patience": 2, 35 | "learning_rate_drop_div": 2.0, 36 | "in": ["bert_features"], 37 | "out": ["ans_start_predicted", "ans_end_predicted", "logits", "scores", "inds"] 38 | }, 39 | { 40 | "class_name": "squad_bert_ans_postprocessor", 41 | "in": ["ans_start_predicted", "ans_end_predicted", "split_context", "subtok2chars", "subtokens", "inds"], 42 | "out": ["ans_predicted", "ans_start_predicted", "ans_end_predicted"] 43 | } 44 | ], 45 | "out": ["ans_predicted", "ans_start_predicted", "scores"] 46 | }, 47 | "metadata": { 48 | "variables": { 49 | "LOWERCASE": true, 50 | "TRANSFORMER": "bert-base-uncased", 51 | "ROOT_PATH": "~/.deeppavlov", 52 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 53 | "MODELS_PATH": "{ROOT_PATH}/models", 54 | "MODEL_PATH": "{MODELS_PATH}/passage_reader_classifier_eng" 55 | }, 56 | "download": [ 57 | { 58 | "url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/nq_psgcls_bert.tar.gz", 59 | "subdir": "{MODEL_PATH}" 60 | } 61 | ] 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /deeppavlov/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/core/__init__.py -------------------------------------------------------------------------------- /deeppavlov/core/commands/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/core/commands/__init__.py -------------------------------------------------------------------------------- /deeppavlov/core/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/core/common/__init__.py -------------------------------------------------------------------------------- /deeppavlov/core/common/aliases.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ALIASES = { 16 | 'kbqa_cq': 'kbqa_cq_en', 17 | 'kbqa_cq_online': 'kbqa_cq_en', 18 | 'kbqa_cq_rus': 'kbqa_cq_ru', 19 | 'multi_squad_noans': 'qa_squad2_bert', 20 | 'multi_squad_noans_infer': 'qa_squad2_bert', 21 | 'multi_squad_retr_noans': 'qa_squad2_bert', 22 | 'ner_collection3_m1': 'ner_collection3_bert', 23 | 'ner_conll2003': 'ner_conll2003_bert', 24 | 'ner_conll2003_torch_bert': 'ner_conll2003_bert', 25 | 'ner_dstc2': 'ner_conll2003_bert', 26 | 'ner_few_shot_ru': 'ner_rus_bert', 27 | 'ner_few_shot_ru_simulate': 'ner_rus_bert', 28 | 'ner_ontonotes': 'ner_ontonotes_bert', 29 | 'ner_ontonotes_bert_emb': 'ner_ontonotes_bert', 30 | 'ner_ontonotes_bert_mult_torch': 'ner_ontonotes_bert_mult', 31 | 'ner_ontonotes_bert_torch': 'ner_ontonotes_bert', 32 | 'ner_rus': 'ner_rus_bert', 33 | 'paraphraser_bert': 'paraphraser_rubert', 34 | 'ru_odqa_infer_wiki_rubert': 'ru_odqa_infer_wiki', 35 | 'sentseg_dailydialog': 'sentseg_dailydialog_bert', 36 | 'squad': 'squad_bert', 37 | 'squad_bert_infer': 'squad_bert', 38 | 'squad_bert_multilingual_freezed_emb': 'squad_bert', 39 | 'squad_ru': 'squad_ru_bert', 40 | 'squad_ru_bert_infer': 'squad_ru_bert', 41 | 'squad_ru_convers_distilrubert_2L_infer': 'squad_ru_convers_distilrubert_2L', 42 | 'squad_ru_convers_distilrubert_6L_infer': 'squad_ru_convers_distilrubert_6L', 43 | 'squad_ru_rubert': 'squad_ru_bert', 44 | 'squad_ru_rubert_infer': 'squad_ru_bert', 45 | 'squad_torch_bert': 'squad_bert', 46 | 'squad_torch_bert_infer': 'squad_bert' 47 | } 48 | -------------------------------------------------------------------------------- /deeppavlov/core/common/errors.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class ConfigError(Exception): 21 | """Any configuration error.""" 22 | 23 | def __init__(self, message): 24 | super(ConfigError, self).__init__() 25 | self.message = message 26 | 27 | def __str__(self): 28 | return repr(self.message) 29 | -------------------------------------------------------------------------------- /deeppavlov/core/common/log.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | import logging 17 | import logging.config 18 | from pathlib import Path 19 | 20 | from .paths import get_settings_path 21 | 22 | LOG_CONFIG_FILENAME = 'log_config.json' 23 | TRACEBACK_LOGGER_ERRORS = True 24 | 25 | root_path = Path(__file__).resolve().parents[3] 26 | 27 | log_config_path = get_settings_path() / LOG_CONFIG_FILENAME 28 | 29 | with log_config_path.open(encoding='utf8') as log_config_json: 30 | log_config = json.load(log_config_json) 31 | 32 | 33 | class ProbeFilter(logging.Filter): 34 | """ProbeFilter class is used to filter POST requests to /probe endpoint from logs.""" 35 | 36 | def filter(self, record: logging.LogRecord) -> bool: 37 | """To log the record method should return True.""" 38 | return 'POST /probe HTTP' not in record.getMessage() 39 | 40 | 41 | def init_logger(): 42 | configured_loggers = [log_config.get('root', {})] + [logger for logger in 43 | log_config.get('loggers', {}).values()] 44 | 45 | used_handlers = {handler for log in configured_loggers for handler in log.get('handlers', [])} 46 | 47 | for handler_id, handler in list(log_config['handlers'].items()): 48 | if handler_id not in used_handlers: 49 | del log_config['handlers'][handler_id] 50 | elif 'filename' in handler.keys(): 51 | filename = handler['filename'] 52 | logfile_path = Path(filename).expanduser().resolve() 53 | handler['filename'] = str(logfile_path) 54 | 55 | logging.config.dictConfig(log_config) 56 | -------------------------------------------------------------------------------- /deeppavlov/core/common/log_events.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from logging import getLogger 16 | from typing import Optional 17 | from deeppavlov.core.commands.utils import expand_path 18 | 19 | log = getLogger(__name__) 20 | 21 | 22 | class TBWriter: 23 | def __init__(self, tensorboard_log_dir: str): 24 | # TODO: After adding wandb logger, create common parent class for both loggers 25 | from torch.utils.tensorboard import SummaryWriter 26 | tensorboard_log_dir = expand_path(tensorboard_log_dir) 27 | self.tb_train_writer = SummaryWriter(str(tensorboard_log_dir / 'train_log')) 28 | self.tb_valid_writer = SummaryWriter(str(tensorboard_log_dir / 'valid_log')) 29 | 30 | # TODO: find how to write Summary 31 | def write_train(self, tag, scalar_value, global_step): 32 | self.tb_train_writer.add_scalar(tag, scalar_value, global_step) 33 | 34 | def write_valid(self, tag, scalar_value, global_step): 35 | self.tb_valid_writer.add_scalar(tag, scalar_value, global_step) 36 | 37 | def flush(self): 38 | self.tb_train_writer.flush() 39 | self.tb_valid_writer.flush() 40 | 41 | 42 | def get_tb_writer(tensorboard_log_dir: Optional[str]) -> Optional[TBWriter]: 43 | try: 44 | if tensorboard_log_dir is not None: 45 | tb_writer = TBWriter(tensorboard_log_dir) 46 | else: 47 | tb_writer = None 48 | except ImportError: 49 | log.error('Failed to import SummaryWriter from torch.utils.tensorboard. Failed to initialize the TensorBoard ' 50 | 'logger. Install an appropriate PyTorch version to use this logger or remove the tensorboard_log_dir ' 51 | 'parameter from the train parameters list in the configuration file.') 52 | tb_writer = None 53 | return tb_writer 54 | -------------------------------------------------------------------------------- /deeppavlov/core/common/metrics_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
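# A minimal usage sketch for the registry defined below ('toy_accuracy' and the
# metric function are hypothetical; only the two public helpers in this module
# are assumed):
#
#     >>> from deeppavlov.core.common.metrics_registry import register_metric, get_metric_by_name
#     >>> @register_metric('toy_accuracy')
#     ... def toy_accuracy(y_true, y_predicted):
#     ...     return sum(t == p for t, p in zip(y_true, y_predicted)) / max(len(y_true), 1)
#     >>> get_metric_by_name('toy_accuracy')([1, 0, 1], [1, 1, 1])
#     0.6666666666666666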
14 | 15 | import importlib 16 | import json 17 | from logging import getLogger 18 | from pathlib import Path 19 | from typing import Callable, Any 20 | 21 | from deeppavlov.core.common.errors import ConfigError 22 | 23 | log = getLogger(__name__) 24 | 25 | _registry_path = Path(__file__).parent / 'metrics_registry.json' 26 | if _registry_path.exists(): 27 | with _registry_path.open(encoding='utf-8') as f: 28 | _REGISTRY = json.load(f) 29 | else: 30 | _REGISTRY = {} 31 | 32 | 33 | def fn_from_str(name: str) -> Callable[..., Any]: 34 | """Returns a function object with the name given in string.""" 35 | try: 36 | module_name, fn_name = name.split(':') 37 | return getattr(importlib.import_module(module_name), fn_name) 38 | except ValueError: 39 | raise ConfigError('Expected function description in a `module.submodules:function_name` form, but got `{}`' 40 | .format(name)) 41 | except AttributeError: 42 | # noinspection PyUnboundLocalVariable 43 | raise ConfigError(f"Incorrect metric: '{module_name}' has no attribute '{fn_name}'.") 44 | 45 | 46 | def register_metric(metric_name: str) -> Callable[..., Any]: 47 | """Decorator for metric registration.""" 48 | 49 | def decorate(fn): 50 | fn_name = fn.__module__ + ':' + fn.__name__ 51 | if metric_name in _REGISTRY and _REGISTRY[metric_name] != fn_name: 52 | log.warning('"{}" is already registered as a metric name, the old function will be ignored' 53 | .format(metric_name)) 54 | _REGISTRY[metric_name] = fn_name 55 | return fn 56 | 57 | return decorate 58 | 59 | 60 | def get_metric_by_name(name: str) -> Callable[..., Any]: 61 | """Returns a metric callable with a corresponding name.""" 62 | name = _REGISTRY.get(name, name) 63 | return fn_from_str(name) 64 | -------------------------------------------------------------------------------- /deeppavlov/core/common/paths.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
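# A minimal usage sketch (the directory value is illustrative; note that
# DP_SETTINGS_PATH is read once, below, at import time, so it must be set
# before deeppavlov is first imported):
#
#     >>> import os
#     >>> os.environ['DP_SETTINGS_PATH'] = '/tmp/dp_settings'
#     >>> from deeppavlov.core.common.paths import get_settings_path
#     >>> get_settings_path()  # copies the default *.json settings files on first use
#     PosixPath('/tmp/dp_settings')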
14 | import os 15 | import shutil 16 | 17 | from pathlib import Path 18 | 19 | _root_path = Path(__file__).resolve().parents[3] 20 | _default_settings_path: Path = _root_path / 'deeppavlov' / 'utils' / 'settings' 21 | _settings_path = Path(os.getenv('DP_SETTINGS_PATH', _default_settings_path)).expanduser().resolve() 22 | if _settings_path.is_file(): 23 | raise FileExistsError(f'DP_SETTINGS_PATH={_settings_path} is a file and not a directory') 24 | 25 | if _default_settings_path in _settings_path.parents: 26 | raise RecursionError(f'DP_SETTINGS_PATH={_settings_path} is relative' 27 | f' to the default settings path {_default_settings_path}') 28 | 29 | 30 | def get_settings_path() -> Path: 31 | """Return an absolute path to the DeepPavlov settings directory""" 32 | populate_settings_dir() 33 | return _settings_path 34 | 35 | 36 | def populate_settings_dir(force: bool = False) -> bool: 37 | """ 38 | Populate settings directory with default settings files 39 | 40 | Args: 41 | force: if ``True``, replace existing settings files with default ones 42 | 43 | Returns: 44 | ``True`` if any files were copied and ``False`` otherwise 45 | """ 46 | res = False 47 | if _default_settings_path == _settings_path: 48 | return res 49 | 50 | for src in list(_default_settings_path.glob('**/*.json')): 51 | dest = _settings_path / src.relative_to(_default_settings_path) 52 | if not force and dest.exists(): 53 | continue 54 | res = True 55 | dest.parent.mkdir(parents=True, exist_ok=True) 56 | shutil.copy(src, dest) 57 | return res 58 | -------------------------------------------------------------------------------- /deeppavlov/core/common/prints.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | from contextlib import redirect_stdout 17 | 18 | 19 | class RedirectedPrints(redirect_stdout): 20 | """Context manager for temporarily redirecting stdout to another stream """ 21 | 22 | def __init__(self, new_target=sys.stderr): 23 | super().__init__(new_target=new_target) 24 | -------------------------------------------------------------------------------- /deeppavlov/core/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/core/data/__init__.py -------------------------------------------------------------------------------- /deeppavlov/core/data/dataset_reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import List, Dict, Tuple, Any 16 | 17 | 18 | class DatasetReader: 19 | """An abstract class for reading data from some location and construction of a dataset.""" 20 | 21 | def read(self, data_path: str, *args, **kwargs) -> Dict[str, List[Tuple[Any, Any]]]: 22 | """Reads a file from a path and returns data as a list of tuples of inputs and correct outputs 23 | for every data type in ``train``, ``valid`` and ``test``. 24 | """ 25 | raise NotImplementedError 26 | -------------------------------------------------------------------------------- /deeppavlov/core/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/core/models/__init__.py -------------------------------------------------------------------------------- /deeppavlov/core/models/component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABCMeta, abstractmethod 16 | 17 | from logging import getLogger 18 | 19 | log = getLogger(__name__) 20 | 21 | 22 | class Component(metaclass=ABCMeta): 23 | """Abstract class for all callables that could be used in Chainer's pipe.""" 24 | 25 | @abstractmethod 26 | def __call__(self, *args, **kwargs): 27 | pass 28 | 29 | def reset(self): 30 | pass 31 | 32 | def destroy(self): 33 | attr_list = list(self.__dict__.keys()) 34 | for attr_name in attr_list: 35 | attr = getattr(self, attr_name) 36 | if hasattr(attr, 'destroy'): 37 | attr.destroy() 38 | delattr(self, attr_name) 39 | -------------------------------------------------------------------------------- /deeppavlov/core/models/estimator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import abstractmethod 16 | 17 | from .component import Component 18 | from .serializable import Serializable 19 | 20 | 21 | class Estimator(Component, Serializable): 22 | """Abstract class for components that could be fitted on the data as a whole.""" 23 | 24 | @abstractmethod 25 | def fit(self, *args, **kwargs): 26 | pass 27 | -------------------------------------------------------------------------------- /deeppavlov/core/models/nn_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import abstractmethod 16 | 17 | from .component import Component 18 | from .serializable import Serializable 19 | 20 | 21 | class NNModel(Component, Serializable): 22 | """Abstract class for deep learning components.""" 23 | 24 | @abstractmethod 25 | def train_on_batch(self, x: list, y: list): 26 | pass 27 | 28 | def process_event(self, event_name, data): 29 | pass 30 | -------------------------------------------------------------------------------- /deeppavlov/core/models/serializable.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
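# A minimal sketch of a concrete subclass (ToyModel and the path are
# hypothetical; only the save/load contract of the class below is assumed):
#
#     >>> from deeppavlov.core.models.serializable import Serializable
#     >>> class ToyModel(Serializable):
#     ...     def save(self):
#     ...         self.save_path.write_text('weights')
#     ...     def load(self):
#     ...         return self.load_path.read_text() if self.load_path.exists() else None
#     >>> model = ToyModel(save_path='~/.deeppavlov/models/toy/model.txt')
#
# In 'infer' mode a missing load_path falls back to save_path (with a warning),
# and the parent directory of save_path is created automatically.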
14 | 15 | from abc import ABCMeta, abstractmethod 16 | from logging import getLogger 17 | from pathlib import Path 18 | from typing import Union, Optional 19 | 20 | from deeppavlov.core.commands.utils import expand_path 21 | 22 | log = getLogger(__name__) 23 | 24 | 25 | class Serializable(metaclass=ABCMeta): 26 | """Abstract base class that expresses the interface for all models that can serialize data to a path.""" 27 | 28 | def __init__(self, save_path: Optional[Union[str, Path]], load_path: Optional[Union[str, Path]] = None, 29 | mode: str = 'infer', 30 | *args, **kwargs) -> None: 31 | 32 | if save_path: 33 | self.save_path = expand_path(save_path) 34 | self.save_path.parent.mkdir(parents=True, exist_ok=True) 35 | else: 36 | self.save_path = None 37 | 38 | if load_path: 39 | self.load_path = expand_path(load_path) 40 | if mode != 'train' and self.save_path and self.load_path != self.save_path: 41 | log.warning("Load path '{}' differs from save path '{}' in '{}' mode for {}." 42 | .format(self.load_path, self.save_path, mode, self.__class__.__name__)) 43 | elif mode != 'train' and self.save_path: 44 | self.load_path = self.save_path 45 | log.warning("No load path is set for {} in '{}' mode. Using save path instead" 46 | .format(self.__class__.__name__, mode)) 47 | else: 48 | self.load_path = None 49 | log.warning("No load path is set for {}!".format(self.__class__.__name__)) 50 | 51 | @abstractmethod 52 | def save(self, *args, **kwargs): 53 | pass 54 | 55 | @abstractmethod 56 | def load(self, *args, **kwargs): 57 | pass 58 | -------------------------------------------------------------------------------- /deeppavlov/core/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .fit_trainer import FitTrainer 16 | from .nn_trainer import NNTrainer 17 | from .torch_trainer import TorchTrainer 18 | -------------------------------------------------------------------------------- /deeppavlov/core/trainers/torch_trainer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
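# The trainer below is selected through the "train" section of a configuration
# file, as in the ranking_ubuntu_v2_torch_bert_uncased.json config above; a
# minimal sketch (batch size and metrics are illustrative):
#
#     "train": {
#       "class_name": "torch_trainer",
#       "batch_size": 32,
#       "metrics": ["r@1"]
#     }
#
# Relative to NNTrainer it only switches the main component's torch module to
# eval() mode around evaluation and back to train() mode for batch training.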
14 | 15 | from logging import getLogger 16 | from typing import Tuple, Optional, Iterable, Collection, Any 17 | 18 | from deeppavlov.core.trainers.utils import Metric 19 | from deeppavlov.core.common.registry import register 20 | from deeppavlov.core.data.data_learning_iterator import DataLearningIterator 21 | from deeppavlov.core.trainers.nn_trainer import NNTrainer 22 | 23 | log = getLogger(__name__) 24 | 25 | 26 | @register('torch_trainer') 27 | class TorchTrainer(NNTrainer): 28 | 29 | def test(self, data: Iterable[Tuple[Collection[Any], Collection[Any]]], 30 | metrics: Optional[Collection[Metric]] = None, *, 31 | start_time: Optional[float] = None, show_examples: Optional[bool] = None) -> dict: 32 | self._chainer.get_main_component().model.eval() 33 | 34 | report = super(TorchTrainer, self).test(data=data, metrics=metrics, start_time=start_time, 35 | show_examples=show_examples) 36 | self._chainer.get_main_component().model.train() 37 | return report 38 | 39 | def train_on_batches(self, iterator: DataLearningIterator) -> None: 40 | self._chainer.get_main_component().model.train() 41 | super(TorchTrainer, self).train_on_batches(iterator=iterator) 42 | self._chainer.get_main_component().model.eval() 43 | -------------------------------------------------------------------------------- /deeppavlov/dataset_iterators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/dataset_iterators/__init__.py -------------------------------------------------------------------------------- /deeppavlov/dataset_iterators/siamese_iterator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
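# A minimal sketch of the split behaviour on toy data (assumes, as in the base
# DataLearningIterator, that split() is invoked during construction):
#
#     >>> from deeppavlov.dataset_iterators.siamese_iterator import SiameseIterator
#     >>> data = {'train': [([f'context {i}', f'response {i}'], 1) for i in range(5000)],
#     ...         'valid': [], 'test': []}
#     >>> it = SiameseIterator(data, seed=243)
#     >>> len(it.train), len(it.valid), len(it.test)  # 1000 examples carved off for valid and for test
#     (3000, 1000, 1000)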
14 | 15 | from logging import getLogger 16 | from typing import Dict, List, Tuple 17 | 18 | from deeppavlov.core.common.registry import register 19 | from deeppavlov.core.data.data_learning_iterator import DataLearningIterator 20 | 21 | log = getLogger(__name__) 22 | 23 | 24 | @register('siamese_iterator') 25 | class SiameseIterator(DataLearningIterator): 26 | """The class contains methods for iterating over a dataset for ranking in training, validation and test mode.""" 27 | 28 | def split(self, *args, len_valid=1000, len_test=1000, **kwargs) -> None: 29 | if len(self.valid) == 0 and len_valid != 0: 30 | self.random.shuffle(self.train) 31 | self.valid = self.train[-len_valid:] 32 | self.train = self.train[:-len_valid] 33 | if len(self.test) == 0 and len_test != 0: 34 | self.random.shuffle(self.train) 35 | self.test = self.train[-len_test:] 36 | self.train = self.train[:-len_test] 37 | -------------------------------------------------------------------------------- /deeppavlov/dataset_iterators/typos_iterator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from deeppavlov.core.common.registry import register 16 | from deeppavlov.core.data.data_learning_iterator import DataLearningIterator 17 | 18 | 19 | @register('typos_iterator') 20 | class TyposDatasetIterator(DataLearningIterator): 21 | """Implementation of :class:`~deeppavlov.core.data.data_learning_iterator.DataLearningIterator` used for training 22 | :class:`~deeppavlov.models.spelling_correction.brillmoore.ErrorModel` 23 | 24 | """ 25 | 26 | def split(self, test_ratio: float = 0., *args, **kwargs): 27 | """Split all data into train and test 28 | 29 | Args: 30 | test_ratio: ratio of test data to train, from 0. to 1. 31 | """ 32 | self.train += self.valid + self.test 33 | 34 | split = int(len(self.train) * test_ratio) 35 | 36 | self.random.shuffle(self.train) 37 | 38 | self.test = self.train[:split] 39 | self.train = self.train[split:] 40 | self.valid = [] 41 | -------------------------------------------------------------------------------- /deeppavlov/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/dataset_readers/__init__.py -------------------------------------------------------------------------------- /deeppavlov/dataset_readers/faq_reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from pandas import read_csv 18 | 19 | from deeppavlov.core.common.registry import register 20 | from deeppavlov.core.data.dataset_reader import DatasetReader 21 | 22 | 23 | @register('faq_reader') 24 | class FaqDatasetReader(DatasetReader): 25 | """Reader for FAQ dataset""" 26 | 27 | def read(self, data_path: str = None, data_url: str = None, x_col_name: str = 'x', y_col_name: str = 'y') -> Dict: 28 | """ 29 | Read the FAQ dataset from a specified csv file or remote url 30 | 31 | Parameters: 32 | data_path: path to csv file of FAQ 33 | data_url: url to csv file of FAQ 34 | x_col_name: name of Question column in csv file 35 | y_col_name: name of Answer column in csv file 36 | 37 | Returns: 38 | A dictionary containing training, validation and test parts of the dataset obtainable via 39 | ``train``, ``valid`` and ``test`` keys. 40 | """ 41 | 42 | if data_url is not None: 43 | data = read_csv(data_url) 44 | elif data_path is not None: 45 | data = read_csv(data_path) 46 | else: 47 | raise ValueError("Please specify the data_path or data_url parameter") 48 | 49 | x = data[x_col_name] 50 | y = data[y_col_name] 51 | 52 | train_xy_tuples = [(x[i].strip(), y[i].strip()) for i in range(len(x))] 53 | 54 | dataset = dict() 55 | dataset["train"] = train_xy_tuples 56 | dataset["valid"] = [] 57 | dataset["test"] = [] 58 | 59 | return dataset 60 | -------------------------------------------------------------------------------- /deeppavlov/dataset_readers/line_reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from deeppavlov.core.common.registry import register 18 | from deeppavlov.core.data.dataset_reader import DatasetReader 19 | 20 | 21 | @register('line_reader') 22 | class LineReader(DatasetReader): 23 | """Read a txt file by lines""" 24 | 25 | def read(self, data_path: str = None, *args, **kwargs) -> Dict: 26 | """Read lines from a txt file 27 | 28 | Args: 29 | data_path: path to txt file 30 | 31 | Returns: 32 | A dictionary containing training, validation and test parts of the dataset obtainable via ``train``, ``valid`` and ``test`` keys.
33 | """ 34 | 35 | with open(data_path) as f: 36 | content = f.readlines() 37 | 38 | dataset = dict() 39 | dataset["train"] = [(line,) for line in content] 40 | dataset["valid"] = [] 41 | dataset["test"] = [] 42 | 43 | return dataset 44 | -------------------------------------------------------------------------------- /deeppavlov/dataset_readers/multitask_reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import copy 16 | from logging import getLogger 17 | from typing import Dict 18 | 19 | from deeppavlov.core.common.registry import get_model, register 20 | from deeppavlov.core.data.dataset_reader import DatasetReader 21 | 22 | log = getLogger(__name__) 23 | 24 | 25 | @register('multitask_reader') 26 | class MultiTaskReader(DatasetReader): 27 | """Class to read several datasets simultaneously.""" 28 | 29 | def read(self, tasks: Dict[str, Dict[str, dict]], task_defaults: dict = None, **kwargs): 30 | """Creates dataset readers for tasks and returns what the task dataset readers' `read()` methods return. 31 | 32 | Args: 33 | tasks: dictionary whose keys are task names and whose values are dictionaries with param name - value pairs for 34 | nested dataset readers initialization. If a task has the key-value pair ``'use_task_defaults': False``, 35 | task_defaults for this task dataset reader will be ignored. 36 | task_defaults: default task parameters. 37 | 38 | Returns: 39 | dictionary whose keys are task names and whose values are what the task readers' `read()` methods returned. 40 | """ 41 | data = dict() 42 | if task_defaults is None: 43 | task_defaults = dict() 44 | for task_name, task_params in tasks.items(): 45 | if task_params.pop('use_task_defaults', True) is True: 46 | task_config = copy.deepcopy(task_defaults) 47 | task_config.update(task_params) 48 | else: 49 | task_config = task_params 50 | reader = get_model(task_config.pop('class_name'))() 51 | data[task_name] = reader.read(**task_config) 52 | return data 53 | -------------------------------------------------------------------------------- /deeppavlov/dataset_readers/paraphraser_reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
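# A minimal usage sketch (the folder is hypothetical and must contain the
# paraphrases.xml and paraphrases_gold.xml files that the reader expects):
#
#     >>> from deeppavlov.dataset_readers.paraphraser_reader import ParaphraserReader
#     >>> data = ParaphraserReader().read('~/.deeppavlov/downloads/paraphraser_data')
#     >>> sorted(data.keys())  # values are ((text_1, text_2), label) pairs with label in {0, 1}
#     ['test', 'train', 'valid']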
14 | 15 | import xml.etree.ElementTree as ET 16 | from pathlib import Path 17 | from typing import Dict, List, Tuple 18 | 19 | from deeppavlov.core.commands.utils import expand_path 20 | from deeppavlov.core.common.registry import register 21 | from deeppavlov.core.data.dataset_reader import DatasetReader 22 | 23 | 24 | @register('paraphraser_reader') 25 | class ParaphraserReader(DatasetReader): 26 | """The class to read the paraphraser.ru dataset from files. 27 | 28 | Please, see https://paraphraser.ru. 29 | """ 30 | 31 | def read(self, 32 | data_path: str, 33 | do_lower_case: bool = True, 34 | *args, **kwargs) -> Dict[str, List[Tuple[Tuple[str, str], int]]]: 35 | """Read the paraphraser.ru dataset from files. 36 | 37 | Args: 38 | data_path: A path to a folder with dataset files. 39 | do_lower_case: Do you want to lowercase all texts 40 | """ 41 | 42 | data_path = expand_path(data_path) 43 | train_fname = data_path / 'paraphrases.xml' 44 | test_fname = data_path / 'paraphrases_gold.xml' 45 | 46 | train_data = self._build_data(train_fname, do_lower_case) 47 | test_data = self._build_data(test_fname, do_lower_case) 48 | return {"train": train_data, "valid": [], "test": test_data} 49 | 50 | @staticmethod 51 | def _build_data(data_path: Path, do_lower_case: bool) -> List[Tuple[Tuple[str, str], int]]: 52 | root = ET.fromstring(data_path.read_text(encoding='utf8')) 53 | data = {} 54 | for paraphrase in root.findall('corpus/paraphrase'): 55 | key = (paraphrase.find('value[@name="text_1"]').text, 56 | paraphrase.find('value[@name="text_2"]').text) 57 | if do_lower_case: 58 | key = tuple([t.lower() for t in key]) 59 | 60 | data[key] = 1 if int(paraphrase.find('value[@name="class"]').text) >= 0 else 0 61 | return list(data.items()) 62 | -------------------------------------------------------------------------------- /deeppavlov/dataset_readers/rel_ranking_reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import xml.etree.ElementTree as ET 16 | from pathlib import Path 17 | from typing import Dict, List, Tuple 18 | 19 | from deeppavlov.core.commands.utils import expand_path 20 | from deeppavlov.core.common.registry import register 21 | from deeppavlov.core.data.dataset_reader import DatasetReader 22 | 23 | 24 | @register('rel_ranking_reader') 25 | class ParaphraserReader(DatasetReader): 26 | """The class to read the paraphraser.ru dataset from files. 27 | 28 | Please, see https://paraphraser.ru. 29 | """ 30 | 31 | def read(self, 32 | data_path: str, 33 | do_lower_case: bool = True, 34 | *args, **kwargs) -> Dict[str, List[Tuple[Tuple[str, str], int]]]: 35 | """Read the paraphraser.ru dataset from files. 36 | 37 | Args: 38 | data_path: A path to a folder with dataset files.
39 | do_lower_case: Do you want to lowercase all texts 40 | """ 41 | 42 | data_path = expand_path(data_path) 43 | train_fname = data_path / 'paraphrases.xml' 44 | test_fname = data_path / 'paraphrases_gold.xml' 45 | 46 | train_data = self._build_data(train_fname, do_lower_case) 47 | test_data = self._build_data(test_fname, do_lower_case) 48 | return {"train": train_data, "valid": [], "test": test_data} 49 | 50 | @staticmethod 51 | def _build_data(data_path: Path, do_lower_case: bool) -> List[Tuple[Tuple[str, str], int]]: 52 | root = ET.fromstring(data_path.read_text(encoding='utf8')) 53 | data = [] 54 | for paraphrase in root.findall('corpus/paraphrase'): 55 | key = (paraphrase.find('value[@name="text_1"]').text, 56 | paraphrase.find('value[@name="text_2"]').text) 57 | if do_lower_case: 58 | key = tuple([t.lower() for t in key]) 59 | 60 | pos_or_neg = int(paraphrase.find('value[@name="class"]').text) 61 | data.append((key, pos_or_neg)) 62 | return data 63 | -------------------------------------------------------------------------------- /deeppavlov/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/metrics/__init__.py -------------------------------------------------------------------------------- /deeppavlov/metrics/correlation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from scipy.stats import pearsonr, spearmanr 16 | from sklearn.metrics import matthews_corrcoef 17 | 18 | from deeppavlov.core.common.metrics_registry import register_metric 19 | 20 | 21 | @register_metric('pearson_correlation') 22 | def pearson_correlation(y_true, y_predicted) -> float: 23 | return pearsonr(y_predicted, y_true)[0] 24 | 25 | 26 | @register_metric('spearman_correlation') 27 | def spearman_correlation(y_true, y_predicted) -> float: 28 | return spearmanr(y_predicted, y_true)[0] 29 | 30 | 31 | @register_metric('matthews_correlation') 32 | def matthews_correlation(y_true, y_predicted) -> float: 33 | return matthews_corrcoef(y_true, y_predicted) 34 | -------------------------------------------------------------------------------- /deeppavlov/metrics/elmo_metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import List 16 | 17 | import numpy as np 18 | 19 | from deeppavlov.core.common.metrics_registry import register_metric 20 | 21 | 22 | @register_metric('elmo_loss2ppl') 23 | def elmo_loss2ppl(losses: List[np.ndarray]) -> float: 24 | """ Calculates perplexity by loss 25 | 26 | Args: 27 | losses: list of numpy arrays of model losses 28 | 29 | Returns: 30 | perplexity : float 31 | """ 32 | avg_loss = np.mean(losses) 33 | return float(np.exp(avg_loss)) 34 | -------------------------------------------------------------------------------- /deeppavlov/metrics/log_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from typing import List, Union 17 | 18 | import numpy as np 19 | from sklearn.metrics import log_loss 20 | 21 | from deeppavlov.core.common.metrics_registry import register_metric 22 | 23 | 24 | @register_metric('log_loss') 25 | def sk_log_loss(y_true: Union[List[List[float]], List[List[int]], np.ndarray], 26 | y_predicted: Union[List[List[float]], List[List[int]], np.ndarray]) -> float: 27 | """ 28 | Calculates log loss. 29 | 30 | Args: 31 | y_true: list or array of true values 32 | y_predicted: list or array of predicted values 33 | 34 | Returns: 35 | Log loss 36 | 37 | Alias: 38 | log_loss 39 | """ 40 | return log_loss(y_true, y_predicted) 41 | -------------------------------------------------------------------------------- /deeppavlov/metrics/mse.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
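# A minimal usage sketch of the metric defined below:
#
#     >>> from deeppavlov.metrics.mse import mse
#     >>> float(mse([1.0, 2.0, 3.0], [1.0, 2.0, 2.0]))  # mean of squared errors (0, 0, 1)
#     0.3333333333333333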
14 | 15 | import numpy as np 16 | from sklearn.metrics import mean_squared_error 17 | from typing import Union 18 | 19 | from deeppavlov.core.common.metrics_registry import register_metric 20 | 21 | 22 | @register_metric('mean_squared_error') 23 | def mse(y_true: Union[np.ndarray, list], 24 | y_predicted: Union[np.ndarray, list], 25 | *args, 26 | **kwargs) -> float: 27 | """ 28 | Calculates mean squared error. 29 | Args: 30 | y_true: list of true values 31 | y_predicted: list of predicted values 32 | Returns: 33 | float: Mean squared error 34 | """ 35 | for value in [y_true, y_predicted]: 36 | assert np.isfinite(value).all() 37 | return mean_squared_error(y_true, y_predicted, *args, **kwargs) 38 | -------------------------------------------------------------------------------- /deeppavlov/metrics/recall_at_k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from typing import List 17 | 18 | import numpy as np 19 | 20 | from deeppavlov.core.common.metrics_registry import register_metric 21 | 22 | 23 | def recall_at_k(y_true: List[int], y_pred: List[List[np.ndarray]], k: int) -> float: 24 | """ 25 | Calculates recall at k ranking metric. 26 | 27 | Args: 28 | y_true: Labels. Not used in the calculation of the metric. 29 | y_pred: Predictions. 30 | Each prediction contains ranking score of all ranking candidates for the particular data sample. 31 | It is supposed that the ranking score for the true candidate goes first in the prediction. 32 | k: Number of top-ranked candidates to consider. 33 | 34 | Returns: 35 | Recall at k 36 | """ 37 | num_examples = float(len(y_pred)) 38 | predictions = np.array(y_pred) 39 | predictions = np.flip(np.argsort(predictions, -1), -1)[:, :k] 40 | num_correct = 0 41 | for el in predictions: 42 | if 0 in el: 43 | num_correct += 1 44 | return float(num_correct) / num_examples 45 | 46 | 47 | @register_metric('r@1') 48 | def r_at_1(y_true, y_pred): 49 | return recall_at_k(y_true, y_pred, k=1) 50 | 51 | 52 | @register_metric('r@2') 53 | def r_at_2(y_true, y_pred): 54 | return recall_at_k(y_true, y_pred, k=2) 55 | 56 | 57 | @register_metric('r@5') 58 | def r_at_5(labels, predictions): 59 | return recall_at_k(labels, predictions, k=5) 60 | 61 | 62 | @register_metric('r@10') 63 | def r_at_10(labels, predictions): 64 | return recall_at_k(labels, predictions, k=10) 65 | -------------------------------------------------------------------------------- /deeppavlov/metrics/roc_auc_score.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from typing import List, Union 17 | 18 | import numpy as np 19 | import sklearn.metrics 20 | 21 | from deeppavlov.core.common.metrics_registry import register_metric 22 | 23 | 24 | @register_metric('roc_auc') 25 | def roc_auc_score(y_true: Union[List[List[float]], List[List[int]], np.ndarray], 26 | y_pred: Union[List[List[float]], List[List[int]], np.ndarray]) -> float: 27 | """ 28 | Compute Area Under the Curve (AUC) from prediction scores. 29 | 30 | Args: 31 | y_true: true binary labels 32 | y_pred: target scores, such as probability estimates of the positive class 33 | 34 | Returns: 35 | Area Under the Curve (AUC) from prediction scores 36 | 37 | Alias: 38 | roc_auc 39 | """ 40 | try: 41 | return sklearn.metrics.roc_auc_score(np.squeeze(np.array(y_true)), 42 | np.squeeze(np.array(y_pred)), average="macro") 43 | except ValueError: 44 | return 0. 45 | -------------------------------------------------------------------------------- /deeppavlov/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | import nltk 18 | 19 | from deeppavlov.core.common.prints import RedirectedPrints 20 | 21 | if not os.environ.get('DP_SKIP_NLTK_DOWNLOAD'): 22 | with RedirectedPrints(): 23 | nltk.download('punkt', quiet=True) 24 | nltk.download('stopwords', quiet=True) 25 | nltk.download('perluniprops', quiet=True) 26 | nltk.download('nonbreaking_prefixes', quiet=True) 27 | -------------------------------------------------------------------------------- /deeppavlov/models/api_requester/__init__.py: -------------------------------------------------------------------------------- 1 | from .api_requester import * 2 | -------------------------------------------------------------------------------- /deeppavlov/models/api_requester/api_router.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import concurrent 16 | from concurrent.futures import ProcessPoolExecutor 17 | from logging import getLogger 18 | from typing import List 19 | 20 | from deeppavlov.core.common.registry import register 21 | from deeppavlov.core.models.component import Component 22 | from deeppavlov.models.api_requester import ApiRequester 23 | 24 | logger = getLogger(__name__) 25 | 26 | 27 | @register("api_router") 28 | class ApiRouter(Component): 29 | """A helper class for running multiple API requesters on the same data in parallel 30 | 31 | Args: 32 | api_requesters: list of ApiRequester objects 33 | n_workers: The maximum number of subprocesses to run 34 | 35 | Attributes: 36 | api_requesters: list of ApiRequester objects 37 | n_workers: The maximum number of subprocesses to run 38 | """ 39 | 40 | def __init__(self, api_requesters: List[ApiRequester], n_workers: int = 1, *args, **kwargs): 41 | self.api_requesters = api_requesters 42 | self.n_workers = n_workers 43 | 44 | def __call__(self, *args): 45 | """ 46 | 47 | Args: 48 | *args: list of arguments to forward to the API requesters 49 | 50 | Returns: 51 | results of the requests 52 | """ 53 | with ProcessPoolExecutor(self.n_workers) as executor: 54 | futures = [executor.submit(api_requester, *args) for api_requester 55 | in 56 | self.api_requesters] 57 | 58 | concurrent.futures.wait(futures) 59 | results = [] 60 | for future, api_requester in zip(futures, self.api_requesters): 61 | result = future.result() 62 | if api_requester.out_count > 1: 63 | results += result 64 | else: 65 | results.append(result) 66 | 67 | return results 68 | -------------------------------------------------------------------------------- /deeppavlov/models/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/classifiers/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/doc_retrieval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/doc_retrieval/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/doc_retrieval/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Any, List 16 | 17 | import nltk 18 | 19 | from deeppavlov.core.common.registry import register 20 | 21 | 22 | @register('concat_lists') 23 | def concat_lists(list_a: List[List[Any]], list_b: List[List[Any]]): 24 | list_u = [] 25 | for element_a, element_b in zip(list_a, list_b): 26 | list_u.append(element_a + element_b) 27 | return list_u 28 | 29 | 30 | def find_answer_sentence(answer_pos: int, context: str) -> str: 31 | answer_sentence = "" 32 | context_sentences = nltk.sent_tokenize(context) 33 | start = 0 34 | context_sentences_offsets = [] 35 | for sentence in context_sentences: 36 | end = start + len(sentence) 37 | context_sentences_offsets.append((start, end)) 38 | start = end + 1 39 | 40 | for sentence, (start_offset, end_offset) in zip(context_sentences, context_sentences_offsets): 41 | if start_offset <= answer_pos < end_offset:  # inclusive start, so an answer at a sentence boundary is found 42 | answer_sentence = sentence 43 | break 44 | 45 | return answer_sentence 46 | -------------------------------------------------------------------------------- /deeppavlov/models/embedders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/embedders/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/embedders/fasttext_embedder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
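# A minimal usage sketch for the FasttextEmbedder defined below; the model path
# is illustrative and assumes a pre-trained fastText .bin file on disk plus the
# keyword arguments accepted by the base Embedder class:
#
#     from deeppavlov.models.embedders.fasttext_embedder import FasttextEmbedder
#     embedder = FasttextEmbedder(load_path='~/.deeppavlov/embeddings/wiki.en.bin')
#     vectors_batch = embedder([['hello', 'world']])  # one dim-sized vector per token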
14 | 15 | from logging import getLogger 16 | from typing import Iterator 17 | 18 | import fasttext 19 | 20 | import numpy as np 21 | 22 | from deeppavlov.core.common.registry import register 23 | from deeppavlov.models.embedders.abstract_embedder import Embedder 24 | 25 | log = getLogger(__name__) 26 | 27 | 28 | @register('fasttext') 29 | class FasttextEmbedder(Embedder): 30 | """ 31 | Class implements fastText embedding model 32 | 33 | Args: 34 | load_path: path where to load pre-trained embedding model from 35 | pad_zero: whether to pad samples or not 36 | 37 | Attributes: 38 | model: fastText model instance 39 | tok2emb: dictionary with already embedded tokens 40 | dim: dimension of embeddings 41 | pad_zero: whether to pad sequence of tokens with zeros or not 42 | load_path: path with pre-trained fastText binary model 43 | """ 44 | 45 | def _get_word_vector(self, w: str) -> np.ndarray: 46 | return self.model.get_word_vector(w) 47 | 48 | def load(self) -> None: 49 | """ 50 | Load fastText binary model from self.load_path 51 | """ 52 | log.debug(f"[loading fastText embeddings from `{self.load_path}`]") 53 | self.model = fasttext.load_model(str(self.load_path)) 54 | self.dim = self.model.get_dimension() 55 | 56 | def __iter__(self) -> Iterator[str]: 57 | """ 58 | Iterate over all words from fastText model vocabulary 59 | 60 | Returns: 61 | iterator 62 | """ 63 | yield from self.model.get_words() 64 | -------------------------------------------------------------------------------- /deeppavlov/models/entity_extraction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/entity_extraction/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/kbqa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/kbqa/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/morpho_syntax_parser/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/morpho_syntax_parser/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/morpho_syntax_parser/dependency_decoding.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
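# A toy sketch of the ChuLiuEdmonds component defined below: for a sentence of
# L words it takes an L x (L+1) matrix of head probabilities (column 0 is the
# root) and returns one head index per word; the numbers here are made up.
#
#     import numpy as np
#     from deeppavlov.models.morpho_syntax_parser.dependency_decoding import ChuLiuEdmonds
#     decoder = ChuLiuEdmonds()
#     probs = np.array([[0.9, 0.05, 0.05],   # word 1: the root is the likely head
#                       [0.1, 0.8, 0.1]])    # word 2: word 1 is the likely head
#     decoder([probs])  # -> [[0, 1]]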
14 | 15 | from typing import List 16 | 17 | import numpy as np 18 | from ufal.chu_liu_edmonds import chu_liu_edmonds 19 | 20 | from deeppavlov.core.common.registry import register 21 | from deeppavlov.core.models.component import Component 22 | 23 | 24 | @register('chu_liu_edmonds_transformer') 25 | class ChuLiuEdmonds(Component): 26 | """ 27 | A wrapper for the Chu-Liu-Edmonds maximum spanning tree algorithm 28 | """ 29 | 30 | def __init__(self, min_edge_prob=1e-6, **kwargs): 31 | self.min_edge_prob = min_edge_prob 32 | 33 | def __call__(self, probs: List[np.ndarray]) -> List[List[int]]: 34 | """Applies the Chu-Liu-Edmonds algorithm to the matrix of head probabilities. 35 | probs: a 3D-array of probabilities of shape B*L*(L+1) 36 | """ 37 | answer = [] 38 | for elem in probs: 39 | m, n = elem.shape 40 | if n == m + 1: 41 | elem = np.log10(np.maximum(self.min_edge_prob, elem)) - np.log10(self.min_edge_prob) 42 | elem = np.concatenate([np.zeros_like(elem[:1, :]), elem], axis=0) 43 | # this makes it impossible to create multiple edges 0->i 44 | elem[1:, 0] += np.log10(self.min_edge_prob) * len(elem) 45 | heads, _ = chu_liu_edmonds(elem.astype("float64")) 46 | answer.append(heads[1:]) 47 | else: 48 | raise ValueError("First and second axis lengths m, n of probs should satisfy the condition n == m + 1") 49 | return answer 50 | -------------------------------------------------------------------------------- /deeppavlov/models/morpho_syntax_parser/spacy_lemmatizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import List 16 | 17 | import spacy 18 | 19 | from deeppavlov.core.common.registry import register 20 | from deeppavlov.core.models.component import Component 21 | 22 | 23 | @register('spacy_lemmatizer') 24 | class SpacyLemmatizer(Component): 25 | def __init__(self, model: str, **kwargs): 26 | self.nlp = spacy.load(model) 27 | 28 | def __call__(self, words_batch: List[List[str]]): 29 | return [[self.nlp(word)[0].lemma_ for word in words_list] for words_list in words_batch] 30 | -------------------------------------------------------------------------------- /deeppavlov/models/preprocessors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/preprocessors/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/preprocessors/dnnc_preprocessor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from logging import getLogger 16 | from typing import List, Tuple 17 | 18 | import numpy as np 19 | 20 | from deeppavlov.core.common.registry import register 21 | from deeppavlov.core.models.component import Component 22 | 23 | log = getLogger(__name__) 24 | 25 | 26 | @register('dnnc_pair_generator') 27 | class PairGenerator(Component): 28 | """ 29 | Generates all possible ordered pairs from 'texts_batch' and 'support_dataset' 30 | 31 | Args: 32 | bidirectional: adds pairs in reverse order 33 | """ 34 | 35 | def __init__(self, bidirectional: bool = False, **kwargs) -> None: 36 | self.bidirectional = bidirectional 37 | 38 | def __call__(self, 39 | texts: List[str], 40 | dataset: List[List[str]], 41 | ) -> Tuple[List[str], List[str], List[str], List[str]]: 42 | hypothesis_batch = [] 43 | premise_batch = [] 44 | hypothesis_labels_batch = [] 45 | for [premise, [hypothesis, hypothesis_labels]] in zip(texts * len(dataset), 46 | np.repeat(dataset, len(texts), axis=0)): 47 | premise_batch.append(premise) 48 | hypothesis_batch.append(hypothesis) 49 | hypothesis_labels_batch.append(hypothesis_labels) 50 | 51 | if self.bidirectional: 52 | premise_batch.append(hypothesis) 53 | hypothesis_batch.append(premise) 54 | hypothesis_labels_batch.append(hypothesis_labels) 55 | return texts, hypothesis_batch, premise_batch, hypothesis_labels_batch 56 | -------------------------------------------------------------------------------- /deeppavlov/models/preprocessors/mask.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
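# A quick sketch of the Mask component defined below on a ragged batch of
# token lists:
#
#     from deeppavlov.models.preprocessors.mask import Mask
#     Mask()([['a', 'b', 'c'], ['d']])
#     # -> [[1., 1., 1.],
#     #     [1., 0., 0.]]  (a float32 numpy array)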
14 | 15 | import numpy as np 16 | 17 | from deeppavlov.core.common.registry import register 18 | from deeppavlov.core.models.component import Component 19 | 20 | 21 | @register('mask') 22 | class Mask(Component): 23 | """Takes a batch of tokens and returns the masks of corresponding length""" 24 | def __init__(self, *args, **kwargs): 25 | pass 26 | 27 | @staticmethod 28 | def __call__(tokens_batch, **kwargs): 29 | batch_size = len(tokens_batch) 30 | max_len = max(len(utt) for utt in tokens_batch) 31 | mask = np.zeros([batch_size, max_len], dtype=np.float32) 32 | for n, utterance in enumerate(tokens_batch): 33 | mask[n, :len(utterance)] = 1 34 | 35 | return mask 36 | -------------------------------------------------------------------------------- /deeppavlov/models/preprocessors/sentseg_preprocessor.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from deeppavlov.core.common.registry import register 4 | 5 | 6 | @register("sentseg_restore_sent") 7 | def SentSegRestoreSent(batch_words: List[List[str]], batch_tags: List[List[str]]) -> List[str]: 8 | ret = [] 9 | for words, tags in zip(batch_words, batch_tags): 10 | if len(tags) == 0: 11 | ret.append("") 12 | continue 13 | sent = words[0] 14 | punct = "" if tags[0] == "O" else tags[0][-1] 15 | for word, tag in zip(words[1:], tags[1:]): 16 | if tag != "O": 17 | sent += punct 18 | punct = tag[-1] 19 | sent += " " + word 20 | sent += punct 21 | ret.append(sent) 22 | 23 | return ret 24 | -------------------------------------------------------------------------------- /deeppavlov/models/preprocessors/str_lower.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
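# A quick sketch of str_lower, defined below; it recurses through arbitrarily
# nested lists and lowercases every string it finds:
#
#     from deeppavlov.models.preprocessors.str_lower import str_lower
#     str_lower([['Hello', 'World'], 'FOO'])  # -> [['hello', 'world'], 'foo']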
14 | 15 | from typing import Union 16 | 17 | from deeppavlov.core.common.registry import register 18 | 19 | 20 | @register('str_lower') 21 | def str_lower(batch: Union[str, list, tuple]): 22 | """Recursively search for strings in a list and convert them to lowercase 23 | 24 | Args: 25 | batch: a string or a list containing strings at some level of nesting 26 | 27 | Returns: 28 | the same structure where all strings are converted to lowercase 29 | """ 30 | if isinstance(batch, str): 31 | return batch.lower() 32 | else: 33 | return list(map(str_lower, batch)) 34 | -------------------------------------------------------------------------------- /deeppavlov/models/ranking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/ranking/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/ranking/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numpy as np 16 | 17 | from deeppavlov.core.common.metrics_registry import register_metric 18 | 19 | 20 | @register_metric('rank_response') 21 | def rank_response(y_true, y_pred): 22 | num_examples = float(len(y_pred)) 23 | predictions = np.array(y_pred) 24 | predictions = np.flip(np.argsort(predictions, -1), -1) 25 | rank_tot = 0 26 | for el in predictions: 27 | for i, x in enumerate(el): 28 | if x == 0: 29 | rank_tot += i 30 | break 31 | return float(rank_tot) / num_examples 32 | 33 | 34 | @register_metric('r@1_insQA') 35 | def r_at_1_insQA(y_true, y_pred): 36 | return recall_at_k_insQA(y_true, y_pred, k=1) 37 | 38 | 39 | def recall_at_k_insQA(y_true, y_pred, k): 40 | labels = np.repeat(np.expand_dims(np.asarray(y_true), axis=1), k, axis=1) 41 | predictions = np.array(y_pred) 42 | predictions = np.flip(np.argsort(predictions, -1), -1)[:, :k] 43 | flags = np.zeros_like(predictions) 44 | for i in range(predictions.shape[0]): 45 | for j in range(predictions.shape[1]): 46 | if predictions[i][j] in np.arange(labels[i][j]): 47 | flags[i][j] = 1. 
48 | return np.mean((np.sum(flags, -1) >= 1.).astype(float)) 49 | -------------------------------------------------------------------------------- /deeppavlov/models/relation_extraction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/relation_extraction/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/relation_extraction/losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is copied from ATLOP algorithm (https://github.com/wzhouad/ATLOP/blob/main/losses.py) 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch import Tensor 9 | 10 | 11 | class ATLoss(nn.Module): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | def forward(self, logits: Tensor, labels: Tensor) -> Tensor: 16 | """ 17 | Args: 18 | logits: predicted probabilities (shape: batch size x num classes) 19 | labels: one-hot encoded true labels (shape: batch size x num classes) 20 | """ 21 | 22 | # TH label 23 | th_label = torch.zeros_like(labels, dtype=torch.float).to(labels) 24 | th_label[:, 0] = 1.0 25 | labels[:, 0] = 0.0 26 | 27 | p_mask = labels + th_label # = 1 for the gold labels and for class 0 (the threshold class), 0 otherwise 28 | n_mask = 1 - labels # = 0 for the gold labels, 1 otherwise 29 | 30 | # Rank positive classes to TH 31 | logit1 = logits - (1 - p_mask) * 1e30 # original logits remain for gold labels + class 0, others are reduced by 1e30 32 | loss1 = -(F.log_softmax(logit1, dim=-1) * labels).sum(1) 33 | 34 | # Rank TH to negative classes 35 | logit2 = logits - (1 - n_mask) * 1e30 # original logits remain for not-gold, non-0 classes, others are reduced by 1e30 36 | loss2 = -(F.log_softmax(logit2, dim=-1) * th_label).sum(1) 37 | 38 | # Sum two parts 39 | loss = loss1 + loss2 40 | loss = loss.mean() 41 | return loss 42 | 43 | def get_label(self, logits: Tensor, num_labels: int = -1, threshold: float = None) -> Tensor: 44 | """ Calculates the labels """ 45 | if threshold: 46 | th_logit = torch.full((len(logits), 1), threshold) 47 | else: 48 | th_logit = logits[:, 0].unsqueeze(1) # vector of predicted probabilities for class 0 (negative class) 49 | output = torch.zeros_like(logits).to(logits) 50 | mask = (logits > th_logit) # for each sample: True, if prob for a class > prob for neg class, False otherwise 51 | if num_labels > 0: 52 | top_v, _ = torch.topk(logits, num_labels, dim=1) # num_labels max elements; sorted 53 | top_v = top_v[:, -1] # the smallest of the top num_labels probabilities for each sample 54 | mask = (logits >= top_v.unsqueeze(1)) & mask # mask + additionally: logits should reach the top-k minimum 55 | output[mask] = 1.0 56 | output[:, 0] = (output.sum(1) == 0.).to(logits) # no relation if no label matched 57 | return output 58 | -------------------------------------------------------------------------------- /deeppavlov/models/sklearn/__init__.py: -------------------------------------------------------------------------------- 1 | from .sklearn_component import * 2 | -------------------------------------------------------------------------------- /deeppavlov/models/spelling_correction/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/spelling_correction/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/spelling_correction/brillmoore/__init__.py: -------------------------------------------------------------------------------- 1 | from .error_model import ErrorModel 2 | -------------------------------------------------------------------------------- /deeppavlov/models/spelling_correction/electors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/spelling_correction/electors/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/spelling_correction/electors/top1_elector.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from logging import getLogger 16 | from typing import List, Tuple 17 | 18 | from deeppavlov.core.common.registry import register 19 | from deeppavlov.core.models.component import Component 20 | 21 | logger = getLogger(__name__) 22 | 23 | 24 | @register('top1_elector') 25 | class TopOneElector(Component): 26 | """Component that chooses a candidate with highest base probability for every token 27 | 28 | """ 29 | 30 | def __init__(self, *args, **kwargs): 31 | pass 32 | 33 | def __call__(self, batch: List[List[List[Tuple[float, str]]]]) -> List[List[str]]: 34 | """Choose the best candidate for every token 35 | 36 | Args: 37 | batch: batch of probabilities and string values of candidates for every token in a sentence 38 | 39 | Returns: 40 | batch of corrected tokenized sentences 41 | """ 42 | return [[max(sublist)[1] for sublist in candidates] for candidates in batch] 43 | -------------------------------------------------------------------------------- /deeppavlov/models/spelling_correction/levenshtein/__init__.py: -------------------------------------------------------------------------------- 1 | from .searcher_component import LevenshteinSearcherComponent 2 | -------------------------------------------------------------------------------- /deeppavlov/models/tokenizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/tokenizers/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/tokenizers/lazy_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 
"License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from logging import getLogger 16 | 17 | from nltk import word_tokenize 18 | 19 | from deeppavlov.core.common.registry import register 20 | 21 | log = getLogger(__name__) 22 | 23 | 24 | @register('lazy_tokenizer') 25 | def lazy_tokenizer(batch): 26 | """Tokenizes if there is something to tokenize.""" 27 | 28 | if len(batch) > 0 and isinstance(batch[0], str): 29 | batch = [word_tokenize(utt) for utt in batch] 30 | return batch 31 | -------------------------------------------------------------------------------- /deeppavlov/models/tokenizers/nltk_moses_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from typing import Union, List 15 | 16 | from sacremoses import MosesDetokenizer, MosesTokenizer 17 | 18 | from deeppavlov.core.common.registry import register 19 | from deeppavlov.core.models.component import Component 20 | 21 | 22 | @register("nltk_moses_tokenizer") 23 | class NLTKMosesTokenizer(Component): 24 | """Class for splitting texts into tokens using the sacremoses MosesTokenizer 25 | 26 | Attributes: 27 | escape: whether to escape special characters for use in HTML markup 28 | tokenizer: tokenizer instance from sacremoses 29 | detokenizer: detokenizer instance from sacremoses 30 | 31 | Args: 32 | escape: whether to escape special characters for use in HTML markup 33 | """ 34 | 35 | def __init__(self, escape: bool = False, *args, **kwargs): 36 | self.escape = escape 37 | self.tokenizer = MosesTokenizer() 38 | self.detokenizer = MosesDetokenizer() 39 | 40 | def __call__(self, batch: List[Union[str, List[str]]]) -> List[Union[List[str], str]]: 41 | """Tokenize given batch of strings or detokenize given batch of lists of tokens 42 | 43 | Args: 44 | batch: list of text samples or list of lists of tokens 45 | 46 | Returns: 47 | list of lists of tokens or list of text samples 48 | """ 49 | if isinstance(batch[0], str): 50 | return [self.tokenizer.tokenize(line, escape=self.escape) for line in batch] 51 | else: 52 | return [self.detokenizer.detokenize(line, return_str=True, unescape=self.escape) 53 | for line in batch] 54 | -------------------------------------------------------------------------------- /deeppavlov/models/tokenizers/nltk_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
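# A short sketch of the NLTKTokenizer defined below in its default
# wordpunct_tokenize mode (assumes the nltk package is installed):
#
#     from deeppavlov.models.tokenizers.nltk_tokenizer import NLTKTokenizer
#     NLTKTokenizer()(["Don't panic!"])  # -> [['Don', "'", 't', 'panic', '!']]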
14 | 15 | from typing import List 16 | 17 | import nltk 18 | 19 | from deeppavlov.core.common.registry import register 20 | from deeppavlov.core.models.component import Component 21 | 22 | 23 | @register("nltk_tokenizer") 24 | class NLTKTokenizer(Component): 25 | """Class for splitting texts into tokens using NLTK 26 | 27 | Args: 28 | tokenizer: tokenization mode for `nltk.tokenize` 29 | download: whether to download nltk data 30 | 31 | Attributes: 32 | tokenizer: tokenizer function from nltk.tokenize 33 | """ 34 | 35 | def __init__(self, tokenizer: str = "wordpunct_tokenize", download: bool = False, 36 | *args, **kwargs): 37 | if download: 38 | nltk.download() 39 | self.tokenizer = getattr(nltk.tokenize, tokenizer, None) 40 | if not callable(self.tokenizer): 41 | raise AttributeError("Tokenizer {} is not defined in nltk.tokenize".format(tokenizer)) 42 | 43 | def __call__(self, batch: List[str]) -> List[List[str]]: 44 | """Tokenize given batch 45 | 46 | Args: 47 | batch: list of text samples 48 | 49 | Returns: 50 | list of lists of tokens 51 | """ 52 | return [self.tokenizer(sent) for sent in batch] 53 | -------------------------------------------------------------------------------- /deeppavlov/models/tokenizers/split_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import List 16 | 17 | from deeppavlov.core.common.registry import register 18 | from deeppavlov.core.models.component import Component 19 | 20 | 21 | @register("split_tokenizer") 22 | class SplitTokenizer(Component): 23 | """ 24 | Splits utterances into tokens using plain Python ``str.split()``. 25 | 26 | Doesn't have any parameters. 27 | """ 28 | 29 | def __init__(self, **kwargs) -> None: 30 | pass 31 | 32 | def __call__(self, batch: List[str]) -> List[List[str]]: 33 | """ 34 | Tokenize given batch 35 | 36 | Args: 37 | batch: list of texts to tokenize 38 | 39 | Returns: 40 | tokenized batch 41 | """ 42 | if isinstance(batch, (list, tuple)): 43 | return [sample.split() for sample in batch] 44 | else: 45 | raise NotImplementedError('not implemented for types other than' 46 | ' list or tuple') 47 | -------------------------------------------------------------------------------- /deeppavlov/models/tokenizers/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import re 16 | from typing import List, Generator, Any 17 | 18 | 19 | def detokenize(tokens): 20 | """ 21 | Detokenizing a text undoes the tokenizing operation, restores 22 | punctuation and spaces to the places that people expect them to be. 23 | Ideally, `detokenize(tokenize(text))` should be identical to `text`, 24 | except for line breaks. 25 | """ 26 | text = ' '.join(tokens) 27 | step0 = text.replace('. . .', '...') 28 | step1 = step0.replace("`` ", '"').replace(" ''", '"') 29 | step2 = step1.replace(" ( ", " (").replace(" ) ", ") ") 30 | step3 = re.sub(r' ([.,:;?!%]+)([ \'"`])', r"\1\2", step2) 31 | step4 = re.sub(r' ([.,:;?!%]+)$', r"\1", step3) 32 | step5 = step4.replace(" '", "'").replace(" n't", "n't") \ 33 | .replace(" nt", "nt").replace("can not", "cannot") 34 | step6 = step5.replace(" ` ", " '") 35 | return step6.strip() 36 | 37 | 38 | def ngramize(items: List[str], ngram_range=(1, 1), doc: str = None) -> Generator[List[str], Any, None]: 39 | """ 40 | Make ngrams from a list of tokens/lemmas 41 | :param items: list of tokens, lemmas or other strings to form ngrams 42 | :param ngram_range: range for producing ngrams, ex. for unigrams + bigrams should be set to 43 | (1, 2), for bigrams only should be set to (2, 2) 44 | :return: ngrams (as strings) generator 45 | """ 46 | 47 | ngrams = [] 48 | ranges = [(0, i) for i in range(ngram_range[0], ngram_range[1] + 1)] 49 | for r in ranges: 50 | ngrams += list(zip(*[items[j:] for j in range(*r)])) 51 | 52 | formatted_ngrams = [' '.join(item) for item in ngrams] 53 | if doc is not None: 54 | doc_lower = doc.lower() 55 | formatted_ngrams = [ngram for ngram in formatted_ngrams if (ngram in doc or ngram in doc_lower)] 56 | 57 | yield formatted_ngrams 58 | -------------------------------------------------------------------------------- /deeppavlov/models/torch_bert/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/models/torch_bert/__init__.py -------------------------------------------------------------------------------- /deeppavlov/models/torch_bert/crf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torchcrf import CRF as CRFbase 5 | 6 | 7 | class CRF(CRFbase): 8 | """Class with Conditional Random Field from PyTorch-CRF library 9 | with modified training function 10 | """ 11 | 12 | def __init__(self, num_tags: int, batch_first: bool = False) -> None: 13 | super().__init__(num_tags=num_tags, batch_first=batch_first) 14 | nn.init.zeros_(self.transitions) 15 | nn.init.zeros_(self.start_transitions) 16 | nn.init.zeros_(self.end_transitions) 17 | self.stats = torch.zeros((num_tags, num_tags), dtype=torch.float) 18 | self.zeros = torch.zeros((num_tags, num_tags), dtype=torch.float) 19 | self.neg = torch.full((num_tags, num_tags), -1000.0) 20 | 21 | def forward(self, tags_batch: torch.LongTensor, y_masks: np.ndarray): 22 | seq_lengths = 
np.sum(y_masks, axis=1) 23 | for seq_len, tags_list in zip(seq_lengths, tags_batch): 24 | if seq_len > 1: 25 | for i in range(seq_len - 1): 26 | self.stats[int(tags_list[i])][int(tags_list[i + 1])] += 1.0 27 | with torch.no_grad(): 28 | self.transitions.copy_(torch.where(self.stats > 0, self.zeros, self.neg)) 29 | -------------------------------------------------------------------------------- /deeppavlov/models/vectorizers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/datasets.txt: -------------------------------------------------------------------------------- 1 | datasets>=1.16.0,<2.5.0;python_version<="3.10" 2 | datasets==2.2.*;python_version=="3.11.*" 3 | -------------------------------------------------------------------------------- /deeppavlov/requirements/dependency_decoding.txt: -------------------------------------------------------------------------------- 1 | ufal.chu-liu-edmonds 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/en_core_web_sm.txt: -------------------------------------------------------------------------------- 1 | https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl 2 | spacy 3 | -------------------------------------------------------------------------------- /deeppavlov/requirements/faiss.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu==1.7.2;python_version<="3.10" 2 | faiss-cpu==1.7.4;python_version=="3.11.*" 3 | -------------------------------------------------------------------------------- /deeppavlov/requirements/fasttext.txt: -------------------------------------------------------------------------------- 1 | fasttext==0.9.* 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/hdt.txt: -------------------------------------------------------------------------------- 1 | hdt==2.3 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/kenlm.txt: -------------------------------------------------------------------------------- 1 | pypi-kenlm==0.1.20220713;python_version<="3.10" 2 | kenlm==0.2.*;python_version=="3.11.*" 3 | -------------------------------------------------------------------------------- /deeppavlov/requirements/lxml.txt: -------------------------------------------------------------------------------- 1 | lxml==4.9.* 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/opt_einsum.txt: -------------------------------------------------------------------------------- 1 | opt-einsum==3.3.* 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/protobuf.txt: -------------------------------------------------------------------------------- 1 | protobuf<=3.20 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/pytorch.txt: -------------------------------------------------------------------------------- 1 | torch>=1.6.0,<1.14.0 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/rapidfuzz.txt: -------------------------------------------------------------------------------- 1 | 
rapidfuzz==2.1.* 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/razdel.txt: -------------------------------------------------------------------------------- 1 | razdel==0.5.0 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/ru_core_news_sm.txt: -------------------------------------------------------------------------------- 1 | https://github.com/explosion/spacy-models/releases/download/ru_core_news_sm-3.5.0/ru_core_news_sm-3.5.0-py3-none-any.whl 2 | spacy 3 | -------------------------------------------------------------------------------- /deeppavlov/requirements/sacremoses.txt: -------------------------------------------------------------------------------- 1 | sacremoses==0.0.53 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/sentencepiece.txt: -------------------------------------------------------------------------------- 1 | sentencepiece==0.2.0 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/slovnet.txt: -------------------------------------------------------------------------------- 1 | slovnet==0.5.* 2 | navec 3 | -------------------------------------------------------------------------------- /deeppavlov/requirements/sortedcontainers.txt: -------------------------------------------------------------------------------- 1 | sortedcontainers==2.4.* 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/torchcrf.txt: -------------------------------------------------------------------------------- 1 | pytorch-crf==0.7.* 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/transformers.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.13.0,<4.25.0;python_version<"3.8" 2 | transformers==4.30.0;python_version>="3.8" 3 | -------------------------------------------------------------------------------- /deeppavlov/requirements/udapi.txt: -------------------------------------------------------------------------------- 1 | udapi==0.3.* 2 | -------------------------------------------------------------------------------- /deeppavlov/requirements/whapi.txt: -------------------------------------------------------------------------------- 1 | bs4 2 | whapi==0.6.* 3 | -------------------------------------------------------------------------------- /deeppavlov/settings.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
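# Typical shell invocations of this settings utility once DeepPavlov is
# installed (the module prints the settings path, or restores defaults
# with -d):
#
#     python -m deeppavlov.settings        # show the current settings path
#     python -m deeppavlov.settings -d     # return settings to defaults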
14 | 15 | import argparse 16 | 17 | from deeppavlov.core.common.paths import get_settings_path, populate_settings_dir 18 | 19 | parser = argparse.ArgumentParser() 20 | 21 | parser.add_argument("-d", "--default", action="store_true", help="return to defaults") 22 | 23 | 24 | def main(): 25 | """DeepPavlov console configuration utility.""" 26 | args = parser.parse_args() 27 | path = get_settings_path() 28 | 29 | if args.default: 30 | if populate_settings_dir(force=True): 31 | print(f'Populated {path} with default settings files') 32 | else: 33 | print(f'{path} is already a default settings directory') 34 | else: 35 | print(f'Current DeepPavlov settings path: {path}') 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /deeppavlov/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/utils/__init__.py -------------------------------------------------------------------------------- /deeppavlov/utils/benchmarks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/utils/benchmarks/__init__.py -------------------------------------------------------------------------------- /deeppavlov/utils/connector/__init__.py: -------------------------------------------------------------------------------- 1 | from .dialog_logger import DialogLogger 2 | -------------------------------------------------------------------------------- /deeppavlov/utils/pip_wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | from .pip_wrapper import * 2 | -------------------------------------------------------------------------------- /deeppavlov/utils/server/__init__.py: -------------------------------------------------------------------------------- 1 | from .server import get_server_params, get_ssl_params, redirect_root_to_docs, start_model_server 2 | -------------------------------------------------------------------------------- /deeppavlov/utils/server/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
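# A minimal sketch of plugging the middleware and /metrics handler defined
# below into a FastAPI app; the app object and the route path are illustrative.
#
#     from fastapi import FastAPI
#     app = FastAPI()
#     app.add_middleware(PrometheusMiddleware, ignore_paths=('/metrics',))
#     app.add_api_route('/metrics', metrics, methods=['GET'])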
14 | 15 | import time 16 | from typing import Tuple 17 | 18 | from prometheus_client import CONTENT_TYPE_LATEST, REGISTRY, generate_latest 19 | from prometheus_client import Counter, Gauge, Histogram 20 | from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint 21 | from starlette.requests import Request 22 | from starlette.responses import Response 23 | from starlette.types import ASGIApp 24 | 25 | REQUESTS_COUNT = Counter('http_requests_count', 'Number of processed requests', ['endpoint', 'status_code']) 26 | REQUESTS_LATENCY = Histogram('http_requests_latency_seconds', 'Request latency histogram', ['endpoint']) 27 | REQUESTS_IN_PROGRESS = Gauge('http_requests_in_progress', 'Number of requests currently being processed', ['endpoint']) 28 | 29 | 30 | def metrics(request: Request) -> Response: 31 | return Response(generate_latest(REGISTRY), media_type=CONTENT_TYPE_LATEST) 32 | 33 | 34 | class PrometheusMiddleware(BaseHTTPMiddleware): 35 | def __init__(self, app: ASGIApp, ignore_paths: Tuple = ()) -> None: 36 | super().__init__(app) 37 | self.ignore_paths = ignore_paths 38 | 39 | async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response: 40 | endpoint = request.url.path 41 | 42 | if endpoint in self.ignore_paths: 43 | return await call_next(request) 44 | 45 | REQUESTS_IN_PROGRESS.labels(endpoint=endpoint).inc() 46 | 47 | start_time = time.perf_counter() 48 | status_code = 500 49 | 50 | try: 51 | response = await call_next(request) 52 | status_code = response.status_code 53 | finally: 54 | if status_code == 200: 55 | duration = time.perf_counter() - start_time 56 | REQUESTS_LATENCY.labels(endpoint=endpoint).observe(duration) 57 | REQUESTS_COUNT.labels(endpoint=endpoint, status_code=status_code).inc() 58 | REQUESTS_IN_PROGRESS.labels(endpoint=endpoint).dec() 59 | 60 | return response 61 | -------------------------------------------------------------------------------- /deeppavlov/utils/settings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/utils/settings/__init__.py -------------------------------------------------------------------------------- /deeppavlov/utils/settings/dialog_logger_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "enabled": false, 3 | "logger_name": "default", 4 | "log_path": "~/.deeppavlov/dialog_logs", 5 | "logfile_max_size_kb": 10240, 6 | "ensure_ascii": false 7 | } -------------------------------------------------------------------------------- /deeppavlov/utils/settings/log_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "disable_existing_loggers": false, 4 | "loggers": { 5 | "deeppavlov": { 6 | "level": "INFO", 7 | "handlers": [ 8 | "stderr" 9 | ], 10 | "propagate": true 11 | }, 12 | "uvicorn.access": { 13 | "level": "INFO", 14 | "handlers": [ 15 | "uvicorn_handler" 16 | ], 17 | "propagate": true 18 | }, 19 | "uvicorn.error": { 20 | "level": "INFO", 21 | "handlers": [ 22 | "uvicorn_handler" 23 | ], 24 | "propagate": true 25 | }, 26 | "train_report": { 27 | "level": "INFO", 28 | "handlers": [ 29 | "train_handler" 30 | ], 31 | "propagate": true 32 | }, 33 | "filelock": { 34 | "level": "WARNING", 35 | "handlers": [ 36 | "stdout" 37 | ], 38 | "propagate": true 39 | } 40 | }, 41 | "formatters": { 42 | "default": { 43 | "format": 
"%(asctime)s.%(msecs)d %(levelname)s in '%(name)s'['%(module)s'] at line %(lineno)d: %(message)s", 44 | "datefmt": "%Y-%m-%d %H:%M:%S" 45 | }, 46 | "uvicorn_fmt": { 47 | "format": "%(asctime)s %(message)s", 48 | "datefmt": "%Y-%m-%d %H:%M:%S" 49 | }, 50 | "message": { 51 | "format": "%(message)s" 52 | } 53 | }, 54 | "handlers": { 55 | "file": { 56 | "class": "logging.FileHandler", 57 | "level": "DEBUG", 58 | "formatter": "default", 59 | "filename": "~/.deeppavlov/log.log" 60 | }, 61 | "stdout": { 62 | "class": "logging.StreamHandler", 63 | "level": "DEBUG", 64 | "formatter": "default", 65 | "stream": "ext://sys.stdout" 66 | }, 67 | "stderr": { 68 | "class": "logging.StreamHandler", 69 | "level": "DEBUG", 70 | "formatter": "default", 71 | "stream": "ext://sys.stderr" 72 | }, 73 | "uvicorn_handler": { 74 | "class": "logging.StreamHandler", 75 | "level": "INFO", 76 | "formatter": "uvicorn_fmt", 77 | "stream": "ext://sys.stdout", 78 | "filters": ["probeFilter"] 79 | }, 80 | "train_handler": { 81 | "class": "logging.StreamHandler", 82 | "level": "INFO", 83 | "formatter": "message", 84 | "stream": "ext://sys.stdout" 85 | } 86 | }, 87 | "filters": { 88 | "probeFilter": { 89 | "()": "deeppavlov.core.common.log.ProbeFilter" 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /deeppavlov/utils/settings/server_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "common_defaults": { 3 | "host": "0.0.0.0", 4 | "port": 5000, 5 | "model_args_names": [], 6 | "https": false, 7 | "https_cert_path": "", 8 | "https_key_path": "", 9 | "socket_type": "TCP", 10 | "unix_socket_file": "/tmp/deeppavlov_socket.s", 11 | "socket_launch_message": "launching socket server at" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /deeppavlov/utils/socket/__init__.py: -------------------------------------------------------------------------------- 1 | from .socket import encode, start_socket_server 2 | -------------------------------------------------------------------------------- /deeppavlov/vocabs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/deeppavlov/vocabs/__init__.py -------------------------------------------------------------------------------- /deeppavlov/vocabs/wiki_sqlite.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from logging import getLogger 16 | from typing import List, Any, Optional, Union 17 | 18 | from deeppavlov.core.common.registry import register 19 | from deeppavlov.core.models.component import Component 20 | from deeppavlov.dataset_iterators.sqlite_iterator import SQLiteDataIterator 21 | 22 | logger = getLogger(__name__) 23 | 24 | 25 | @register('wiki_sqlite_vocab') 26 | class WikiSQLiteVocab(SQLiteDataIterator, Component): 27 | """Get content from SQLite database by document ids. 28 | 29 | Args: 30 | load_path: a path to the local DB file 31 | join_docs: whether to join extracted docs with ' ' or not 32 | shuffle: whether to shuffle data or not 33 | 34 | Attributes: 35 | join_docs: whether to join extracted docs with ' ' or not 36 | 37 | """ 38 | 39 | def __init__(self, load_path: str, join_docs: bool = True, shuffle: bool = False, **kwargs) -> None: 40 | SQLiteDataIterator.__init__(self, load_path=load_path, shuffle=shuffle) 41 | self.join_docs = join_docs 42 | 43 | def __call__(self, doc_ids: Optional[List[List[Any]]] = None, *args, **kwargs) -> List[Union[str, List[str]]]: 44 | """Get the contents of documents, either joined with spaces or as separate strings. 45 | 46 | Args: 47 | doc_ids: a batch of lists of ids to get contents for 48 | 49 | Returns: 50 | a list of contents / list of lists of contents 51 | """ 52 | all_contents = [] 53 | if not doc_ids: 54 | logger.warning('No doc_ids provided to WikiSQLiteVocab, returning all docs') 55 | doc_ids = [self.get_doc_ids()] 56 | 57 | for ids in doc_ids: 58 | contents = [self.get_doc_content(doc_id) for doc_id in ids] 59 | if self.join_docs: 60 | contents = ' '.join(contents) 61 | all_contents.append(contents) 62 | 63 | return all_contents 64 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = -WT 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = DeepPavlov 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_static/aws_ec2/01_login_to_aws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/01_login_to_aws.png -------------------------------------------------------------------------------- /docs/_static/aws_ec2/02_choose_ubuntu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/02_choose_ubuntu.png -------------------------------------------------------------------------------- /docs/_static/aws_ec2/03_select_instance_type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/03_select_instance_type.png -------------------------------------------------------------------------------- /docs/_static/aws_ec2/04_add_storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/04_add_storage.png -------------------------------------------------------------------------------- /docs/_static/aws_ec2/05_review_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/05_review_instance.png -------------------------------------------------------------------------------- /docs/_static/aws_ec2/06_go_to_running_instances.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/06_go_to_running_instances.png -------------------------------------------------------------------------------- /docs/_static/aws_ec2/07_wait_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/07_wait_init.png -------------------------------------------------------------------------------- /docs/_static/aws_ec2/08_01_set_sec_group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/08_01_set_sec_group.png -------------------------------------------------------------------------------- /docs/_static/aws_ec2/08_02_set_inbound.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/08_02_set_inbound.png -------------------------------------------------------------------------------- /docs/_static/aws_ec2/09_01_select_connect.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/09_01_select_connect.png -------------------------------------------------------------------------------- /docs/_static/aws_ec2/09_02_connection_info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/aws_ec2/09_02_connection_info.png -------------------------------------------------------------------------------- /docs/_static/deeppavlov.css: -------------------------------------------------------------------------------- 1 | .wy-side-nav-search { 2 | background-color: #0176bd; 3 | } 4 | 5 | .wy-nav-content { 6 | max-width: 1000px; 7 | } 8 | 9 | .wy-side-nav-search>div.version { 10 | color: #ffffff; 11 | } -------------------------------------------------------------------------------- /docs/_static/deeppavlov.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/deeppavlov.png -------------------------------------------------------------------------------- /docs/_static/deeppavlov_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/deeppavlov_logo.png -------------------------------------------------------------------------------- /docs/_static/dp_agnt_diag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/dp_agnt_diag.png -------------------------------------------------------------------------------- /docs/_static/gobot_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/gobot_diagram.png -------------------------------------------------------------------------------- /docs/_static/ipavlov_footer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/ipavlov_footer.png -------------------------------------------------------------------------------- /docs/_static/kvret_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/kvret_diagram.png -------------------------------------------------------------------------------- /docs/_static/my_blocks.css: -------------------------------------------------------------------------------- 1 | button.copybtn svg { 2 | width: 1.3em; 3 | height: 1.3em; 4 | padding: 0.1em; 5 | } 6 | 7 | button.copybtn { 8 | top: 0.2em; 9 | width: 1.4em; 10 | height: 1.4em; 11 | } 12 | 13 | .rst-content .linenodiv pre, .rst-content div[class^=highlight] pre, .rst-content pre.literal-block { 14 | font-size: 13px; 15 | line-height: 1.4; 16 | } 17 | -------------------------------------------------------------------------------- /docs/_static/social/Medium_Monogram.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/social/Medium_Monogram.svg -------------------------------------------------------------------------------- /docs/_static/social/Twitter_Social_Icon_Circle_Color.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/social/Twitter_Social_Icon_Circle_Color.svg -------------------------------------------------------------------------------- /docs/_static/social/telegram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/social/telegram.png -------------------------------------------------------------------------------- /docs/_static/social/youtube_social_circle_red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/social/youtube_social_circle_red.png -------------------------------------------------------------------------------- /docs/_static/tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/docs/_static/tree.png -------------------------------------------------------------------------------- /docs/apiref/core.rst: -------------------------------------------------------------------------------- 1 | core 2 | ==== 3 | DeepPavlov Core 4 | 5 | .. automodule:: deeppavlov.core 6 | :members: 7 | 8 | .. toctree:: 9 | :glob: 10 | :caption: Core 11 | 12 | core/* 13 | -------------------------------------------------------------------------------- /docs/apiref/core/commands.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.core.commands 2 | ======================== 3 | Basic training and inference functions. 4 | 5 | .. automodule:: deeppavlov.core.commands.infer 6 | :members: 7 | 8 | .. automodule:: deeppavlov.core.commands.train 9 | :members: 10 | -------------------------------------------------------------------------------- /docs/apiref/core/common.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.core.common 2 | ====================== 3 | Registration and classes initialization functionality, class method decorators. 4 | 5 | .. autoclass:: deeppavlov.core.common.chainer.Chainer 6 | :members: 7 | 8 | .. automethod:: __call__ 9 | 10 | .. autoclass:: deeppavlov.core.common.base.Element 11 | 12 | .. automethod:: __init__ 13 | 14 | .. autoclass:: deeppavlov.core.common.base.Model 15 | 16 | .. automethod:: __init__ 17 | 18 | .. automodule:: deeppavlov.core.common.metrics_registry 19 | :members: 20 | 21 | .. automodule:: deeppavlov.core.common.params 22 | :members: 23 | 24 | .. automodule:: deeppavlov.core.common.registry 25 | :members: 26 | -------------------------------------------------------------------------------- /docs/apiref/core/data.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.core.data 2 | ==================== 3 | DatasetReader, Vocab, DataLearningIterator and DataFittingIterator classes. 4 | 5 | .. autoclass:: deeppavlov.core.data.dataset_reader.DatasetReader 6 | 7 | .. autoclass:: deeppavlov.core.data.data_fitting_iterator.DataFittingIterator 8 | 9 | ..
autoclass:: deeppavlov.core.data.data_learning_iterator.DataLearningIterator 10 | 11 | .. autoclass:: deeppavlov.core.data.simple_vocab.SimpleVocabulary 12 | -------------------------------------------------------------------------------- /docs/apiref/core/models.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.core.models 2 | ====================== 3 | Abstract model classes and interfaces. 4 | 5 | .. autoclass:: deeppavlov.core.models.component.Component 6 | 7 | .. autoclass:: deeppavlov.core.models.serializable.Serializable 8 | 9 | .. autoclass:: deeppavlov.core.models.estimator.Estimator 10 | 11 | .. autoclass:: deeppavlov.core.models.nn_model.NNModel 12 | 13 | .. autoclass:: deeppavlov.core.models.torch_model.TorchModel 14 | -------------------------------------------------------------------------------- /docs/apiref/core/trainers.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.core.trainers 2 | ======================== 3 | Trainer classes. 4 | 5 | .. autoclass:: deeppavlov.core.trainers.FitTrainer 6 | :members: 7 | 8 | .. autoclass:: deeppavlov.core.trainers.NNTrainer 9 | :members: 10 | :inherited-members: 11 | -------------------------------------------------------------------------------- /docs/apiref/dataset_iterators.rst: -------------------------------------------------------------------------------- 1 | dataset_iterators 2 | ================= 3 | Concrete DatasetIterator classes. 4 | 5 | .. autoclass:: deeppavlov.dataset_iterators.basic_classification_iterator.BasicClassificationDatasetIterator 6 | :members: 7 | 8 | .. autoclass:: deeppavlov.dataset_iterators.siamese_iterator.SiameseIterator 9 | 10 | .. autoclass:: deeppavlov.dataset_iterators.sqlite_iterator.SQLiteDataIterator 11 | 12 | .. autoclass:: deeppavlov.dataset_iterators.squad_iterator.SquadIterator 13 | 14 | .. automodule:: deeppavlov.dataset_iterators.typos_iterator 15 | :members: 16 | 17 | .. automodule:: deeppavlov.dataset_iterators.multitask_iterator 18 | :members: 19 | -------------------------------------------------------------------------------- /docs/apiref/dataset_readers.rst: -------------------------------------------------------------------------------- 1 | dataset_readers 2 | =============== 3 | Concrete DatasetReader classes. 4 | 5 | .. autoclass:: deeppavlov.dataset_readers.basic_classification_reader.BasicClassificationDatasetReader 6 | :members: 7 | 8 | .. autoclass:: deeppavlov.dataset_readers.conll2003_reader.Conll2003DatasetReader 9 | 10 | .. autoclass:: deeppavlov.dataset_readers.faq_reader.FaqDatasetReader 11 | :members: 12 | 13 | .. autoclass:: deeppavlov.dataset_readers.line_reader.LineReader 14 | :members: 15 | 16 | .. autoclass:: deeppavlov.dataset_readers.paraphraser_reader.ParaphraserReader 17 | 18 | .. autoclass:: deeppavlov.dataset_readers.squad_dataset_reader.SquadDatasetReader 19 | :members: 20 | 21 | .. automodule:: deeppavlov.dataset_readers.typos_reader 22 | :members: 23 | 24 | .. automodule:: deeppavlov.dataset_readers.ubuntu_v2_reader 25 | :members: 26 | 27 | .. automodule:: deeppavlov.dataset_readers.multitask_reader 28 | :members: 29 | -------------------------------------------------------------------------------- /docs/apiref/metrics.rst: -------------------------------------------------------------------------------- 1 | metrics 2 | ======= 3 | Different Metric functions. 4 | 5 | .. automodule:: deeppavlov.metrics 6 | :members: 7 | 8 | .. 
autofunction:: deeppavlov.metrics.accuracy.sets_accuracy 9 | 10 | .. autofunction:: deeppavlov.metrics.fmeasure.round_f1 11 | 12 | .. autofunction:: deeppavlov.metrics.fmeasure.round_f1_macro 13 | 14 | .. autofunction:: deeppavlov.metrics.fmeasure.round_f1_weighted 15 | 16 | .. autofunction:: deeppavlov.metrics.fmeasure.ner_f1 17 | 18 | .. autofunction:: deeppavlov.metrics.fmeasure.ner_token_f1 19 | 20 | .. autofunction:: deeppavlov.metrics.log_loss.sk_log_loss 21 | 22 | .. autofunction:: deeppavlov.metrics.roc_auc_score.roc_auc_score 23 | -------------------------------------------------------------------------------- /docs/apiref/models.rst: -------------------------------------------------------------------------------- 1 | models 2 | ====== 3 | Concrete Model classes. 4 | 5 | .. automodule:: deeppavlov.models 6 | :members: 7 | 8 | .. toctree:: 9 | :glob: 10 | :caption: Models 11 | 12 | models/* -------------------------------------------------------------------------------- /docs/apiref/models/api_requester.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.api_requester 2 | =============================== 3 | 4 | .. automodule:: deeppavlov.models.api_requester 5 | :members: 6 | 7 | .. autoclass:: deeppavlov.models.api_requester.api_requester.ApiRequester 8 | 9 | .. automethod:: __call__ 10 | .. automethod:: get_async_response 11 | 12 | 13 | .. autoclass:: deeppavlov.models.api_requester.api_router.ApiRouter 14 | 15 | .. automethod:: __call__ 16 | -------------------------------------------------------------------------------- /docs/apiref/models/classifiers.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.classifiers 2 | ============================= 3 | 4 | .. automodule:: deeppavlov.models.classifiers 5 | :members: 6 | 7 | .. autoclass:: deeppavlov.models.classifiers.torch_classification_model.TorchTextClassificationModel 8 | :members: 9 | 10 | .. automethod:: __call__ 11 | 12 | .. autoclass:: deeppavlov.models.classifiers.cos_sim_classifier.CosineSimilarityClassifier 13 | :members: 14 | 15 | .. automethod:: __call__ 16 | 17 | .. autoclass:: deeppavlov.models.classifiers.proba2labels.Proba2Labels 18 | :members: 19 | 20 | .. automethod:: __call__ 21 | -------------------------------------------------------------------------------- /docs/apiref/models/doc_retrieval.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.doc_retrieval 2 | =============================== 3 | 4 | Document retrieval classes. 5 | 6 | .. automodule:: deeppavlov.models.doc_retrieval 7 | 8 | .. autoclass:: deeppavlov.models.doc_retrieval.tfidf_ranker.TfidfRanker 9 | :members: 10 | 11 | .. automethod:: __call__ 12 | 13 | .. autoclass:: deeppavlov.models.doc_retrieval.logit_ranker.LogitRanker 14 | :members: 15 | 16 | .. automethod:: __call__ 17 | 18 | .. autoclass:: deeppavlov.models.doc_retrieval.pop_ranker.PopRanker 19 | :members: 20 | 21 | .. automethod:: __call__ -------------------------------------------------------------------------------- /docs/apiref/models/embedders.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.embedders 2 | ============================ 3 | 4 | .. autoclass:: deeppavlov.models.embedders.fasttext_embedder.FasttextEmbedder 5 | 6 | .. automethod:: __call__ 7 | .. automethod:: __iter__ 8 | 9 | .. 
autoclass:: deeppavlov.models.embedders.tfidf_weighted_embedder.TfidfWeightedEmbedder 10 | 11 | .. automethod:: __call__ 12 | 13 | .. autoclass:: deeppavlov.models.embedders.transformers_embedder.TransformersBertEmbedder 14 | 15 | .. automethod:: __call__ 16 | -------------------------------------------------------------------------------- /docs/apiref/models/entity_extraction.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.entity_extraction 2 | =================================== 3 | 4 | .. autoclass:: deeppavlov.models.entity_extraction.ner_chunker.NerChunker 5 | 6 | .. automethod:: __init__ 7 | .. automethod:: __call__ 8 | 9 | .. autoclass:: deeppavlov.models.entity_extraction.entity_linking.EntityLinker 10 | 11 | .. automethod:: __init__ 12 | .. automethod:: __call__ 13 | 14 | .. autoclass:: deeppavlov.models.entity_extraction.entity_detection_parser.EntityDetectionParser 15 | 16 | .. automethod:: __init__ 17 | .. automethod:: __call__ 18 | 19 | .. autoclass:: deeppavlov.models.entity_extraction.entity_detection_parser.QuestionSignChecker 20 | -------------------------------------------------------------------------------- /docs/apiref/models/kbqa.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.kbqa 2 | ====================== 3 | 4 | .. automodule:: deeppavlov.models.kbqa 5 | 6 | .. autoclass:: deeppavlov.models.kbqa.type_define.AnswerTypesExtractor 7 | 8 | .. automethod:: __init__ 9 | .. automethod:: __call__ 10 | 11 | .. autoclass:: deeppavlov.models.kbqa.query_generator.QueryGenerator 12 | 13 | .. automethod:: __init__ 14 | .. automethod:: __call__ 15 | 16 | .. autoclass:: deeppavlov.models.kbqa.query_generator_base.QueryGeneratorBase 17 | 18 | .. automethod:: __init__ 19 | .. automethod:: __call__ 20 | 21 | .. autoclass:: deeppavlov.models.kbqa.rel_ranking_infer.RelRankerInfer 22 | 23 | .. automethod:: __init__ 24 | .. automethod:: __call__ 25 | 26 | .. autoclass:: deeppavlov.models.kbqa.template_matcher.TemplateMatcher 27 | 28 | .. automethod:: __init__ 29 | .. automethod:: __call__ 30 | 31 | .. autoclass:: deeppavlov.models.kbqa.ru_adj_to_noun.RuAdjToNoun 32 | 33 | .. automethod:: __init__ 34 | .. automethod:: __call__ 35 | 36 | .. autoclass:: deeppavlov.models.kbqa.tree_to_sparql.TreeToSparql 37 | 38 | .. automethod:: __init__ 39 | .. automethod:: __call__ 40 | 41 | .. autoclass:: deeppavlov.models.kbqa.wiki_parser.WikiParser 42 | 43 | .. automethod:: __init__ 44 | .. automethod:: __call__ 45 | -------------------------------------------------------------------------------- /docs/apiref/models/preprocessors.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.preprocessors 2 | =============================== 3 | 4 | .. autoclass:: deeppavlov.models.preprocessors.dirty_comments_preprocessor.DirtyCommentsPreprocessor 5 | 6 | .. automethod:: __call__ 7 | 8 | .. autoclass:: deeppavlov.models.preprocessors.mask.Mask 9 | 10 | .. autoclass:: deeppavlov.models.preprocessors.one_hotter.OneHotter 11 | 12 | .. autoclass:: deeppavlov.models.preprocessors.sanitizer.Sanitizer 13 | 14 | .. autofunction:: deeppavlov.models.preprocessors.str_lower.str_lower 15 | 16 | .. autoclass:: deeppavlov.models.preprocessors.str_token_reverser.StrTokenReverser 17 | 18 | .. automethod:: __call__ 19 | 20 | .. autoclass:: deeppavlov.models.preprocessors.str_utf8_encoder.StrUTF8Encoder 21 | 22 | .. automethod:: __call__ 23 | 24 | .. 
autoclass:: deeppavlov.models.preprocessors.odqa_preprocessors.DocumentChunker 25 | 26 | .. automethod:: __call__ 27 | 28 | .. autoclass:: deeppavlov.models.preprocessors.odqa_preprocessors.StringMultiplier 29 | 30 | .. automethod:: __call__ 31 | -------------------------------------------------------------------------------- /docs/apiref/models/relation_extraction.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.relation_extraction 2 | ===================================== 3 | 4 | .. autoclass:: deeppavlov.models.relation_extraction.relation_extraction_bert.REBertModel 5 | 6 | .. automethod:: __init__ 7 | .. automethod:: __call__ 8 | .. automethod:: train_on_batch 9 | -------------------------------------------------------------------------------- /docs/apiref/models/sklearn.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.sklearn 2 | ============================= 3 | 4 | .. automodule:: deeppavlov.models.sklearn 5 | :members: 6 | 7 | .. autoclass:: deeppavlov.models.sklearn.sklearn_component.SklearnComponent 8 | 9 | .. automethod:: __call__ 10 | .. automethod:: fit 11 | .. automethod:: init_from_scratch 12 | .. automethod:: load 13 | .. automethod:: save 14 | .. automethod:: compose_input_data 15 | .. automethod:: get_class_attributes 16 | .. automethod:: get_function_params 17 | -------------------------------------------------------------------------------- /docs/apiref/models/spelling_correction.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.spelling_correction 2 | ===================================== 3 | 4 | .. autoclass:: deeppavlov.models.spelling_correction.brillmoore.ErrorModel 5 | 6 | .. automethod:: __call__ 7 | .. automethod:: fit 8 | .. automethod:: save 9 | .. automethod:: load 10 | 11 | .. autoclass:: deeppavlov.models.spelling_correction.levenshtein.LevenshteinSearcherComponent 12 | 13 | .. automethod:: __call__ 14 | 15 | 16 | .. autoclass:: deeppavlov.models.spelling_correction.electors.top1_elector.TopOneElector 17 | 18 | .. automethod:: __call__ 19 | 20 | .. autoclass:: deeppavlov.models.spelling_correction.electors.kenlm_elector.KenlmElector 21 | 22 | .. automethod:: __call__ 23 | -------------------------------------------------------------------------------- /docs/apiref/models/tokenizers.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.tokenizers 2 | ============================ 3 | 4 | .. autoclass:: deeppavlov.models.tokenizers.nltk_moses_tokenizer.NLTKMosesTokenizer 5 | 6 | .. automethod:: __call__ 7 | 8 | .. autoclass:: deeppavlov.models.tokenizers.nltk_tokenizer.NLTKTokenizer 9 | 10 | .. automethod:: __call__ 11 | 12 | .. autoclass:: deeppavlov.models.tokenizers.split_tokenizer.SplitTokenizer 13 | 14 | .. autoclass:: deeppavlov.models.tokenizers.spacy_tokenizer.StreamSpacyTokenizer 15 | 16 | .. automethod:: __call__ -------------------------------------------------------------------------------- /docs/apiref/models/torch_bert.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.torch_bert 2 | ============================ 3 | 4 | .. automodule:: deeppavlov.models.torch_bert 5 | :members: 6 | 7 | .. autoclass:: deeppavlov.models.preprocessors.torch_transformers_preprocessor.TorchTransformersPreprocessor 8 | 9 | .. automethod:: __call__ 10 | 11 | .. 
autoclass:: deeppavlov.models.preprocessors.torch_transformers_preprocessor.TorchTransformersNerPreprocessor 12 | 13 | .. automethod:: __call__ 14 | 15 | .. autoclass:: deeppavlov.models.preprocessors.torch_transformers_preprocessor.TorchBertRankerPreprocessor 16 | 17 | .. automethod:: __call__ 18 | 19 | .. autoclass:: deeppavlov.models.torch_bert.torch_transformers_classifier.TorchTransformersClassifierModel 20 | 21 | .. automethod:: __call__ 22 | .. automethod:: train_on_batch 23 | 24 | .. autoclass:: deeppavlov.models.torch_bert.torch_transformers_sequence_tagger.TorchTransformersSequenceTagger 25 | 26 | .. automethod:: __call__ 27 | .. automethod:: train_on_batch 28 | 29 | .. autoclass:: deeppavlov.models.torch_bert.torch_transformers_squad.TorchTransformersSquad 30 | 31 | .. automethod:: __call__ 32 | .. automethod:: train_on_batch 33 | 34 | .. autoclass:: deeppavlov.models.torch_bert.torch_bert_ranker.TorchBertRankerModel 35 | 36 | .. automethod:: __call__ 37 | .. automethod:: train_on_batch 38 | -------------------------------------------------------------------------------- /docs/apiref/models/vectorizers.rst: -------------------------------------------------------------------------------- 1 | deeppavlov.models.vectorizers 2 | ============================= 3 | 4 | 5 | .. autoclass:: deeppavlov.models.vectorizers.hashing_tfidf_vectorizer.HashingTfIdfVectorizer 6 | :members: 7 | 8 | .. automethod:: __call__ 9 | -------------------------------------------------------------------------------- /docs/apiref/vocabs.rst: -------------------------------------------------------------------------------- 1 | vocabs 2 | ====== 3 | Concrete Vocab classes. 4 | 5 | .. automodule:: deeppavlov.vocabs 6 | :members: 7 | 8 | .. autoclass:: deeppavlov.vocabs.wiki_sqlite.WikiSQLiteVocab 9 | :members: 10 | 11 | .. automethod:: __call__ 12 | 13 | .. automodule:: deeppavlov.vocabs.typos 14 | :members: 15 | -------------------------------------------------------------------------------- /docs/devguides/registry.rst: -------------------------------------------------------------------------------- 1 | Register your model 2 | =================== 3 | 4 | In order to extend the library, you need to register your classes and functions; it is done in two steps. 5 | 6 | 1. Decorate your :class:`~deeppavlov.core.models.component.Component` 7 | (or :class:`~deeppavlov.core.data.dataset_reader.DatasetReader`, 8 | or :class:`~deeppavlov.core.data.data_learning_iterator.DataLearningIterator`, 9 | or :class:`~deeppavlov.core.data.data_fitting_iterator.DataFittingIterator`) 10 | using :func:`~deeppavlov.core.common.registry.register` and/or your metrics function 11 | using :func:`~deeppavlov.core.common.metrics_registry.register_metric`. 12 | 13 | 2. Rebuild the registry by running, from the DeepPavlov root directory: 14 | 15 | :: 16 | 17 | python -m utils.prepare.registry 18 | 19 | This script imports all the modules in the deeppavlov package, builds the registry from them, and writes it to a file. 20 | 21 | 22 | However, it is possible to use some classes and functions inside configuration files without registering them explicitly.
23 | There are two options available here: 24 | 25 | - instead of ``{"class_name": "registered_component_name"}`` in the config file, use a key-value pair similar to 26 | ``{"class_name": "my_package.my_module:MyClass"}`` 27 | 28 | - if your classes/functions are properly decorated but not included in the registry, use the ``"metadata"`` section of 29 | your config file, specifying imports as ``"metadata": {"imports": ["my_local_package.my_module", "global_package.module"]}``; 30 | then the second step described above will be unnecessary (local packages are imported from the current working 31 | directory). 32 | -------------------------------------------------------------------------------- /docs/features/hypersearch.rst: -------------------------------------------------------------------------------- 1 | Hyperparameters optimization 2 | ============================ 3 | 4 | You can search for the best hyperparameters of your model in DeepPavlov by means of cross-validation. 5 | 6 | Cross-validation 7 | ~~~~~~~~~~~~~~~~ 8 | 9 | You can run cross-validation in DeepPavlov to select the best parameters of your model. 10 | For this purpose, run the special command 'paramsearch', for example: 11 | 12 | .. code:: bash 13 | 14 | python -m deeppavlov.paramsearch path_to_json_config.json --folds 5 15 | 16 | 17 | Parameters 18 | ---------- 19 | 20 | The cross-validation command has several parameters: 21 | 22 | - ``config_path``: 23 | The path to the config file of your model. 24 | - ``--folds``: 25 | The number of folds used in cross-validation. 26 | To use leave-one-out cross-validation instead of a fixed number of folds, 27 | specify ``--folds loo``. 28 | If you don't want to cross-validate, just omit this parameter. 29 | - ``--search_type``: 30 | This parameter is optional; the default value is "grid" (grid search). 31 | 32 | 33 | .. note:: 34 | 35 | Folds will be created automatically from the union of the train and validation datasets. 36 | 37 | 38 | Special parameters in config 39 | ---------------------------- 40 | The model config file should contain parameter ranges for the search. 41 | For example, if you want to optimize the regularization coefficient of a model, 42 | you should add an additional parameter to the config with the suffix '_range'. 43 | Here is an example for a logistic regression model: 44 | 45 | .. code:: python 46 | 47 | { 48 | "class_name": "faq_logreg_model", 49 | "in": "q_vect", 50 | "fit_on": ["q_vect", "y"], 51 | "c": {"search_choice": [1, 10, 100, 1000]}, 52 | "out": ["answer", "score"] 53 | } 54 | 55 | In this example, the parameter "c" is described via ``search_choice``; the values for grid search are: 56 | 57 | .. code:: python 58 | 59 | {"search_choice": [value_0, ..., value_n]} 60 | 61 | 62 | Results 63 | ------- 64 | As a result, you'll get a new JSON config with the best model parameters. 65 | It'll be stored in the same directory as the original config file and will have the suffix '_cvbest.json'. 66 | You'll also see final log messages about the best model: 67 | 68 | .. code:: bash 69 | 70 | INFO in '__main__'['paramsearch'] at line 169: Best model params: {'C': 10000, 'penalty': 'l1', 'accuracy': 0.81466} 71 | INFO in '__main__'['paramsearch'] at line 184: Best model saved in json-file: path_to_model_config_cvbest.json 72 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to DeepPavlov's documentation! 2 | ====================================== 3 | 4 | ..
toctree:: 5 | :glob: 6 | :maxdepth: 1 7 | 8 | Installation 9 | QuickStart 10 | General concepts 11 | Configuration file 12 | Python pipelines 13 | Models overview 14 | 15 | 16 | .. toctree:: 17 | :glob: 18 | :maxdepth: 2 19 | :caption: Features 20 | 21 | Pre-trained embeddings 22 | AutoML 23 | 24 | 25 | .. toctree:: 26 | :glob: 27 | :maxdepth: 1 28 | :caption: Models 29 | 30 | Multitask BERT 31 | Context Question Answering 32 | Classification 33 | Few-shot Classification 34 | Named Entity Recognition 35 | Entity Extraction 36 | BERT-based models 37 | Morphological Tagging 38 | Neural Ranking 39 | Spelling Correction 40 | Syntactic Parsing 41 | TF-IDF Ranking 42 | Popularity Ranking 43 | Knowledge Base Question answering 44 | Relation Extraction 45 | SuperGLUE Submission 46 | Open-Domain Question Answering 47 | 48 | 49 | .. toctree:: 50 | :glob: 51 | :maxdepth: 3 52 | :caption: Integrations 53 | 54 | REST API 55 | Socket API 56 | Amazon AWS deployment 57 | DeepPavlov settings 58 | 59 | 60 | .. toctree:: 61 | :glob: 62 | :maxdepth: 3 63 | :caption: Developer Guides 64 | 65 | Contribution guide 66 | Register your model 67 | 68 | 69 | .. toctree:: 70 | :glob: 71 | :maxdepth: 3 72 | :caption: Internships 73 | 74 | Internships 75 | 76 | 77 | .. toctree:: 78 | :glob: 79 | :maxdepth: 3 80 | :caption: Package Reference 81 | 82 | apiref/* 83 | 84 | 85 | Indices and tables 86 | ================== 87 | 88 | * :ref:`genindex` 89 | * :ref:`modindex` 90 | -------------------------------------------------------------------------------- /docs/integrations/settings.rst: -------------------------------------------------------------------------------- 1 | DeepPavlov settings 2 | =================== 3 | 4 | DeepPavlov provides some tools to facilitate its usage (e.g. dialog logging and settings management). This document guides you through them. 5 | 6 | 1. Settings files access and management 7 | --------------------------------------- 8 | 9 | Most DeepPavlov settings are located in settings files, which in turn are located in a settings folder. The default settings folder location is ``deeppavlov/utils/settings``. 10 | 11 | You can override the settings directory path by setting the ``DP_SETTINGS_PATH`` environment variable. Missing files will be added automatically when running any DeepPavlov script. 12 | 13 | You can get the current full path to the settings directory with ``python -m deeppavlov.settings``. 14 | To reset settings in the current settings directory, use ``python -m deeppavlov.settings -d``. 15 | 16 | 2. Dialog logging 17 | ----------------- 18 | 19 | DeepPavlov supports logging of inferred utterances and DeepPavlov model responses. You can manage dialog logging by 20 | editing the ``dialog_logger_config.json`` file in the settings directory. 21 | 22 | The following dialog logging settings are available: 23 | 24 | 1. **enabled** (default: ``false``): turns dialog logging on/off for a DeepPavlov instance; 25 | 2. **log_path** (default: ``~/.deeppavlov/dialog_logs``): sets the directory where dialog logs are stored; 26 | 3. **logger_name** (default: ``default``): sets the subdirectory name for storing dialog logs; 27 | 4. **logfile_max_size_kb** (default: ``10240``): sets the maximum log file size in kilobytes; if exceeded, a new log file is created; 28 | 5. **ensure_ascii** (default: ``false``): if ``true``, converts all non-ASCII symbols in logged content to Unicode code points. 29 |
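For example, a settings file that turns dialog logging on under a custom logger name (``my_bot`` here is just an illustrative name; the remaining keys keep the default values listed above) might look like this:

.. code:: json

    {
      "enabled": true,
      "logger_name": "my_bot",
      "log_path": "~/.deeppavlov/dialog_logs",
      "logfile_max_size_kb": 10240,
      "ensure_ascii": false
    }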
30 | 3. Environment variables 31 | ------------------------ 32 | 33 | - **DP_SETTINGS_PATH** — a custom path to a directory that contains settings files. It's automatically populated with missing files when running any DeepPavlov script. 34 | - **DP_SKIP_NLTK_DOWNLOAD** — set to ``TRUE`` to prevent automatic downloading of **nltk** packages (``punkt``, ``stopwords``, ``perluniprops``, ``nonbreaking_prefixes``). 35 | -------------------------------------------------------------------------------- /docs/internships/internships.rst: -------------------------------------------------------------------------------- 1 | 2 | Internships 3 | =========== 4 | 5 | Do you have ideas on how to improve dialog systems for everyone? Are you ready to make an impact across the world? 6 | Great, then join us! 7 | 8 | Let’s shape the future of Conversational AI together. An internship is for aspiring graduate and undergraduate students 9 | who are passionate about Conversational AI technology and offer diverse perspectives. 10 | 11 | As an intern, you will work on some of the most ambitious technical problems, develop new ML solutions that will impact 12 | future DeepPavlov products, and make the lives of DeepPavlov users easier. 13 | 14 | All interns are paired with a mentor and will participate directly in DeepPavlov's groundbreaking work. 15 | There are no restrictions on publications based on internships. International candidates are welcome to apply. 16 | 17 | Each of our research teams has specific test assignments for interested candidates, so please familiarize yourself 18 | with our `projects `_ and pick the ones that best match your skills and interests. 19 | 20 | `Apply now at our website `_. 21 | -------------------------------------------------------------------------------- /docs/intro/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | DeepPavlov supports **Linux**, **Windows 10+** (through WSL/WSL2), **MacOS** (Big Sur+) platforms, and **Python 3.6-3.11**. 5 | Depending on the model used, you may need from 4 to 16 GB of RAM. 6 | 7 | Install with pip 8 | ~~~~~~~~~~~~~~~~ 9 | 10 | You should install DeepPavlov in a `virtual environment `_. If you’re 11 | unfamiliar with Python virtual environments, take a look at this 12 | `guide `_. A virtual 13 | environment makes it easier to manage different projects and avoid compatibility issues between dependencies. 14 | 15 | #. Create a virtual environment: 16 | 17 | .. code:: bash 18 | 19 | python -m venv env 20 | 21 | #. Activate the virtual environment on Linux (``source`` can be replaced with ``.``): 22 | 23 | .. code:: bash 24 | 25 | source env/bin/activate 26 | 27 | #. Install DeepPavlov inside this virtual environment: 28 | 29 | .. code:: bash 30 | 31 | pip install deeppavlov 32 | 33 | Install from source 34 | ~~~~~~~~~~~~~~~~~~~ 35 | 36 | Install the DeepPavlov **dev** branch from source with the following command: 37 | 38 | .. code:: bash 39 | 40 | pip install git+http://github.com/deeppavlov/DeepPavlov@dev 41 | 42 | This command installs the bleeding-edge dev version rather than the latest release version. The dev version is useful 43 | for staying up to date with the latest developments, for instance when a bug has been fixed since the last release but 44 | a new release hasn’t been rolled out yet. However, this means the dev version may not always be stable.
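After installing, you can sanity-check which version actually ended up in your environment (this assumes ``deeppavlov.__version__`` is exposed, which ``deeppavlov/_meta.py`` appears to provide; the exact string will differ between release and dev installs):

.. code:: bash

    python -c "import deeppavlov; print(deeppavlov.__version__)"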
45 | 46 | Editable install 47 | ~~~~~~~~~~~~~~~~ 48 | 49 | You will need an editable install if you want to make changes to the DeepPavlov source code that take effect immediately, 50 | without requiring a new installation. 51 | 52 | Clone the repository and install DeepPavlov with the following commands: 53 | 54 | .. code:: bash 55 | 56 | git clone http://github.com/deeppavlov/DeepPavlov.git 57 | pip install -e DeepPavlov 58 | 59 | Docker Images 60 | ~~~~~~~~~~~~~ 61 | 62 | We have built several DeepPavlov-based Docker images, which include: 63 | 64 | * A DeepPavlov-based Jupyter notebook Docker image; 65 | * Docker images which serve some of our models and allow you to access them 66 | via REST API (:doc:`riseapi ` mode). 67 | 68 | Here is our `DockerHub repository `_ with 69 | images and deployment instructions. 70 | -------------------------------------------------------------------------------- /docs/intro/overview.rst: -------------------------------------------------------------------------------- 1 | Conceptual overview 2 | =================== 3 | 4 | Our goal is to provide AI-application developers and researchers with: 5 | 6 | - A set of pre-trained NLP models, pre-defined dialog system components 7 | (ML/DL/Rule-based), and pipeline templates; 8 | - A framework for implementing and testing their own dialog models; 9 | - Tools for application integration with adjacent infrastructure 10 | (messengers, helpdesk software, etc.); 11 | - Benchmarking environments for conversational models and uniform access 12 | to relevant datasets. 13 | 14 | .. image:: ../_static/dp_agnt_diag.png 15 | 16 | 17 | Key Concepts 18 | ------------ 19 | 20 | - A ``Model`` is any NLP model that doesn't necessarily communicate 21 | with the user in natural language. 22 | - A ``Component`` is a reusable functional part of a ``Model``. 23 | - ``Rule-based Models`` cannot be trained. 24 | - ``Machine Learning Models`` can only be trained standalone. 25 | - ``Deep Learning Models`` can be trained independently and in an 26 | end-to-end mode, being joined in a chain. 27 | - A ``Chainer`` builds a model pipeline from heterogeneous 28 | components (Rule-based/ML/DL). It allows one to train and infer models in 29 | a pipeline as a whole. 30 | 31 | The smallest building block of the library is a ``Component``. 32 | A ``Component`` stands for any kind of function in an NLP pipeline. It can 33 | be implemented as a neural network, a non-neural ML model, or a 34 | rule-based system. 35 | 36 | ``Component``\ s can be joined into a ``Model``. A ``Model`` 37 | solves a larger NLP task than a ``Component``. However, in terms of 38 | implementation, ``Model``\ s are not different from ``Component``\ s. 39 | 40 | Most DeepPavlov models are built on top of `PyTorch `__. 41 | Other external libraries can be used to build basic components.
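To make the ``Chainer`` concept concrete, here is a minimal inference sketch (it assumes the pre-trained ``ner_ontonotes_bert`` config shipped with the library; any other config name would work the same way):

.. code:: python

    from deeppavlov import build_model

    # build_model reads the config, assembles every pipeline component into a
    # single Chainer, and (with download=True) fetches the pre-trained files
    ner = build_model('ner_ontonotes_bert', download=True)

    # the whole pipeline is then called as one function: raw text in, tags out
    tokens, tags = ner(['DeepPavlov is developed in Moscow'])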
42 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi>=0.47.0,<=0.89.1 2 | filelock>=3.0.0,<3.10.0 3 | nltk>=3.2.4,<3.10.0 4 | numpy<1.24 5 | pandas>=1.0.0,<1.6.0 6 | prometheus-client>=0.13.0,<=1.16.0 7 | pydantic<2 8 | pybind11==2.10.3 9 | requests>=2.19.0,<3.0.0 10 | scikit-learn>=0.24,<1.1.0;python_version<="3.10" 11 | scikit-learn==1.4.0;python_version=="3.11.*" 12 | tqdm>=4.42.0,<4.65.0 13 | uvicorn>=0.13.0,<0.19.0 14 | wheel 15 | scipy<1.10.0;python_version<"3.8" 16 | scipy==1.10.0;python_version>="3.8" 17 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_configs/doc_retrieval/en_ranker_tfidf_wiki_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "class_name": "odqa_reader", 4 | "data_path": "{DOWNLOADS_PATH}/odqa/enwiki_test", 5 | "save_path": "{DOWNLOADS_PATH}/odqa/enwiki_test.db", 6 | "dataset_format": "txt" 7 | }, 8 | "dataset_iterator": { 9 | "class_name": "sqlite_iterator", 10 | "shuffle": false, 11 | "load_path": "{DOWNLOADS_PATH}/odqa/enwiki_test.db" 12 | }, 13 | "chainer": { 14 | "in": [ 15 | "docs" 16 | ], 17 | "in_y": [ 18 | "doc_ids", 19 | "doc_nums" 20 | ], 21 | "out": [ 22 | "tfidf_doc_ids" 23 | ], 24 | "pipe": [ 25 | { 26 | "class_name": "hashing_tfidf_vectorizer", 27 | "id": "vectorizer", 28 | "fit_on": [ 29 | "docs", 30 | "doc_ids", 31 | "doc_nums" 32 | ], 33 | "save_path": "{DOWNLOADS_PATH}/odqa/enwiki_test_tfidf.npz", 34 | "load_path": "{DOWNLOADS_PATH}/odqa/enwiki_test_tfidf.npz", 35 | "tokenizer": { 36 | "class_name": "stream_spacy_tokenizer", 37 | "lemmas": true, 38 | "ngram_range": [ 39 | 1, 40 | 2 41 | ] 42 | } 43 | }, 44 | { 45 | "class_name": "tfidf_ranker", 46 | "top_n": 20, 47 | "in": [ 48 | "docs" 49 | ], 50 | "out": [ 51 | "tfidf_doc_ids", 52 | "tfidf_doc_scores" 53 | ], 54 | "vectorizer": "#vectorizer" 55 | } 56 | ] 57 | }, 58 | "train": { 59 | "batch_size": 2, 60 | "evaluation_targets": [], 61 | "class_name": "fit_trainer" 62 | }, 63 | "metadata": { 64 | "variables": { 65 | "ROOT_PATH": "~/.deeppavlov", 66 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 67 | "MODELS_PATH": "{ROOT_PATH}/models" 68 | }, 69 | "download": [ 70 | { 71 | "url": "http://files.deeppavlov.ai/datasets/wikipedia/enwiki_test.tar.gz", 72 | "subdir": "{DOWNLOADS_PATH}/odqa" 73 | } 74 | ] 75 | } 76 | } -------------------------------------------------------------------------------- /tests/test_configs/doc_retrieval/ru_ranker_tfidf_wiki_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "class_name": "odqa_reader", 4 | "data_path": "{DOWNLOADS_PATH}/odqa/ruwiki_test", 5 | "save_path": "{DOWNLOADS_PATH}/odqa/ruwiki_test.db", 6 | "dataset_format": "txt" 7 | }, 8 | "dataset_iterator": { 9 | "class_name": "sqlite_iterator", 10 | "shuffle": false, 11 | "load_path": "{DOWNLOADS_PATH}/odqa/ruwiki_test.db" 12 | }, 13 | "chainer": { 14 | "in": [ 15 | "docs" 16 | ], 17 | "in_y": [ 18 | "doc_ids", 19 | "doc_nums" 20 | ], 21 | "out": [ 22 | "tfidf_doc_ids" 23 | ], 24 | 
"pipe": [ 25 | { 26 | "class_name": "hashing_tfidf_vectorizer", 27 | "id": "vectorizer", 28 | "fit_on": [ 29 | "docs", 30 | "doc_ids", 31 | "doc_nums" 32 | ], 33 | "save_path": "{DOWNLOADS_PATH}/odqa/ruwiki_test_tfidf.npz", 34 | "load_path": "{DOWNLOADS_PATH}/odqa/ruwiki_test_tfidf.npz", 35 | "tokenizer": { 36 | "class_name": "stream_spacy_tokenizer", 37 | "spacy_model": "ru_core_news_sm", 38 | "lemmas": true, 39 | "lowercase": true, 40 | "filter_stopwords": true, 41 | "ngram_range": [ 42 | 1, 43 | 2 44 | ] 45 | } 46 | }, 47 | { 48 | "class_name": "tfidf_ranker", 49 | "top_n": 20, 50 | "in": [ 51 | "docs" 52 | ], 53 | "out": [ 54 | "tfidf_doc_ids", 55 | "tfidf_doc_scores" 56 | ], 57 | "vectorizer": "#vectorizer" 58 | } 59 | ] 60 | }, 61 | "train": { 62 | "batch_size": 2, 63 | "evaluation_targets": [], 64 | "class_name": "fit_trainer" 65 | }, 66 | "metadata": { 67 | "variables": { 68 | "ROOT_PATH": "~/.deeppavlov", 69 | "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", 70 | "MODELS_PATH": "{ROOT_PATH}/models" 71 | }, 72 | "download": [ 73 | { 74 | "url": "http://files.deeppavlov.ai/datasets/wikipedia/ruwiki_test.tar.gz", 75 | "subdir": "{DOWNLOADS_PATH}/odqa" 76 | } 77 | ] 78 | } 79 | } -------------------------------------------------------------------------------- /utils/Docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE 2 | 3 | FROM $BASE_IMAGE 4 | 5 | SHELL ["/bin/bash", "-c"] 6 | 7 | ENV DP_PYTEST_API_PORT=5000 8 | ENV DP_PYTEST_NO_CACHE=True 9 | ENV LANG='en_US.UTF-8' 10 | 11 | ARG DEBIAN_FRONTEND=noninteractive 12 | ARG PYTHON_VERSION 13 | 14 | RUN rm -f /etc/apt/sources.list.d/cuda*.list && \ 15 | apt update && \ 16 | apt install -y --no-install-recommends \ 17 | build-essential \ 18 | dpkg-dev \ 19 | gcc \ 20 | git \ 21 | libbz2-dev \ 22 | libc6-dev \ 23 | libexpat1-dev \ 24 | libffi-dev \ 25 | libgdbm-dev \ 26 | liblzma-dev \ 27 | libncursesw5-dev \ 28 | libreadline-dev \ 29 | libsqlite3-dev \ 30 | libssl-dev \ 31 | libxslt-dev \ 32 | locales \ 33 | make \ 34 | pandoc \ 35 | tk-dev \ 36 | wget \ 37 | xz-utils \ 38 | zlib1g-dev && \ 39 | locale-gen en_US.UTF-8 && \ 40 | wget --no-check-certificate -O python.tar.xz https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tar.xz && \ 41 | mkdir -p /usr/src/python && \ 42 | tar -xC /usr/src/python --strip-components=1 -f python.tar.xz && \ 43 | rm python.tar.xz && \ 44 | cd /usr/src/python && \ 45 | ./configure && \ 46 | make -j "$(nproc)" altinstall && \ 47 | ln -s /usr/local/bin/python${PYTHON_VERSION%.*} /usr/local/bin/python && \ 48 | ln -s /usr/local/bin/pip${PYTHON_VERSION%.*} /usr/local/bin/pip && \ 49 | pip install --upgrade pip && \ 50 | pip install pybind11==2.2.4 && \ 51 | rm -rf /usr/src/python /var/lib/apt/lists/* 52 | 53 | WORKDIR /app 54 | 55 | # two commands to prevent caching of the next layers 56 | ARG EPOCH 57 | ENV EPOCH=$EPOCH 58 | 59 | COPY . . 
60 | 61 | CMD utils/Docker/cmd.sh 62 | -------------------------------------------------------------------------------- /utils/Docker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/utils/Docker/README.md -------------------------------------------------------------------------------- /utils/Docker/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | pip install .[tests,docs] 6 | 7 | rm -rf `find . -mindepth 1 -maxdepth 1 ! -name tests ! -name Jenkinsfile ! -name docs` 8 | 9 | cd docs 10 | make clean 11 | make html 12 | cd .. 13 | 14 | flake8 `python -c 'import deeppavlov; print(deeppavlov.__path__[0])'` --count --select=E9,F63,F7,F82 --show-source --statistics 15 | 16 | pytest -v --disable-warnings --instafail $PYTEST_ARGS 17 | -------------------------------------------------------------------------------- /utils/Docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.7' 2 | services: 3 | py36: 4 | build: 5 | context: ../../ 6 | dockerfile: utils/Docker/Dockerfile 7 | args: 8 | - EPOCH=$EPOCH 9 | - PYTHON_VERSION=3.6.15 10 | - BASE_IMAGE=nvidia/cuda:11.5.2-cudnn8-runtime-ubuntu20.04 11 | user: '${UID}:${GID}' 12 | environment: 13 | - CUDA_VISIBLE_DEVICES=$TEST_GPU_0 14 | - PYTEST_ARGS=$PYTEST_ARGS 15 | - DP_PYTEST_NO_CACHE=True 16 | py37: 17 | build: 18 | context: ../../ 19 | dockerfile: utils/Docker/Dockerfile 20 | args: 21 | - EPOCH=$EPOCH 22 | - PYTHON_VERSION=3.7.16 23 | - BASE_IMAGE=nvidia/cuda:11.5.2-cudnn8-runtime-ubuntu20.04 24 | user: '${UID}:${GID}' 25 | environment: 26 | - CUDA_VISIBLE_DEVICES=$TEST_GPU_1 27 | - PYTEST_ARGS=$PYTEST_ARGS 28 | - DP_PYTEST_NO_CACHE=True 29 | py38: 30 | build: 31 | context: ../../ 32 | dockerfile: utils/Docker/Dockerfile 33 | args: 34 | - EPOCH=$EPOCH 35 | - PYTHON_VERSION=3.8.16 36 | - BASE_IMAGE=nvidia/cuda:11.5.2-cudnn8-runtime-ubuntu20.04 37 | user: '${UID}:${GID}' 38 | environment: 39 | - CUDA_VISIBLE_DEVICES=$TEST_GPU_0 40 | - PYTEST_ARGS=$PYTEST_ARGS 41 | - DP_PYTEST_NO_CACHE=True 42 | py39: 43 | build: 44 | context: ../../ 45 | dockerfile: utils/Docker/Dockerfile 46 | args: 47 | - EPOCH=$EPOCH 48 | - PYTHON_VERSION=3.9.16 49 | - BASE_IMAGE=nvidia/cuda:11.5.2-cudnn8-runtime-ubuntu20.04 50 | user: '${UID}:${GID}' 51 | environment: 52 | - CUDA_VISIBLE_DEVICES=$TEST_GPU_1 53 | - PYTEST_ARGS=$PYTEST_ARGS 54 | - DP_PYTEST_NO_CACHE=True 55 | py310: 56 | build: 57 | context: ../../ 58 | dockerfile: utils/Docker/Dockerfile 59 | args: 60 | - EPOCH=$EPOCH 61 | - PYTHON_VERSION=3.10.9 62 | - BASE_IMAGE=nvidia/cuda:11.5.2-cudnn8-runtime-ubuntu20.04 63 | user: '${UID}:${GID}' 64 | environment: 65 | - CUDA_VISIBLE_DEVICES=$TEST_GPU_0 66 | - PYTEST_ARGS=$PYTEST_ARGS 67 | - DP_PYTEST_NO_CACHE=True 68 | py311: 69 | build: 70 | context: ../../ 71 | dockerfile: utils/Docker/Dockerfile 72 | args: 73 | - EPOCH=$EPOCH 74 | - PYTHON_VERSION=3.11.6 75 | - BASE_IMAGE=nvidia/cuda:11.5.2-cudnn8-runtime-ubuntu20.04 76 | user: '${UID}:${GID}' 77 | environment: 78 | - CUDA_VISIBLE_DEVICES=$TEST_GPU_1 79 | - PYTEST_ARGS=$PYTEST_ARGS 80 | - DP_PYTEST_NO_CACHE=True 81 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/utils/__init__.py -------------------------------------------------------------------------------- /utils/prepare/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deeppavlov/DeepPavlov/5f9fbed0c7191466bc7621e604b810f66f254c03/utils/prepare/__init__.py -------------------------------------------------------------------------------- /utils/prepare/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Neural Networks and Deep Learning lab, MIPT 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | import pkgutil 17 | from importlib import import_module, reload 18 | 19 | import deeppavlov 20 | from deeppavlov.core.common.metrics_registry import _registry_path as m_registry_path, _REGISTRY as M_REGISTRY 21 | from deeppavlov.core.common.registry import _registry_path as c_registry_path, _REGISTRY as C_REGISTRY 22 | 23 | if __name__ == '__main__': 24 | C_REGISTRY.clear() 25 | M_REGISTRY.clear() 26 | 27 | for _, pkg_name, _ in pkgutil.walk_packages(deeppavlov.__path__, deeppavlov.__name__ + '.'): 28 | if pkg_name not in ('deeppavlov.core.common.registry', 'deeppavlov.core.common.metrics_registry'): 29 | reload(import_module(pkg_name)) 30 | 31 | with c_registry_path.open('w', encoding='utf-8') as f: 32 | json.dump(dict(sorted(C_REGISTRY.items())), f, indent=2) 33 | 34 | with m_registry_path.open('w', encoding='utf-8') as f: 35 | json.dump(dict(sorted(M_REGISTRY.items())), f, indent=2) 36 | --------------------------------------------------------------------------------
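The registry rebuild script above walks every module under the ``deeppavlov`` package, so any properly decorated class is picked up automatically. A toy example of such a decorated component (illustrative only, not part of the library):

    from deeppavlov.core.common.registry import register
    from deeppavlov.core.models.component import Component


    @register('my_lowercaser')
    class MyLowercaser(Component):
        """A toy component that lowercases a batch of utterances."""

        def __init__(self, **kwargs) -> None:
            pass

        def __call__(self, batch):
            return [utterance.lower() for utterance in batch]

After rebuilding the registry (or listing the defining module in a config's ``"metadata": {"imports": [...]}`` section), the class can be referenced in configs as ``{"class_name": "my_lowercaser"}``.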