├── .coveragerc ├── .dockerignore ├── .flake8 ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── Dockerfile.commit ├── Dockerfile.release ├── Dockerfile.test ├── LICENSE ├── Makefile ├── README.md ├── allennlp_models ├── __init__.py ├── classification │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── boolq.py │ │ └── stanford_sentiment_tree_bank.py │ └── models │ │ ├── __init__.py │ │ ├── biattentive_classification_network.py │ │ └── transformer_classification_tt.py ├── common │ ├── __init__.py │ └── ontonotes.py ├── coref │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── conll.py │ │ ├── preco.py │ │ └── winobias.py │ ├── metrics │ │ ├── __init__.py │ │ ├── conll_coref_scores.py │ │ └── mention_recall.py │ ├── models │ │ ├── __init__.py │ │ └── coref.py │ ├── predictors │ │ ├── __init__.py │ │ └── coref.py │ ├── tools │ │ └── compile_coref_data.sh │ └── util.py ├── generation │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── cnn_dm.py │ │ ├── copynet_seq2seq.py │ │ └── seq2seq.py │ ├── models │ │ ├── __init__.py │ │ ├── bart.py │ │ ├── composed_seq2seq.py │ │ ├── copynet_seq2seq.py │ │ ├── simple_seq2seq.py │ │ └── t5.py │ ├── modules │ │ ├── __init__.py │ │ ├── decoder_nets │ │ │ ├── __init__.py │ │ │ ├── decoder_net.py │ │ │ ├── lstm_cell.py │ │ │ └── stacked_self_attention.py │ │ └── seq_decoders │ │ │ ├── __init__.py │ │ │ ├── auto_regressive.py │ │ │ └── seq_decoder.py │ └── predictors │ │ ├── __init__.py │ │ └── seq2seq.py ├── lm │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── masked_language_model.py │ │ ├── next_token_lm.py │ │ └── simple_language_modeling.py │ ├── models │ │ ├── __init__.py │ │ ├── bidirectional_lm.py │ │ ├── language_model.py │ │ ├── masked_language_model.py │ │ └── next_token_lm.py │ ├── modules │ │ ├── __init__.py │ │ ├── language_model_heads │ │ │ ├── __init__.py │ │ │ ├── bert.py │ │ │ ├── gpt2.py │ │ │ ├── 
language_model_head.py │ │ │ └── linear.py │ │ ├── seq2seq_encoders │ │ │ ├── __init__.py │ │ │ └── bidirectional_lm_transformer.py │ │ └── token_embedders │ │ │ ├── __init__.py │ │ │ ├── bidirectional_lm.py │ │ │ └── language_model.py │ ├── predictors │ │ ├── __init__.py │ │ ├── masked_language_model.py │ │ └── next_token_lm.py │ └── util │ │ ├── __init__.py │ │ └── beam_search_generators │ │ ├── __init__.py │ │ ├── beam_search_generator.py │ │ └── transformer_beam_search_generator.py ├── mc │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── commonsenseqa.py │ │ ├── fake.py │ │ ├── piqa.py │ │ ├── piqa_tt.py │ │ ├── swag.py │ │ ├── transformer_mc.py │ │ └── transformer_mc_tt.py │ ├── models │ │ ├── __init__.py │ │ ├── transformer_mc.py │ │ └── transformer_mc_tt.py │ └── predictors │ │ ├── __init__.py │ │ └── transformer_mc.py ├── modelcards │ ├── coref-spanbert.json │ ├── evaluate_rc-lerc.json │ ├── generation-bart.json │ ├── glove-sst.json │ ├── lm-masked-language-model.json │ ├── lm-next-token-lm-gpt2.json │ ├── mc-roberta-commonsenseqa.json │ ├── mc-roberta-piqa.json │ ├── mc-roberta-swag.json │ ├── modelcard-template.json │ ├── nlvr2-vilbert-head.json │ ├── nlvr2-vilbert.json │ ├── pair-classification-adversarial-binary-gender-bias-mitigated-roberta-snli.json │ ├── pair-classification-binary-gender-bias-mitigated-roberta-snli.json │ ├── pair-classification-decomposable-attention-elmo.json │ ├── pair-classification-esim.json │ ├── pair-classification-roberta-mnli.json │ ├── pair-classification-roberta-rte.json │ ├── pair-classification-roberta-snli.json │ ├── rc-bidaf-elmo.json │ ├── rc-bidaf.json │ ├── rc-naqanet.json │ ├── rc-nmn.json │ ├── rc-transformer-qa.json │ ├── roberta-sst.json │ ├── structured-prediction-biaffine-parser.json │ ├── structured-prediction-constituency-parser.json │ ├── structured-prediction-srl-bert.json │ ├── structured-prediction-srl.json │ ├── tagging-elmo-crf-tagger.json │ ├── tagging-fine-grained-crf-tagger.json │ 
├── tagging-fine-grained-transformer-crf-tagger.json │ ├── ve-vilbert.json │ ├── vgqa-vilbert.json │ └── vqa-vilbert.json ├── pair_classification │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── quora_paraphrase.py │ │ ├── snli.py │ │ └── transformer_superglue_rte.py │ ├── models │ │ ├── __init__.py │ │ ├── bimpm.py │ │ ├── decomposable_attention.py │ │ └── esim.py │ └── predictors │ │ ├── __init__.py │ │ └── textual_entailment.py ├── pretrained.py ├── rc │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── drop.py │ │ ├── qangaroo.py │ │ ├── quac.py │ │ ├── record_reader.py │ │ ├── squad.py │ │ ├── transformer_squad.py │ │ ├── triviaqa.py │ │ └── utils.py │ ├── metrics │ │ ├── __init__.py │ │ ├── drop_em_and_f1.py │ │ └── squad_em_and_f1.py │ ├── models │ │ ├── __init__.py │ │ ├── bidaf.py │ │ ├── bidaf_ensemble.py │ │ ├── dialog_qa.py │ │ ├── naqanet.py │ │ ├── qanet.py │ │ ├── transformer_qa.py │ │ └── utils.py │ ├── modules │ │ ├── __init__.py │ │ └── seq2seq_encoders │ │ │ ├── __init__.py │ │ │ ├── multi_head_self_attention.py │ │ │ ├── qanet_encoder.py │ │ │ └── stacked_self_attention.py │ ├── predictors │ │ ├── __init__.py │ │ ├── bidaf.py │ │ ├── dialog_qa.py │ │ └── transformer_qa.py │ └── tools │ │ ├── __init__.py │ │ ├── drop.py │ │ ├── narrativeqa.py │ │ ├── orb.py │ │ ├── orb_utils.py │ │ ├── quoref.py │ │ ├── squad.py │ │ └── transformer_qa_eval.py ├── structured_prediction │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── penn_tree_bank.py │ │ ├── semantic_dependencies.py │ │ ├── srl.py │ │ └── universal_dependencies.py │ ├── metrics │ │ ├── __init__.py │ │ └── srl_eval_scorer.py │ ├── models │ │ ├── __init__.py │ │ ├── biaffine_dependency_parser.py │ │ ├── constituency_parser.py │ │ ├── graph_parser.py │ │ ├── srl.py │ │ └── srl_bert.py │ ├── predictors │ │ ├── __init__.py │ │ ├── biaffine_dependency_parser.py │ │ ├── constituency_parser.py │ │ ├── openie.py │ │ ├── srl.py │ │ └── util.py │ └── 
tools │ │ ├── __init__.py │ │ ├── convert_openie_to_conll.py │ │ ├── srl-eval.pl │ │ └── write_srl_predictions_to_conll_format.py ├── tagging │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── ccgbank.py │ │ ├── conll2000.py │ │ ├── conll2003.py │ │ └── ontonotes_ner.py │ ├── models │ │ ├── __init__.py │ │ └── crf_tagger.py │ └── predictors │ │ ├── __init__.py │ │ └── sentence_tagger.py ├── taskcards │ ├── constituency_parsing.json │ ├── coref.json │ ├── dependency_parsing.json │ ├── evaluate_rc.json │ ├── language_modeling.json │ ├── masked_language_modeling.json │ ├── ner.json │ ├── oie.json │ ├── rc.json │ ├── sentiment_analysis.json │ ├── srl.json │ ├── taskcard_template.json │ ├── textual_entailment.json │ ├── ve.json │ └── vqa.json ├── version.py └── vision │ ├── __init__.py │ ├── dataset_readers │ ├── __init__.py │ ├── flickr30k.py │ ├── gqa.py │ ├── nlvr2.py │ ├── utils.py │ ├── vgqa.py │ ├── vision_reader.py │ ├── visual_entailment.py │ └── vqav2.py │ ├── metrics │ ├── __init__.py │ └── vqa.py │ ├── models │ ├── __init__.py │ ├── heads │ │ ├── __init__.py │ │ ├── nlvr2_head.py │ │ ├── visual_entailment_head.py │ │ └── vqa_head.py │ ├── nlvr2.py │ ├── vilbert_image_retrieval.py │ ├── vilbert_vqa.py │ ├── vision_text_model.py │ └── visual_entailment.py │ └── predictors │ ├── __init__.py │ ├── nlvr2.py │ ├── vilbert_vqa.py │ └── visual_entailment.py ├── codecov.yml ├── constraints.txt ├── dev-requirements.txt ├── docs ├── css │ └── extra.css └── img │ ├── allennlp-logo-dark.png │ └── favicon.ico ├── mkdocs-skeleton.yml ├── pyproject.toml ├── pytest.ini ├── requirements.txt ├── scripts ├── .gitignore ├── build_docs_config.py ├── ensure_models_found.py ├── ensure_versions_match.py ├── get_version.py └── update_readme_model_list.py ├── setup.py ├── test_fixtures ├── bert-xsmall-dummy │ ├── config.json │ ├── pytorch_model.bin │ └── vocab.txt ├── classification │ ├── biattentive_classification_network │ │ ├── broken_experiments │ │ │ ├── 
elmo_in_text_field_embedder.json │ │ │ └── no_elmo_tokenizer_for_elmo.json │ │ ├── elmo_experiment.json │ │ ├── experiment.json │ │ ├── feedforward_experiment.json │ │ └── output_only_elmo_experiment.json │ ├── boolq.jsonl │ └── sst.txt ├── coref │ ├── coref.gold_conll │ ├── coref_albert_small.jsonnet │ ├── experiment.json │ ├── preco.jsonl │ ├── serialization │ │ ├── model.tar.gz │ │ └── vocabulary │ │ │ ├── non_padded_namespaces.txt │ │ │ └── tokens.txt │ └── winobias.sample ├── generation │ ├── bart │ │ ├── data │ │ │ ├── cnn_stories │ │ │ │ ├── 017d27d00eb43678c15cb4a8dd4723a035323219.story │ │ │ │ ├── 06352019a19ae31e527f37f7571c6dd7f0c5da37.story │ │ │ │ ├── 0d43b97000ff852282c89d8d105e41495c0ee9bd.story │ │ │ │ ├── 203886369feea77bbc35715e6d7e518b751f57de.story │ │ │ │ ├── 230c522854991d053fe98a718b1defa077a8efef.story │ │ │ │ ├── 24521a2abb2e1f5e34e6824e0f9e56904a2b0e88.story │ │ │ │ ├── 2bd8ada1de6a7b02f59430cc82045eb8d29cf033.story │ │ │ │ ├── 42c027e4ff9730fbb3de84c1af0d2c506e41c3e4.story │ │ │ │ ├── 4495ba8f3a340d97a9df1476f8a35502bcce1f69.story │ │ │ │ ├── 469c6ac05092ca5997728c9dfc19f9ab6b936e40.story │ │ │ │ ├── 5e22bbfc7232418b8d2dd646b952e404df5bd048.story │ │ │ │ ├── 7c0e61ac829a3b3b653e2e3e7536cc4881d1f264.story │ │ │ │ ├── 7fe70cc8b12fab2d0a258fababf7d9c6b5e1262a.story │ │ │ │ ├── 88e7dac40f3e6c159e4edc0747d0cc0f50886fbb.story │ │ │ │ ├── a1ebb8bb4d370a1fdf28769206d572be60642d70.story │ │ │ │ ├── c222979bd1cfbc7d3ff821e9c738e3dbd29b14f4.story │ │ │ │ ├── c27cf1b136cc270023de959e7ab24638021bc43f.story │ │ │ │ ├── e2706dce6cf26bc61b082438188fdb6e130d9e40.story │ │ │ │ ├── ee8871b15c50d0db17b0179a6d2beab35065f1e9.story │ │ │ │ └── f001ec5c4704938247d27a44948eebb37ae98d01.story │ │ │ └── url_lists │ │ │ │ ├── all_train.txt │ │ │ │ └── all_val.txt │ │ └── experiment.jsonnet │ ├── composed │ │ ├── experiment.json │ │ ├── experiment_transformer.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── 
dependencies.txt │ │ │ ├── ner.txt │ │ │ ├── non_padded_namespaces.txt │ │ │ ├── pos.txt │ │ │ ├── source_tokens.txt │ │ │ └── target_tokens.txt │ ├── copynet │ │ ├── data │ │ │ ├── copyover.tsv │ │ │ ├── source_vocab.txt │ │ │ └── target_vocab.txt │ │ ├── experiment.json │ │ ├── experiment_transformer.jsonnet │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── non_padded_namespaces.txt │ │ │ ├── source_tokens.txt │ │ │ └── target_tokens.txt │ ├── seq2seq_copy.csv │ ├── seq2seq_copy.tsv │ ├── seq2seq_max_marginal_likelihood.tsv │ ├── simple │ │ ├── experiment.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── dependencies.txt │ │ │ ├── ner.txt │ │ │ ├── non_padded_namespaces.txt │ │ │ ├── pos.txt │ │ │ ├── source_tokens.txt │ │ │ └── target_tokens.txt │ └── t5 │ │ └── experiment.jsonnet ├── glove.6B.100d.sample.txt.gz ├── glove.6B.300d.sample.txt.gz ├── lm │ ├── bidirectional_language_model │ │ ├── training_data │ │ │ ├── sentences1.txt │ │ │ └── sentences2.txt │ │ └── vocab │ │ │ ├── non_padded_namespaces.txt │ │ │ └── tokens.txt │ ├── conll2003.txt │ ├── elmo │ │ ├── config │ │ │ └── characters_token_embedder.json │ │ ├── elmo_token_embeddings.hdf5 │ │ ├── lm_embeddings_0.hdf5 │ │ ├── lm_embeddings_1.hdf5 │ │ ├── lm_embeddings_2.hdf5 │ │ ├── lm_weights.hdf5 │ │ ├── options.json │ │ ├── sentences.json │ │ └── vocab_test.txt │ ├── language_model │ │ ├── bidirectional_lm_characters_token_embedder.jsonnet │ │ ├── bidirectional_lm_characters_token_embedder_without_bos_eos.jsonnet │ │ ├── characters_token_embedder.json │ │ ├── characters_token_embedder_without_bos_eos.jsonnet │ │ ├── experiment.jsonnet │ │ ├── experiment_bidirectional.jsonnet │ │ ├── experiment_bidirectional_unsampled.jsonnet │ │ ├── experiment_transformer.jsonnet │ │ ├── experiment_unidirectional.jsonnet │ │ ├── experiment_unidirectional_transformer.jsonnet │ │ ├── experiment_unidirectional_unsampled.jsonnet │ │ 
├── experiment_unsampled.jsonnet │ │ ├── model.tar.gz │ │ └── sentences.txt │ ├── language_modeling │ │ ├── shards │ │ │ ├── shard0 │ │ │ ├── shard1 │ │ │ ├── shard2 │ │ │ ├── shard3 │ │ │ ├── shard4 │ │ │ ├── shard5 │ │ │ ├── shard6 │ │ │ ├── shard7 │ │ │ ├── shard8 │ │ │ └── shard9 │ │ └── single_sentence.txt │ ├── masked_language_model │ │ ├── experiment.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── non_padded_namespaces.txt │ │ │ └── tokens.txt │ └── next_token_lm │ │ ├── experiment.json │ │ ├── experiment_transformer.json │ │ └── serialization │ │ ├── best.th │ │ ├── model.tar.gz │ │ └── vocabulary │ │ ├── non_padded_namespaces.txt │ │ └── tokens.txt ├── mc │ ├── commonsenseqa.jsonl │ ├── piqa-labels.lst │ ├── piqa.jsonl │ ├── swag.csv │ └── transformer_mc │ │ └── experiment.jsonnet ├── pair_classification │ ├── bias_mitigation │ │ ├── adversarial_experiment.json │ │ ├── experiment.json │ │ ├── snli_dev.jsonl │ │ ├── snli_test.jsonl │ │ └── snli_train.jsonl │ ├── bimpm │ │ └── experiment.json │ ├── decomposable_attention │ │ ├── experiment.json │ │ ├── parameters_inspection.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── labels.txt │ │ │ ├── non_padded_namespaces.txt │ │ │ └── tokens.txt │ ├── esim │ │ ├── experiment.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── labels.txt │ │ │ ├── non_padded_namespaces.txt │ │ │ └── tokens.txt │ ├── quora_paraphrase.tsv │ ├── snli.jsonl │ ├── snli2.jsonl │ └── snli_vocab │ │ ├── labels.txt │ │ ├── non_padded_namespaces.txt │ │ └── tokens.txt ├── rc │ ├── bidaf │ │ ├── experiment.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── non_padded_namespaces.txt │ │ │ └── tokens.txt │ ├── dialog_qa │ │ ├── experiment.json │ │ ├── quac_sample.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ 
│ │ ├── answer_tags.txt │ │ │ ├── followup_labels.txt │ │ │ ├── non_padded_namespaces.txt │ │ │ └── yesno_labels.txt │ ├── drop.json │ ├── naqanet │ │ ├── experiment.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── non_padded_namespaces.txt │ │ │ ├── token_characters.txt │ │ │ └── tokens.txt │ ├── orb_sample_input.jsonl │ ├── orb_sample_predictions.json │ ├── qanet │ │ ├── experiment.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── non_padded_namespaces.txt │ │ │ ├── token_characters.txt │ │ │ └── tokens.txt │ ├── qangaroo.json │ ├── quoref_sample.json │ ├── quoref_sample_predictions.json │ ├── record.json │ ├── squad.json │ ├── squad2.json │ ├── superglue_rte.jsonl │ ├── superglue_rte_no_labels.jsonl │ ├── transformer_qa │ │ ├── experiment.jsonnet │ │ └── experiment_v2.jsonnet │ └── triviaqa-sample.tgz ├── structured_prediction │ ├── biaffine_dependency_parser │ │ ├── experiment.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── head_tags.txt │ │ │ ├── non_padded_namespaces.txt │ │ │ ├── pos.txt │ │ │ └── tokens.txt │ ├── constituency_parser │ │ ├── constituency_parser.json │ │ ├── experiment.json │ │ └── serialization │ │ │ ├── best.th │ │ │ ├── model.tar.gz │ │ │ └── vocabulary │ │ │ ├── labels.txt │ │ │ ├── non_padded_namespaces.txt │ │ │ └── tokens.txt │ ├── dependencies.conllu │ ├── example_ptb.trees │ ├── semantic_dependencies │ │ ├── dm.sdp │ │ └── experiment.json │ └── srl │ │ ├── bert │ │ ├── config.json │ │ └── vocab.txt │ │ ├── bert_srl.jsonnet │ │ ├── bert_srl_local_files.jsonnet │ │ ├── conll_2012 │ │ ├── subdomain │ │ │ └── example.gold_conll │ │ └── subdomain2 │ │ │ └── example.gold_conll │ │ ├── experiment.json │ │ └── serialization │ │ ├── best.th │ │ ├── model.tar.gz │ │ └── vocabulary │ │ ├── labels.txt │ │ ├── non_padded_namespaces.txt │ │ └── tokens.txt ├── tagging │ ├── ccgbank.txt │ ├── 
conll2000.txt │ ├── conll2003.txt │ └── crf_tagger │ │ ├── experiment.json │ │ ├── experiment_albert.json │ │ ├── experiment_ccgbank.json │ │ └── experiment_conll2000.json └── vision │ ├── flickr30k │ ├── experiment.jsonnet │ ├── experiment_from_huggingface.jsonnet │ ├── sentences │ │ ├── 1.txt │ │ ├── 2.txt │ │ ├── 3.txt │ │ ├── 4945942737.txt │ │ └── 6338542128.txt │ ├── test.txt │ └── tiny-dev.txt │ ├── gqa │ ├── question_dir │ │ ├── questions0.json │ │ └── questions1.json │ └── questions.json │ ├── images │ ├── feature_cache │ │ ├── coordinates │ │ ├── coordinates-lock │ │ ├── features │ │ └── features-lock │ ├── flickr30k │ │ ├── 1.jpg │ │ ├── 100652400.jpg │ │ ├── 1016887272.jpg │ │ ├── 101958970.jpg │ │ ├── 2.jpg │ │ ├── 3.jpg │ │ ├── 3359636318.jpg │ │ ├── 4945942737.jpg │ │ ├── 6338542128.jpg │ │ ├── 6959556104.jpg │ │ └── 7162685234.jpg │ ├── gqa │ │ ├── 1339.jpg │ │ ├── 2331963.jpg │ │ ├── 2354786.jpg │ │ ├── 2368326.jpg │ │ ├── 2375429.jpg │ │ ├── 2400861.jpg │ │ ├── 2405722.jpg │ │ ├── n166008.jpg │ │ └── n578564.jpg │ ├── nlvr2 │ │ ├── dev-850-0-img0.png │ │ ├── dev-850-0-img1.png │ │ ├── dev-850-1-img0.png │ │ ├── dev-850-1-img1.png │ │ ├── dev-850-2-img0.png │ │ ├── dev-850-2-img1.png │ │ ├── dev-850-3-img0.png │ │ ├── dev-850-3-img1.png │ │ ├── test1-0-0-img0.png │ │ ├── test1-0-0-img1.png │ │ ├── test1-0-1-img0.png │ │ ├── test1-0-1-img1.png │ │ ├── test1-0-2-img0.png │ │ ├── test1-0-2-img1.png │ │ ├── test1-0-3-img0.png │ │ ├── test1-0-3-img1.png │ │ ├── train-10171-0-img0.png │ │ ├── train-10171-0-img1.png │ │ ├── train-4100-0-img0.png │ │ ├── train-4100-0-img1.png │ │ ├── train-4933-2-img0.png │ │ ├── train-4933-2-img1.png │ │ ├── train-6623-1-img0.png │ │ └── train-6623-1-img1.png │ ├── vgqa │ │ ├── 1.jpg │ │ ├── 2.jpg │ │ ├── 2415125.jpg │ │ ├── 2415126.jpg │ │ ├── 4.jpg │ │ └── 5.jpg │ ├── vision_reader │ │ ├── jpeg_example.jpeg │ │ ├── jpg_example.jpg │ │ └── png_example.png │ ├── visual_entailment │ │ ├── 1016887272.jpg │ │ ├── 
2248275918.jpg │ │ ├── 402978771.jpg │ │ └── 4564320256.jpg │ └── vqav2 │ │ ├── COCO_train2014_000000458752.jpg │ │ ├── COCO_val2014_000000262148.jpg │ │ └── COCO_val2014_000000393225.jpg │ ├── nlvr2 │ ├── experiment.jsonnet │ ├── experiment_from_huggingface.jsonnet │ └── tiny-dev.json │ ├── vgqa │ └── question_answers.json │ ├── vilbert_multitask │ ├── dataset.json │ ├── experiment.jsonnet │ └── model.tar.gz │ ├── vilbert_ve │ ├── experiment.jsonnet │ └── experiment_from_huggingface.jsonnet │ ├── vilbert_vqa │ ├── experiment.jsonnet │ └── experiment_from_huggingface.jsonnet │ ├── visual_entailment │ └── sample_pairs.jsonl │ └── vqav2 │ ├── annotations.json │ └── questions.json ├── tests ├── __init__.py ├── classification │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── boolq.py │ │ └── stanford_sentiment_tree_bank_test.py │ ├── interpret │ │ ├── __init__.py │ │ └── sst_test.py │ └── models │ │ ├── __init__.py │ │ └── biattentive_classification_network_test.py ├── commands │ └── multitask_predict_test.py ├── common │ └── ontonotes_test.py ├── coref │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── coref_test.py │ │ ├── preco_test.py │ │ └── winobias_test.py │ ├── interpret │ │ ├── __init__.py │ │ └── interpret_test.py │ ├── metrics │ │ ├── __init__.py │ │ ├── conll_coref_scores_test.py │ │ └── mention_recall_test.py │ ├── models │ │ ├── __init__.py │ │ └── coref_test.py │ └── predictors │ │ ├── __init__.py │ │ └── coref_test.py ├── generation │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── copynet_test.py │ │ └── seq2seq_test.py │ ├── models │ │ ├── __init__.py │ │ ├── bart_test.py │ │ ├── composed_seq2seq_test.py │ │ ├── copynet_test.py │ │ ├── simple_seq2seq_test.py │ │ └── t5_test.py │ ├── modules │ │ ├── __init__.py │ │ ├── decoder_nets │ │ │ ├── __init__.py │ │ │ ├── lstm_cell_test.py │ │ │ └── stacked_self_attention_test.py │ │ └── seq_decoders │ │ │ ├── __init__.py │ │ │ └── auto_regressive_test.py │ 
└── predictors │ │ ├── __init__.py │ │ └── seq2seq_test.py ├── lm │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── masked_language_modeling_test.py │ │ ├── next_token_lm_test.py │ │ └── simple_language_modeling_test.py │ ├── interpret │ │ ├── __init__.py │ │ ├── lm_hotflip_test.py │ │ └── simple_gradient_test.py │ ├── models │ │ ├── __init__.py │ │ ├── bidirectional_lm_test.py │ │ ├── language_model_test.py │ │ ├── masked_language_model_test.py │ │ └── next_token_lm_test.py │ ├── modules │ │ ├── __init__.py │ │ ├── language_model_heads │ │ │ ├── __init__.py │ │ │ ├── bert_test.py │ │ │ └── gpt2_test.py │ │ ├── seq2seq_encoders │ │ │ ├── __init__.py │ │ │ └── bidirectional_lm_transformer_test.py │ │ └── token_embedders │ │ │ ├── __init__.py │ │ │ ├── bidirectional_lm_test.py │ │ │ └── language_model_test.py │ └── predictors │ │ ├── __init__.py │ │ ├── masked_language_model_test.py │ │ └── next_token_lm_test.py ├── mc │ ├── dataset_readers │ │ ├── commonsenseqa_test.py │ │ ├── piqa_test.py │ │ └── swag_test.py │ └── models │ │ └── transformer_mc_test.py ├── pair_classification │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── quora_paraphrase_test.py │ │ ├── snli_test.py │ │ └── transformer_superglue_rte_test.py │ ├── models │ │ ├── __init__.py │ │ ├── adversarial_bias_mitigator_test.py │ │ ├── bias_mitigator_applicator_test.py │ │ ├── bimpm_test.py │ │ ├── decomposable_attention_test.py │ │ └── esim_test.py │ └── task_checklists │ │ ├── __init__.py │ │ └── textual_entailment_suite_test.py ├── pretrained_test.py ├── rc │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── drop_test.py │ │ ├── qangaroo_test.py │ │ ├── record_reader_test.py │ │ ├── squad_test.py │ │ ├── transformer_squad_test.py │ │ └── utils_test.py │ ├── evaluations │ │ ├── __init__.py │ │ ├── drop_test.py │ │ ├── orb_test.py │ │ └── quoref_test.py │ ├── interpret │ │ ├── __init__.py │ │ └── bidaf_hotflip_test.py │ ├── metrics │ │ ├── 
drop_em_and_f1_test.py │ │ └── squad_em_and_f1_test.py │ ├── models │ │ ├── __init__.py │ │ ├── bidaf_ensemble_test.py │ │ ├── bidaf_test.py │ │ ├── dialog_qa_test.py │ │ ├── naqanet_test.py │ │ ├── qanet_test.py │ │ ├── quac_test.py │ │ ├── transformer_qa_test.py │ │ └── utils_test.py │ ├── modules │ │ ├── __init__.py │ │ └── seq2seq_encoders │ │ │ ├── __init__.py │ │ │ ├── multi_head_self_attention_test.py │ │ │ ├── qanet_encoder_test.py │ │ │ └── stacked_self_attention_test.py │ ├── predictors │ │ ├── __init__.py │ │ ├── bidaf_test.py │ │ ├── dialog_qa_test.py │ │ └── transformer_qa_test.py │ └── task_checklists │ │ ├── __init__.py │ │ └── question_answering_suite_test.py ├── structured_prediction │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── penn_tree_bank_test.py │ │ ├── srl_test.py │ │ └── universal_dependencies_test.py │ ├── metrics │ │ ├── __init__.py │ │ ├── span_based_f1_test.py │ │ └── srl_eval_scorer_test.py │ ├── models │ │ ├── __init__.py │ │ ├── bert_srl_test.py │ │ ├── biaffine_dependency_parser_test.py │ │ ├── constituency_parser_test.py │ │ ├── graph_parser_test.py │ │ └── semantic_role_labeling_test.py │ └── predictors │ │ ├── __init__.py │ │ ├── biaffine_dependency_parser_test.py │ │ ├── constituency_parser_test.py │ │ ├── openie_test.py │ │ └── srl_test.py ├── tagging │ ├── __init__.py │ ├── dataset_readers │ │ ├── __init__.py │ │ ├── ccgbank_test.py │ │ ├── conll2000_test.py │ │ └── ontonotes_ner_test.py │ └── models │ │ ├── __init__.py │ │ ├── crf_tagger_label_weights_test.py │ │ └── crf_tagger_test.py ├── training_config_test.py ├── version_test.py └── vision │ ├── __init__.py │ ├── dataset_readers │ ├── flickr30k_test.py │ ├── gqa_test.py │ ├── nlvr2_test.py │ ├── vgqa_test.py │ ├── vision_reader_test.py │ ├── visual_entailment_test.py │ └── vqav2_test.py │ ├── metrics │ └── vqa_test.py │ └── models │ ├── __init__.py │ ├── vilbert_ir_test.py │ ├── vilbert_multitask_test.py │ ├── vilbert_nlvr2_test.py │ ├── 
vilbert_vqa_test.py │ └── visual_entailment_test.py └── training_config ├── classification ├── basic_stanford_sentiment_treebank.jsonnet ├── biattentive_classification_network.jsonnet ├── biattentive_classification_network_elmo.jsonnet ├── boolq_roberta.jsonnet └── stanford_sentiment_treebank_roberta.jsonnet ├── coref ├── coref.jsonnet ├── coref_bert_lstm.jsonnet └── coref_spanbert_large.jsonnet ├── generation ├── bart_cnn_dm.jsonnet └── t5_cnn_dm.jsonnet ├── lm └── bidirectional_language_model.jsonnet ├── mc ├── commonsenseqa.jsonnet ├── piqa.jsonnet ├── piqa_tt.jsonnet └── swag.jsonnet ├── pair_classification ├── adversarial_binary_gender_bias_mitigated_snli_roberta.jsonnet ├── bimpm.jsonnet ├── binary_gender_bias_mitigated_snli_roberta.jsonnet ├── decomposable_attention.jsonnet ├── decomposable_attention_elmo.jsonnet ├── esim.jsonnet ├── esim_elmo.jsonnet ├── mnli_roberta.jsonnet ├── snli_roberta.jsonnet └── superglue_rte_roberta.jsonnet ├── rc ├── bidaf.jsonnet ├── bidaf_elmo.jsonnet ├── dialog_qa.jsonnet ├── naqanet.jsonnet ├── qanet.jsonnet └── transformer_qa.jsonnet ├── structured_prediction ├── bert_base_srl.jsonnet ├── constituency_parser.jsonnet ├── constituency_parser_elmo.jsonnet ├── constituency_parser_transformer_elmo.jsonnet ├── dependency_parser.jsonnet ├── semantic_dependencies.jsonnet ├── srl.jsonnet ├── srl_elmo.jsonnet └── srl_elmo_5.5B.jsonnet ├── tagging ├── fgner_transformer.jsonnet ├── fine-grained-ner.jsonnet ├── ner.jsonnet └── ner_elmo.jsonnet └── vision ├── vilbert_flickr30k_pretrained.jsonnet ├── vilbert_gqa_from_huggingface.jsonnet ├── vilbert_gqa_from_huggingface_balanced.jsonnet ├── vilbert_multitask.jsonnet ├── vilbert_nlvr2_head_pretrained.jsonnet ├── vilbert_nlvr2_pretrained.jsonnet ├── vilbert_ve_pretrained.jsonnet ├── vilbert_vgqa_pretrained.jsonnet ├── vilbert_vqa.jsonnet ├── vilbert_vqa_bert_large.jsonnet ├── vilbert_vqa_pretrained.jsonnet └── vilbert_vqa_pretrained_bert_large.jsonnet /.coveragerc: 
-------------------------------------------------------------------------------- 1 | [run] 2 | omit = tests/* 3 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .dockerignore 2 | **.pyc 3 | **/__pycache__ 4 | **/.mypy_cache 5 | .pytest_cache 6 | .gitignore 7 | .git 8 | build 9 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 115 3 | 4 | ignore = 5 | # these rules don't play well with black 6 | E203 # whitespace before : 7 | W503 # line break before binary operator 8 | 9 | exclude = 10 | build/** 11 | doc/** 12 | 13 | per-file-ignores = 14 | # __init__.py files are allowed to have unused imports and lines-too-long 15 | allennlp_models/__init__.py:F401 16 | allennlp_models/**/__init__.py:F401,E501 17 | 18 | # tests don't have to respect 19 | # E501: line length 20 | # E261: 2 spaces before comments (it would be ideal if they did, though) 21 | # E731: do not assign a lambda expression, use a def 22 | # F401: unused imports 23 | tests/**:E501,E261,E731,F401 24 | 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # build artifacts 2 | 3 | .eggs/ 4 | .mypy_cache 5 | allennlp_models.egg-info/ 6 | build/ 7 | dist/ 8 | 9 | 10 | # dev tools 11 | 12 | .envrc 13 | .python-version 14 | .idea 15 | /*.iml 16 | 17 | 18 | # jupyter notebooks 19 | 20 | .ipynb_checkpoints 21 | 22 | 23 | # miscellaneous 24 | 25 | .cache/ 26 | allennlp/tools/EVALB/evalb.dSYM/ 27 | doc/_build/ 28 | *.swp 29 | .DS_Store 30 | /output_path/ 31 | 32 | 33 | # python 34 | 35 | *.pyc 36 | *.pyo 37 | __pycache__ 38 | 39 | 40 | # testing and continuous integration 41 | 42 | .coverage 43 | 
.pytest_cache/ 44 | coverage.xml 45 | 46 | # doc stuff 47 | mkdocs.yml 48 | docs/models 49 | docs/*.md 50 | site/ 51 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # This Dockerfile creates an environment suitable for downstream usage of AllenNLP 2 | # with allennlp-models. It's built from official release images of AllenNLP with a wheel 3 | # install of the corresponding version of allennlp-models, and is used to publish 4 | # the official allennlp/models images. 5 | # 6 | # This is very similar to Dockerfile.release, except that allennlp-models isn't 7 | # installed from PyPI, it's installed from an arbitrary wheel build. 8 | # The reason for this difference is that this image is built during a release workflow 9 | # on a GitHub Actions job, which is required to succeed *before* the PyPI release is uploaded. 10 | 11 | ARG ALLENNLP_TAG 12 | 13 | FROM allennlp/allennlp:${ALLENNLP_TAG} 14 | 15 | # Install the wheel of allennlp-models. 16 | COPY dist dist/ 17 | RUN pip install --no-cache-dir $(ls dist/*.whl) && rm -rf dist/ 18 | -------------------------------------------------------------------------------- /Dockerfile.commit: -------------------------------------------------------------------------------- 1 | # This Dockerfile is used to build an image from specific commits of allennlp and 2 | # allennlp-models. It requires three build args: ALLENNLP_COMMIT, ALLENNLP_MODELS_COMMIT, 3 | # and CUDA. 4 | # ALLENNLP_COMMIT and ALLENNLP_MODELS_COMMIT should be set to the desired commit SHAs for each repo. 5 | # CUDA should be set to a supported CUDA version such as '10.2' or '11.0'. 6 | 7 | ARG ALLENNLP_COMMIT 8 | ARG CUDA 9 | 10 | FROM allennlp/commit:${ALLENNLP_COMMIT}-cuda${CUDA} 11 | 12 | ARG ALLENNLP_MODELS_COMMIT 13 | 14 | # Ensure allennlp isn't re-installed when we install allennlp-models. 
15 | ENV ALLENNLP_VERSION_OVERRIDE allennlp 16 | 17 | # To be compatible with older versions of allennlp-models. 18 | ENV IGNORE_ALLENNLP_IN_SETUP true 19 | 20 | RUN pip install --no-cache-dir git+https://github.com/allenai/allennlp-models.git@${ALLENNLP_MODELS_COMMIT} 21 | -------------------------------------------------------------------------------- /Dockerfile.release: -------------------------------------------------------------------------------- 1 | # This Dockerfile is used to build an image from a specific PyPI release of allennlp and 2 | # allennlp-models. 3 | # It requires two build args: RELEASE and CUDA. 4 | # RELEASE should be a allennlp/allennlp-models version, such as '1.2.2', 5 | # CUDA should be a supported CUDA version, such '10.2'. 6 | 7 | ARG RELEASE 8 | ARG CUDA 9 | 10 | FROM allennlp/allennlp:v${RELEASE}-cuda${CUDA} 11 | 12 | # Need to specify this ARG again because the FROM stage consumes all args before it. 13 | ARG RELEASE 14 | 15 | RUN pip install --no-cache-dir allennlp-models==${RELEASE} 16 | -------------------------------------------------------------------------------- /Dockerfile.test: -------------------------------------------------------------------------------- 1 | # Used to build an image for running tests. 2 | FROM python:3.8 3 | 4 | ENV LC_ALL=C.UTF-8 5 | ENV LANG=C.UTF-8 6 | 7 | ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 8 | 9 | # Tell nvidia-docker the driver spec that we need as well as to 10 | # use all available devices, which are mounted at /usr/local/nvidia. 11 | # The LABEL supports an older version of nvidia-docker, the env 12 | # variables a newer one. 13 | ENV NVIDIA_VISIBLE_DEVICES all 14 | ENV NVIDIA_DRIVER_CAPABILITIES compute,utility 15 | LABEL com.nvidia.volumes.needed="nvidia_driver" 16 | 17 | WORKDIR /stage/allennlp-models 18 | 19 | # Our self-hosted runner currently has CUDA 11.0. 
20 | RUN pip install --no-cache-dir torch==1.7.0+cu110 -f https://download.pytorch.org/whl/torch_stable.html 21 | 22 | COPY requirements.txt requirements.txt 23 | COPY dev-requirements.txt dev-requirements.txt 24 | COPY constraints.txt constraints.txt 25 | 26 | ARG ALLENNLP_COMMIT_SHA 27 | RUN pip install --no-cache-dir -r requirements.txt -c constraints.txt && \ 28 | pip install --no-cache-dir -r dev-requirements.txt -c constraints.txt 29 | 30 | COPY . . 31 | 32 | ENTRYPOINT ["make"] 33 | -------------------------------------------------------------------------------- /allennlp_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/allennlp_models/__init__.py -------------------------------------------------------------------------------- /allennlp_models/classification/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.classification.models import * 3 | from allennlp_models.classification.dataset_readers import * 4 | -------------------------------------------------------------------------------- /allennlp_models/classification/dataset_readers/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.classification.dataset_readers.stanford_sentiment_tree_bank import ( 2 | StanfordSentimentTreeBankDatasetReader, 3 | ) 4 | from allennlp_models.classification.dataset_readers.boolq import BoolQDatasetReader 5 | -------------------------------------------------------------------------------- /allennlp_models/classification/models/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.classification.models.biattentive_classification_network import ( 2 | BiattentiveClassificationNetwork, 3 | ) 4 | from 
allennlp_models.classification.models.transformer_classification_tt import ( 5 | TransformerClassificationTT, 6 | ) 7 | -------------------------------------------------------------------------------- /allennlp_models/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/allennlp_models/common/__init__.py -------------------------------------------------------------------------------- /allennlp_models/coref/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Coreference resolution is defined as follows: given a document, find and cluster entity mentions. 3 | """ 4 | 5 | from allennlp_models.coref.dataset_readers.conll import ConllCorefReader 6 | from allennlp_models.coref.dataset_readers.preco import PrecoReader 7 | from allennlp_models.coref.dataset_readers.winobias import WinobiasReader 8 | from allennlp_models.coref.models.coref import CoreferenceResolver 9 | from allennlp_models.coref.predictors.coref import CorefPredictor 10 | -------------------------------------------------------------------------------- /allennlp_models/coref/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/allennlp_models/coref/dataset_readers/__init__.py -------------------------------------------------------------------------------- /allennlp_models/coref/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/allennlp_models/coref/metrics/__init__.py -------------------------------------------------------------------------------- /allennlp_models/coref/models/__init__.py: 
-------------------------------------------------------------------------------- 1 | from allennlp_models.coref.models.coref import CoreferenceResolver 2 | -------------------------------------------------------------------------------- /allennlp_models/coref/predictors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/allennlp_models/coref/predictors/__init__.py -------------------------------------------------------------------------------- /allennlp_models/generation/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.generation.modules import * 3 | from allennlp_models.generation.predictors import * 4 | from allennlp_models.generation.models import * 5 | from allennlp_models.generation.dataset_readers import * 6 | -------------------------------------------------------------------------------- /allennlp_models/generation/dataset_readers/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.generation.dataset_readers.copynet_seq2seq import CopyNetDatasetReader 2 | from allennlp_models.generation.dataset_readers.seq2seq import Seq2SeqDatasetReader 3 | from allennlp_models.generation.dataset_readers.cnn_dm import CNNDailyMailDatasetReader 4 | -------------------------------------------------------------------------------- /allennlp_models/generation/models/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.generation.models.composed_seq2seq import ComposedSeq2Seq 2 | from allennlp_models.generation.models.copynet_seq2seq import CopyNetSeq2Seq 3 | from allennlp_models.generation.models.simple_seq2seq import SimpleSeq2Seq 4 | from allennlp_models.generation.models.bart import Bart 5 | from 
allennlp_models.generation.models.t5 import T5 6 | -------------------------------------------------------------------------------- /allennlp_models/generation/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.generation.modules.decoder_nets import * 3 | from allennlp_models.generation.modules.seq_decoders import * 4 | -------------------------------------------------------------------------------- /allennlp_models/generation/modules/decoder_nets/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.generation.modules.decoder_nets.lstm_cell import LstmCellDecoderNet 2 | from allennlp_models.generation.modules.decoder_nets.decoder_net import DecoderNet 3 | from allennlp_models.generation.modules.decoder_nets.stacked_self_attention import ( 4 | StackedSelfAttentionDecoderNet, 5 | ) 6 | -------------------------------------------------------------------------------- /allennlp_models/generation/modules/seq_decoders/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.generation.modules.seq_decoders.auto_regressive import AutoRegressiveSeqDecoder 2 | from allennlp_models.generation.modules.seq_decoders.seq_decoder import SeqDecoder 3 | -------------------------------------------------------------------------------- /allennlp_models/generation/predictors/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.generation.predictors.seq2seq import Seq2SeqPredictor 2 | -------------------------------------------------------------------------------- /allennlp_models/lm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.lm.dataset_readers import * 3 | from allennlp_models.lm.models import * 4 | 
from allennlp_models.lm.modules import * 5 | from allennlp_models.lm.predictors import * 6 | from allennlp_models.lm.util import * 7 | -------------------------------------------------------------------------------- /allennlp_models/lm/dataset_readers/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.lm.dataset_readers.masked_language_model import MaskedLanguageModelingReader 2 | from allennlp_models.lm.dataset_readers.next_token_lm import NextTokenLMReader 3 | from allennlp_models.lm.dataset_readers.simple_language_modeling import ( 4 | SimpleLanguageModelingDatasetReader, 5 | ) 6 | -------------------------------------------------------------------------------- /allennlp_models/lm/models/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.lm.models.bidirectional_lm import BidirectionalLanguageModel 2 | from allennlp_models.lm.models.language_model import LanguageModel 3 | from allennlp_models.lm.models.masked_language_model import MaskedLanguageModel 4 | from allennlp_models.lm.models.next_token_lm import NextTokenLM 5 | -------------------------------------------------------------------------------- /allennlp_models/lm/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.lm.modules.seq2seq_encoders import * 3 | from allennlp_models.lm.modules.language_model_heads import * 4 | from allennlp_models.lm.modules.token_embedders import * 5 | -------------------------------------------------------------------------------- /allennlp_models/lm/modules/language_model_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.lm.modules.language_model_heads.language_model_head import LanguageModelHead 2 | from allennlp_models.lm.modules.language_model_heads.bert import 
BertLanguageModelHead 3 | from allennlp_models.lm.modules.language_model_heads.gpt2 import Gpt2LanguageModelHead 4 | from allennlp_models.lm.modules.language_model_heads.linear import LinearLanguageModelHead 5 | -------------------------------------------------------------------------------- /allennlp_models/lm/modules/language_model_heads/language_model_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from allennlp.common import Registrable 4 | 5 | 6 | class LanguageModelHead(torch.nn.Module, Registrable): 7 | """ 8 | A `LanguageModelHead` encapsulates a function that goes from some hidden state to logits over 9 | a vocabulary. 10 | """ 11 | 12 | def get_input_dim(self) -> int: 13 | raise NotImplementedError 14 | 15 | def get_output_dim(self) -> int: 16 | raise NotImplementedError 17 | 18 | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: # type: ignore 19 | raise NotImplementedError 20 | -------------------------------------------------------------------------------- /allennlp_models/lm/modules/language_model_heads/linear.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from allennlp.data import Vocabulary 4 | from allennlp_models.lm.modules.language_model_heads.language_model_head import LanguageModelHead 5 | 6 | 7 | @LanguageModelHead.register("linear") 8 | class LinearLanguageModelHead(LanguageModelHead): 9 | """ 10 | Uses `torch.nn.Linear` as a language model head. Does nothing else fancy. This was intended 11 | largely for testing code with small models and simple components. It's likely that you would 12 | want something nicer for actually training a language model, such as tying weights with an 13 | input embedding, or an adaptive softmax, or something. 
14 | """ 15 | 16 | def __init__(self, vocab: Vocabulary, input_dim: int, vocab_namespace: str) -> None: 17 | super().__init__() 18 | self.input_dim = input_dim 19 | self.output_dim = vocab.get_vocab_size(vocab_namespace) 20 | if self.output_dim <= 0: 21 | raise ValueError("We can't embed into an empty vocabulary.") 22 | self.linear = torch.nn.Linear(self.input_dim, self.output_dim) 23 | 24 | def get_input_dim(self) -> int: 25 | return self.input_dim 26 | 27 | def get_output_dim(self) -> int: 28 | return self.output_dim 29 | 30 | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: 31 | return self.linear(hidden_states) 32 | -------------------------------------------------------------------------------- /allennlp_models/lm/modules/seq2seq_encoders/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.lm.modules.seq2seq_encoders.bidirectional_lm_transformer import ( 2 | BidirectionalLanguageModelTransformer, 3 | ) 4 | -------------------------------------------------------------------------------- /allennlp_models/lm/modules/token_embedders/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.lm.modules.token_embedders.language_model import LanguageModelTokenEmbedder 2 | from allennlp_models.lm.modules.token_embedders.bidirectional_lm import ( 3 | BidirectionalLanguageModelTokenEmbedder, 4 | ) 5 | -------------------------------------------------------------------------------- /allennlp_models/lm/predictors/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.lm.predictors.masked_language_model import MaskedLanguageModelPredictor 2 | from allennlp_models.lm.predictors.next_token_lm import NextTokenLMPredictor 3 | -------------------------------------------------------------------------------- /allennlp_models/lm/predictors/masked_language_model.py: 
-------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | 4 | import numpy 5 | 6 | from allennlp.common.util import JsonDict 7 | from allennlp.data import Instance, Token 8 | from allennlp.data.fields import TextField 9 | from allennlp.predictors.predictor import Predictor 10 | 11 | 12 | @Predictor.register("masked_language_model") 13 | class MaskedLanguageModelPredictor(Predictor): 14 | def predict(self, sentence_with_masks: str) -> JsonDict: 15 | return self.predict_json({"sentence": sentence_with_masks}) 16 | 17 | def predictions_to_labeled_instances( 18 | self, instance: Instance, outputs: Dict[str, numpy.ndarray] 19 | ): 20 | new_instance = instance.duplicate() 21 | token_field: TextField = instance["tokens"] # type: ignore 22 | mask_targets = [Token(target_top_k[0]) for target_top_k in outputs["words"]] 23 | 24 | new_instance.add_field( 25 | "target_ids", 26 | TextField(mask_targets, token_field._token_indexers), 27 | vocab=self._model.vocab, 28 | ) 29 | return [new_instance] 30 | 31 | def _json_to_instance(self, json_dict: JsonDict) -> Instance: 32 | """ 33 | Expects JSON that looks like `{"sentence": "..."}`. 
34 | """ 35 | sentence = json_dict["sentence"] 36 | return self._dataset_reader.text_to_instance(sentence=sentence) # type: ignore 37 | -------------------------------------------------------------------------------- /allennlp_models/lm/predictors/next_token_lm.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | 4 | import numpy 5 | 6 | from allennlp.common.util import JsonDict 7 | from allennlp.data import Instance, Token 8 | from allennlp.data.fields import TextField 9 | from allennlp.predictors.predictor import Predictor 10 | 11 | 12 | @Predictor.register("next_token_lm") 13 | class NextTokenLMPredictor(Predictor): 14 | def predict(self, sentence: str) -> JsonDict: 15 | return self.predict_json({"sentence": sentence}) 16 | 17 | def predictions_to_labeled_instances( 18 | self, instance: Instance, outputs: Dict[str, numpy.ndarray] 19 | ): 20 | new_instance = instance.duplicate() 21 | token_field: TextField = instance["tokens"] # type: ignore 22 | mask_targets = [Token(target_top_k[0]) for target_top_k in outputs["top_tokens"][0]] 23 | 24 | new_instance.add_field( 25 | "target_ids", 26 | TextField(mask_targets, token_field._token_indexers), 27 | vocab=self._model.vocab, 28 | ) 29 | return [new_instance] 30 | 31 | def _json_to_instance(self, json_dict: JsonDict) -> Instance: 32 | """ 33 | Expects JSON that looks like `{"sentence": "..."}`. 
34 | """ 35 | sentence = json_dict["sentence"] 36 | return self._dataset_reader.text_to_instance(sentence=sentence) # type: ignore 37 | -------------------------------------------------------------------------------- /allennlp_models/lm/util/__init__.py: -------------------------------------------------------------------------------- 1 | from .beam_search_generators import * # noqa: F403 2 | -------------------------------------------------------------------------------- /allennlp_models/lm/util/beam_search_generators/__init__.py: -------------------------------------------------------------------------------- 1 | from .beam_search_generator import BeamSearchGenerator 2 | from .transformer_beam_search_generator import TransformerBeamSearchGenerator 3 | -------------------------------------------------------------------------------- /allennlp_models/mc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/allennlp_models/mc/__init__.py -------------------------------------------------------------------------------- /allennlp_models/mc/dataset_readers/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.mc.dataset_readers.swag import SwagReader 2 | from allennlp_models.mc.dataset_readers.commonsenseqa import CommonsenseQaReader 3 | from allennlp_models.mc.dataset_readers.piqa import PiqaReader 4 | -------------------------------------------------------------------------------- /allennlp_models/mc/dataset_readers/commonsenseqa.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from allennlp.data import DatasetReader 4 | 5 | 6 | from allennlp_models.mc.dataset_readers.transformer_mc import TransformerMCReader 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @DatasetReader.register("commonsenseqa") 
12 | class CommonsenseQaReader(TransformerMCReader): 13 | """ 14 | Reads the input data for the CommonsenseQA dataset (https://arxiv.org/abs/1811.00937). 15 | """ 16 | 17 | def _read(self, file_path: str): 18 | from allennlp.common.file_utils import cached_path 19 | 20 | file_path = cached_path(file_path) 21 | 22 | logger.info("Reading file at %s", file_path) 23 | from allennlp.common.file_utils import json_lines_from_file 24 | 25 | for json in json_lines_from_file(file_path): 26 | choices = [(choice["label"], choice["text"]) for choice in json["question"]["choices"]] 27 | correct_choice = [ 28 | i for i, (label, _) in enumerate(choices) if label == json["answerKey"] 29 | ][0] 30 | yield self.text_to_instance( 31 | json["id"], json["question"]["stem"], [c[1] for c in choices], correct_choice 32 | ) 33 | -------------------------------------------------------------------------------- /allennlp_models/mc/dataset_readers/piqa_tt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from allennlp.data import DatasetReader 4 | 5 | from allennlp_models.mc.dataset_readers.piqa import PiqaReader 6 | from allennlp_models.mc.dataset_readers.transformer_mc_tt import ( 7 | TransformerMCReaderTransformerToolkit, 8 | ) 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @DatasetReader.register("piqa_tt") 14 | class PiqaReaderTransformerToolkit(TransformerMCReaderTransformerToolkit, PiqaReader): 15 | pass 16 | -------------------------------------------------------------------------------- /allennlp_models/mc/dataset_readers/swag.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from allennlp.data import DatasetReader 4 | 5 | 6 | from allennlp_models.mc.dataset_readers.transformer_mc import TransformerMCReader 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @DatasetReader.register("swag") 12 | class SwagReader(TransformerMCReader): 13 | """ 
14 | Reads the input data for the SWAG dataset (https://arxiv.org/abs/1808.05326). 15 | """ 16 | 17 | def _read(self, file_path: str): 18 | from allennlp.common.file_utils import cached_path 19 | 20 | file_path = cached_path(file_path) 21 | logger.info("Reading file at %s", file_path) 22 | 23 | with open(file_path, "r", encoding="utf-8") as f: 24 | import csv 25 | 26 | for line_number, line in enumerate(csv.reader(f)): 27 | if line_number == 0: 28 | continue 29 | 30 | yield self.text_to_instance( 31 | qid=line[1], start=line[3], alternatives=line[7:11], label=int(line[11]) 32 | ) 33 | -------------------------------------------------------------------------------- /allennlp_models/mc/models/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.mc.models.transformer_mc import TransformerMC 2 | from allennlp_models.mc.models.transformer_mc_tt import TransformerMCTransformerToolkit 3 | -------------------------------------------------------------------------------- /allennlp_models/mc/predictors/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.mc.predictors.transformer_mc import TransformerMCPredictor 2 | -------------------------------------------------------------------------------- /allennlp_models/mc/predictors/transformer_mc.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | from allennlp.common.util import JsonDict 5 | from allennlp.data import Instance 6 | from allennlp.predictors.predictor import Predictor 7 | 8 | 9 | @Predictor.register("transformer_mc") 10 | class TransformerMCPredictor(Predictor): 11 | """ 12 | Predictor for the :class:`~allennlp_models.mc.models.TransformerMC` model. 
13 | """ 14 | 15 | def predict(self, prefix: str, alternatives: List[str]) -> JsonDict: 16 | return self.predict_json({"prefix": prefix, "alternatives": alternatives}) 17 | 18 | def _json_to_instance(self, json_dict: JsonDict) -> Instance: 19 | return self._dataset_reader.text_to_instance( 20 | "no_qid", json_dict["prefix"], json_dict["alternatives"] 21 | ) 22 | -------------------------------------------------------------------------------- /allennlp_models/pair_classification/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.pair_classification.dataset_readers import * 3 | from allennlp_models.pair_classification.models import * 4 | from allennlp_models.pair_classification.predictors import * 5 | -------------------------------------------------------------------------------- /allennlp_models/pair_classification/dataset_readers/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.pair_classification.dataset_readers.quora_paraphrase import ( 2 | QuoraParaphraseDatasetReader, 3 | ) 4 | from allennlp_models.pair_classification.dataset_readers.snli import SnliReader 5 | from allennlp_models.pair_classification.dataset_readers.transformer_superglue_rte import ( 6 | TransformerSuperGlueRteReader, 7 | ) 8 | -------------------------------------------------------------------------------- /allennlp_models/pair_classification/models/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.pair_classification.models.bimpm import BiMpm 2 | from allennlp_models.pair_classification.models.decomposable_attention import DecomposableAttention 3 | from allennlp_models.pair_classification.models.esim import ESIM 4 | -------------------------------------------------------------------------------- /allennlp_models/pair_classification/predictors/__init__.py: 
-------------------------------------------------------------------------------- 1 | from allennlp_models.pair_classification.predictors.textual_entailment import ( 2 | TextualEntailmentPredictor, 3 | ) 4 | -------------------------------------------------------------------------------- /allennlp_models/rc/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.rc.models import * 3 | from allennlp_models.rc.predictors import * 4 | from allennlp_models.rc.dataset_readers import * 5 | from allennlp_models.rc.modules import * 6 | -------------------------------------------------------------------------------- /allennlp_models/rc/dataset_readers/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.rc.dataset_readers.drop import DropReader 2 | from allennlp_models.rc.dataset_readers.qangaroo import QangarooReader 3 | from allennlp_models.rc.dataset_readers.quac import QuACReader 4 | from allennlp_models.rc.dataset_readers.squad import SquadReader 5 | from allennlp_models.rc.dataset_readers.transformer_squad import TransformerSquadReader 6 | from allennlp_models.rc.dataset_readers.triviaqa import TriviaQaReader 7 | -------------------------------------------------------------------------------- /allennlp_models/rc/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.rc.metrics.drop_em_and_f1 import DropEmAndF1 2 | from allennlp_models.rc.metrics.squad_em_and_f1 import SquadEmAndF1 3 | -------------------------------------------------------------------------------- /allennlp_models/rc/models/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.rc.models.bidaf_ensemble import BidafEnsemble 2 | from allennlp_models.rc.models.bidaf import BidirectionalAttentionFlow 3 | from 
allennlp_models.rc.models.dialog_qa import DialogQA 4 | from allennlp_models.rc.models.naqanet import NumericallyAugmentedQaNet 5 | from allennlp_models.rc.models.qanet import QaNet 6 | from allennlp_models.rc.models.transformer_qa import TransformerQA 7 | from allennlp_models.rc.models.utils import get_best_span 8 | -------------------------------------------------------------------------------- /allennlp_models/rc/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.rc.modules.seq2seq_encoders import * 3 | -------------------------------------------------------------------------------- /allennlp_models/rc/modules/seq2seq_encoders/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.rc.modules.seq2seq_encoders.multi_head_self_attention import ( 2 | MultiHeadSelfAttention, 3 | ) 4 | from allennlp_models.rc.modules.seq2seq_encoders.qanet_encoder import QaNetEncoder 5 | from allennlp_models.rc.modules.seq2seq_encoders.stacked_self_attention import ( 6 | StackedSelfAttentionEncoder, 7 | ) 8 | -------------------------------------------------------------------------------- /allennlp_models/rc/predictors/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.rc.predictors.bidaf import ReadingComprehensionPredictor 2 | from allennlp_models.rc.predictors.dialog_qa import DialogQAPredictor 3 | from allennlp_models.rc.predictors.transformer_qa import TransformerQAPredictor 4 | -------------------------------------------------------------------------------- /allennlp_models/rc/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/allennlp_models/rc/tools/__init__.py 
-------------------------------------------------------------------------------- /allennlp_models/structured_prediction/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.structured_prediction.predictors import * 3 | from allennlp_models.structured_prediction.dataset_readers import * 4 | from allennlp_models.structured_prediction.metrics import * 5 | from allennlp_models.structured_prediction.models import * 6 | -------------------------------------------------------------------------------- /allennlp_models/structured_prediction/dataset_readers/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.structured_prediction.dataset_readers.penn_tree_bank import ( 2 | PennTreeBankConstituencySpanDatasetReader, 3 | ) 4 | from allennlp_models.structured_prediction.dataset_readers.semantic_dependencies import ( 5 | SemanticDependenciesDatasetReader, 6 | ) 7 | from allennlp_models.structured_prediction.dataset_readers.srl import SrlReader 8 | from allennlp_models.structured_prediction.dataset_readers.universal_dependencies import ( 9 | UniversalDependenciesDatasetReader, 10 | ) 11 | -------------------------------------------------------------------------------- /allennlp_models/structured_prediction/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.structured_prediction.metrics.srl_eval_scorer import SrlEvalScorer 2 | -------------------------------------------------------------------------------- /allennlp_models/structured_prediction/models/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.structured_prediction.models.biaffine_dependency_parser import ( 2 | BiaffineDependencyParser, 3 | ) 4 | from allennlp_models.structured_prediction.models.constituency_parser import 
SpanConstituencyParser 5 | from allennlp_models.structured_prediction.models.graph_parser import GraphParser 6 | from allennlp_models.structured_prediction.models.srl import SemanticRoleLabeler 7 | from allennlp_models.structured_prediction.models.srl_bert import SrlBert 8 | -------------------------------------------------------------------------------- /allennlp_models/structured_prediction/predictors/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.structured_prediction.predictors.biaffine_dependency_parser import ( 2 | BiaffineDependencyParserPredictor, 3 | ) 4 | from allennlp_models.structured_prediction.predictors.constituency_parser import ( 5 | ConstituencyParserPredictor, 6 | ) 7 | from allennlp_models.structured_prediction.predictors.openie import OpenIePredictor 8 | from allennlp_models.structured_prediction.predictors.srl import SemanticRoleLabelerPredictor 9 | -------------------------------------------------------------------------------- /allennlp_models/structured_prediction/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/allennlp_models/structured_prediction/tools/__init__.py -------------------------------------------------------------------------------- /allennlp_models/tagging/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.tagging.predictors import * 3 | from allennlp_models.tagging.models import * 4 | from allennlp_models.tagging.dataset_readers import * 5 | -------------------------------------------------------------------------------- /allennlp_models/tagging/dataset_readers/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.tagging.dataset_readers.ccgbank import 
CcgBankDatasetReader 2 | from allennlp_models.tagging.dataset_readers.conll2000 import Conll2000DatasetReader 3 | from allennlp_models.tagging.dataset_readers.conll2003 import Conll2003DatasetReader 4 | from allennlp_models.tagging.dataset_readers.ontonotes_ner import OntonotesNamedEntityRecognition 5 | -------------------------------------------------------------------------------- /allennlp_models/tagging/dataset_readers/conll2003.py: -------------------------------------------------------------------------------- 1 | from allennlp.data.dataset_readers.conll2003 import Conll2003DatasetReader # noqa: F401 2 | 3 | # This component lives in the main repo because we need it there for tests. 4 | -------------------------------------------------------------------------------- /allennlp_models/tagging/models/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.tagging.models.crf_tagger import CrfTagger 2 | -------------------------------------------------------------------------------- /allennlp_models/tagging/predictors/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.tagging.predictors.sentence_tagger import SentenceTaggerPredictor 2 | -------------------------------------------------------------------------------- /allennlp_models/tagging/predictors/sentence_tagger.py: -------------------------------------------------------------------------------- 1 | from allennlp.predictors.sentence_tagger import SentenceTaggerPredictor # noqa: F401 2 | 3 | # This component lives in the main repo because we need it there for tests. 
4 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/constituency_parsing.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "constituency-parsing", 3 | "name": "Constituency Parsing", 4 | "description": "Constituency parsing is the task of breaking a text into sub-phrases, or constituents. Non-terminals in the parse tree are types of phrases, the terminals are the words in the sentence.", 5 | "expected_inputs": "The task expects an input sentence.", 6 | "expected_outputs": "Subwords or constituents.", 7 | "scope_and_limitations": null, 8 | "examples": [ 9 | {"sentence" : "Pierre Vinken died aged 81; immortalised aged 61."}, 10 | {"sentence" : "James went to the corner shop to buy some eggs, milk and bread for breakfast."}, 11 | {"sentence" : "If you bring $10 with you tomorrow, can you pay for me to eat too?"}, 12 | {"sentence" : "True self-control is waiting until the movie starts to eat your popcorn."} 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/dependency_parsing.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "dependency-parsing", 3 | "name": "Dependency Parsing", 4 | "description": "Dependency parsing is the task of analyzing the grammatical structure of a sentence and establishing the relationships between \"head\" words and the words which modify those heads.", 5 | "expected_inputs": "The task expects an input sentence.", 6 | "expected_outputs": null, 7 | "scope_and_limitations": null, 8 | "examples": [ 9 | {"sentence" : "James ate some cheese whilst thinking about the play."}, 10 | {"sentence" : "She decided not to take the house she'd viewed yesterday."}, 11 | {"sentence" : "The proportion of PepsiCo’s revenue coming from healthier food and beverages has risen from 38% in 2006 to 50%."}, 12 | {"sentence" :
"CRISPR-Cas9 is a versatile genome editing technology for studying the functions of genetic elements."} 13 | ], 14 | } 15 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/language_modeling.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "language-modeling", 3 | "name": "Language Modeling", 4 | "description": "Language modeling is the task of determining the probability of a given sequence of words occurring in a sentence.", 5 | "expected_inputs": "Text string with a few initial tokens.", 6 | "expected_outputs": "A probability distribution over the vocabulary for the next word.", 7 | "scope_and_limitations": null, 8 | "examples": [ 9 | {"sentence": "AllenNLP is "}, 10 | {"sentence": "The woman wept because she was "}, 11 | {"sentence": "My dog is very "}, 12 | {"sentence": "The fire burned the "}, 13 | {"sentence": "The doctor ran to the emergency room to see the "} 14 | ], 15 | } 16 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/masked_language_modeling.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "masked-language-modeling", 3 | "name": "Masked Language Modeling", 4 | "description": "Masked language modeling is a fill-in-the-blank task, where a model uses the context words surrounding a [MASK] token to try to predict what the [MASK] word should be.", 5 | "expected_inputs": "Text string with one or more [MASK] tokens.", 6 | "expected_outputs": "Most likely word subsitutions for each [MASK] token.", 7 | "scope_and_limitations": null, 8 | "examples": [ 9 | {"sentence": "The doctor ran to the emergency room to see [MASK] patient."}, 10 | {"sentence": "The [MASK] blocked the [MASK]."}, 11 | {"sentence": "The woman wept because she was [MASK]."}, 12 | {"sentence": "The [MASK] burned the [MASK] quickly."}, 13 | {"sentence": "My dog is 
[MASK] and likes to [MASK] the entire [MASK]."} 14 | ], 15 | } 16 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/ner.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "ner", 3 | "name": "Named Entity Recognition", 4 | "description": "Named Entity Recognition is the task of identifying named entities (people, locations, organizations, etc.) in the input text.", 5 | "expected_inputs": "The task expects an input sentence.", 6 | "expected_outputs": "The output is all the identified named entities (which can be one or more words) in the text.", 7 | "scope_and_limitations": null, 8 | "examples": [ 9 | {"sentence" : "This shirt was bought at Grandpa Joe's in downtown Deep Learning."}, 10 | {"sentence" : "AllenNLP is a PyTorch-based natural language processing library developed at the Allen Institute for Artificial Intelligence in Seattle."}, 11 | {"sentence" : "Did Uriah honestly think he could beat The Legend of Zelda in under three hours?"}, 12 | {"sentence" : "Michael Jordan is a professor at Berkeley."}, 13 | {"sentence" : "My preferred candidate is Cary Moon, but she won't be the next mayor of Seattle."}, 14 | {"sentence" : "If you like Paul McCartney you should listen to the first Wings album."}, 15 | {"sentence" : "When I told John that I wanted to move to Alaska, he warned me that I'd have trouble finding a Starbucks there."} 16 | ], 17 | } 18 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/oie.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "oie", 3 | "name": "Open Information Extraction", 4 | "description": "Given an input sentence, Open Information Extraction (Open IE) extracts a list of propositions, each composed of a single predicate and an arbitrary number of arguments. 
These extractions break syntactically complex sentences into the relationships they express, which can then be used for various downstream tasks.", 5 | "expected_inputs": "The task expects an input sentence.", 6 | "expected_outputs": "A list of propositions.", 7 | "scope_and_limitations": null, 8 | "examples": [ 9 | {"sentence" : "In December, John decided to join the party."}, 10 | {"sentence" : "Bob agreed to take out the trash"}, 11 | {"sentence" : "Alex Honnold climbed up a New Jersey skyscraper."}, 12 | {"sentence" : "Albert Einstein, a German theoretical physicist, published the theory of relativity in 1915."}, 13 | {"sentence" : "Chair umpire Ramos managed to rob two players in the U.S. Open final."}, 14 | {"sentence" : "The CEO of a multi-million dollar company doesn't have much free time."} 15 | ], 16 | } 17 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/sentiment_analysis.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "sentiment-analysis", 3 | "name": "Sentiment Analysis", 4 | "description": "Sentiment Analysis is the task of interpreting and classifying emotions (positive or negative) in the input text.", 5 | "expected_inputs": "The task expects an input sentence.", 6 | "expected_outputs": "The probability of the sentiment being positive/negative.", 7 | "scope_and_limitations": null, 8 | "examples": [ 9 | {"sentence" : "a very well-made, funny and entertaining picture." 
}, 10 | {"sentence" : "so unremittingly awful that labeling it a dog probably constitutes cruelty to canines" }, 11 | {"sentence" : "all the amped up tony hawk style stunts and thrashing rap-metal can't disguise the fact that, really, we've been here, done that."}, 12 | {"sentence" : "visually imaginative, thematically instructive and thoroughly delightful, it takes us on a roller-coaster ride from innocence to experience without even a hint of that typical kiddie-flick sentimentality."} 13 | ], 14 | } 15 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/srl.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "srl", 3 | "name": "Semantic Role Labeling", 4 | "description": "Semantic Role Labeling (SRL) is the task of determining the latent predicate argument structure of a sentence and providing representations that can answer basic questions about sentence meaning, including who did what to whom, etc.", 5 | "expected_inputs": "The task expects an input sentence.", 6 | "expected_outputs": null, 7 | "scope_and_limitations": null, 8 | "examples": [ 9 | {"sentence" : "The keys, which were needed to access the building, were locked in the car."}, 10 | {"sentence" : "However, voters decided that if the stadium was such a good idea someone would build it himself, and rejected it 59% to 41%."}, 11 | {"sentence" : "Did Uriah honestly think he could beat the game in under three hours?"}, 12 | {"sentence" : "If you liked the music we were playing last night, you will absolutely love what we're playing tomorrow!"}, 13 | {"sentence" : "More than a few CEOs say the red-carpet treatment tempts them to return to a heartland city for future meetings."}, 14 | ], 15 | } 16 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/taskcard_template.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "id": "task-id", 3 | "name": null, 4 | "description": null, 5 | "expected_inputs": null, 6 | "expected_outputs": null, 7 | "scope_and_limitations": null, 8 | "examples": null, 9 | } 10 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/ve.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "ve", 3 | "name": "Visual Entailment", 4 | "description": "Visual Entailment (VE) is an inference task on image-sentence pairs. The goal is to predict whether the image (premise) semantically entails the sentence (hypothesis).", 5 | "expected_inputs": "The task expects a `premise` image and a `hypothesis` sentence.", 6 | "expected_outputs": "The expected output is whether the `premise` entails/contradicts/is neutral towards the `hypothesis`.", 7 | "scope_and_limitations": null, 8 | "examples": null, 9 | } 10 | -------------------------------------------------------------------------------- /allennlp_models/taskcards/vqa.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "vqa", 3 | "name": "Visual Question Answering", 4 | "description": "Visual Question Answering (VQA) is the task of generating a answer in response to a natural language question about the contents of an image. VQA models are typically trained and evaluated on datasets such as VQA2.0, GQA, Visual7W and VizWiz.", 5 | "expected_inputs": "The task requires an input image and a corresponding free text question about the image.", 6 | "expected_outputs": "A free text answer.", 7 | "scope_and_limitations": null, 8 | "examples": [ 9 | { 10 | "image" : "https://storage.googleapis.com/allennlp-public-data/vqav2/vqa-examples/baseball_game.jpg", 11 | "question" : "What game are they playing?" 
12 | }, 13 | { 14 | "image": "https://storage.googleapis.com/allennlp-public-data/vqav2/vqa-examples/bus_stop.jpg", 15 | "question": "What are the people waiting for?" 16 | }, 17 | { 18 | "image": "https://storage.googleapis.com/allennlp-public-data/vqav2/vqa-examples/kitchen.jpg", 19 | "question": "What is in the bowls on the island?" 20 | }, 21 | { 22 | "image": "https://storage.googleapis.com/allennlp-public-data/vqav2/vqa-examples/living_room.jpg", 23 | "question": "What color is the pillow in the middle?" 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /allennlp_models/version.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | _MAJOR = "2" 4 | _MINOR = "10" 5 | _PATCH = "1" 6 | # This is mainly for nightly builds which have the suffix ".dev$DATE". See 7 | # https://semver.org/#is-v123-a-semantic-version for the semantics. 8 | _SUFFIX = os.environ.get("ALLENNLP_MODELS_VERSION_SUFFIX", "") 9 | 10 | VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR) 11 | VERSION = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX) 12 | -------------------------------------------------------------------------------- /allennlp_models/vision/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F403 2 | from allennlp_models.vision.models import * 3 | from allennlp_models.vision.predictors import * 4 | from allennlp_models.vision.dataset_readers import * 5 | from allennlp_models.vision.metrics import * 6 | -------------------------------------------------------------------------------- /allennlp_models/vision/dataset_readers/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.vision.dataset_readers.vision_reader import VisionReader 2 | from allennlp_models.vision.dataset_readers.gqa import GQAReader 3 | from 
allennlp_models.vision.dataset_readers.nlvr2 import Nlvr2Reader 4 | from allennlp_models.vision.dataset_readers.vgqa import VGQAReader 5 | from allennlp_models.vision.dataset_readers.vqav2 import VQAv2Reader 6 | from allennlp_models.vision.dataset_readers.visual_entailment import VisualEntailmentReader 7 | from allennlp_models.vision.dataset_readers.flickr30k import Flickr30kReader 8 | -------------------------------------------------------------------------------- /allennlp_models/vision/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.vision.metrics.vqa import VqaMeasure 2 | -------------------------------------------------------------------------------- /allennlp_models/vision/models/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.vision.models.nlvr2 import Nlvr2Model 2 | from allennlp_models.vision.models.vision_text_model import VisionTextModel 3 | from allennlp_models.vision.models.visual_entailment import VisualEntailmentModel 4 | from allennlp_models.vision.models.vilbert_image_retrieval import ImageRetrievalVilbert 5 | from allennlp_models.vision.models.vilbert_vqa import VqaVilbert 6 | from allennlp_models.vision.models.heads.vqa_head import VqaHead 7 | from allennlp_models.vision.models.heads.visual_entailment_head import VisualEntailmentHead 8 | -------------------------------------------------------------------------------- /allennlp_models/vision/models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.vision.models.heads.nlvr2_head import Nlvr2Head 2 | from allennlp_models.vision.models.heads.vqa_head import VqaHead 3 | from allennlp_models.vision.models.heads.visual_entailment_head import VisualEntailmentHead 4 | -------------------------------------------------------------------------------- 
/allennlp_models/vision/predictors/__init__.py: -------------------------------------------------------------------------------- 1 | from allennlp_models.vision.predictors.vilbert_vqa import VilbertVqaPredictor 2 | from allennlp_models.vision.predictors.visual_entailment import VisualEntailmentPredictor 3 | -------------------------------------------------------------------------------- /allennlp_models/vision/predictors/vilbert_vqa.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict 2 | 3 | 4 | import numpy 5 | 6 | from allennlp.common.file_utils import cached_path 7 | from allennlp.common.util import JsonDict 8 | from allennlp.data import Instance 9 | from allennlp.predictors.predictor import Predictor 10 | 11 | 12 | @Predictor.register("vilbert_vqa") 13 | class VilbertVqaPredictor(Predictor): 14 | def predict(self, image: str, question: str) -> JsonDict: 15 | image = cached_path(image) 16 | return self.predict_json({"question": question, "image": image}) 17 | 18 | def _json_to_instance(self, json_dict: JsonDict) -> Instance: 19 | from allennlp_models.vision.dataset_readers.vqav2 import VQAv2Reader 20 | from allennlp_models.vision import GQAReader 21 | 22 | question = json_dict["question"] 23 | image = cached_path(json_dict["image"]) 24 | if isinstance(self._dataset_reader, VQAv2Reader) or isinstance( 25 | self._dataset_reader, GQAReader 26 | ): 27 | return self._dataset_reader.text_to_instance(question, image, use_cache=False) 28 | else: 29 | raise ValueError( 30 | f"Dataset reader is of type f{self._dataset_reader.__class__.__name__}. " 31 | f"Expected {VQAv2Reader.__name__}." 
32 | ) 33 | 34 | def predictions_to_labeled_instances( 35 | self, instance: Instance, outputs: Dict[str, numpy.ndarray] 36 | ) -> List[Instance]: 37 | return [instance] # TODO 38 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | precision: 0 3 | round: down 4 | status: 5 | patch: 6 | default: 7 | target: 90 8 | informational: true 9 | project: 10 | default: 11 | threshold: 1% 12 | informational: true 13 | changes: false 14 | comment: false 15 | ignore: 16 | - "tests/" 17 | -------------------------------------------------------------------------------- /constraints.txt: -------------------------------------------------------------------------------- 1 | torch<1.13.0 2 | 3 | # For structured prediction. 4 | conllu==4.4.2 5 | 6 | # Copied from https://github.com/allenai/allennlp/blob/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/constraints.txt 7 | ################################ 8 | ###### Core dependencies ####### 9 | ################################ 10 | torch<1.13.0 11 | torchvision<0.14.0 12 | cached-path<1.2.0 13 | spacy<3.4 14 | transformers<4.21 15 | filelock<3.8 16 | wandb<0.13.0 17 | 18 | # Protobuf is a dependency of wandb and tensorboard, but they are missing this pin. 19 | protobuf<4.0.0 20 | 21 | # Required so pip-compile can properly resolve the pydantic version 22 | inflect<6.0 23 | 24 | ################################################## 25 | ###### Extra dependencies for integrations ####### 26 | ################################################## 27 | # NOTE: we use a special trailing comment on each line to denote which extras 28 | # each package is needed by. For example, checklist is needed by the 'checklist' extra 29 | # that you install with 'pip install allennlp[checklist]'. 
30 | checklist==0.0.11 # needed by: checklist 31 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | #### TESTING-RELATED PACKAGES #### 2 | 3 | # Checks style, syntax, and other useful errors. 4 | flake8==5.0.4 5 | 6 | # Static type checking 7 | mypy==0.961 8 | 9 | # Automatic code formatting 10 | black 11 | 12 | # Running unit tests. 13 | pytest 14 | 15 | # Allows generation of coverage reports with pytest. 16 | pytest-cov 17 | 18 | # Lets you run tests in forked subprocesses. 19 | pytest-forked 20 | 21 | # Lets you run tests in parallel. 22 | pytest-xdist 23 | 24 | # Allows codecov to generate coverage reports 25 | coverage 26 | codecov 27 | 28 | # For running tests that aren't 100% reliable 29 | flaky 30 | 31 | #### DOC-RELATED PACKAGES #### 32 | 33 | # YAML manipulation 34 | ruamel.yaml 35 | 36 | # Generating markdown files from Python modules. 37 | pydoc-markdown>=4.0.0,<5.0.0 38 | databind.core 39 | docspec<2.0.0 40 | docspec-python<2.0.0 41 | 42 | mkdocs==1.3.0 43 | mkdocs-material>=5.5.0,<8.4.0 44 | markdown-include==0.6.0 45 | 46 | #### PACKAGE-UPLOAD PACKAGES #### 47 | 48 | # Pypi uploads 49 | twine>=1.11.0 50 | setuptools 51 | wheel 52 | -------------------------------------------------------------------------------- /docs/css/extra.css: -------------------------------------------------------------------------------- 1 | h4 { 2 | font-size: 0.9rem !important; 3 | font-weight: 400 !important; 4 | margin-top: 1.2em !important; 5 | } 6 | 7 | h2, h3, h4 { 8 | color: #213744; 9 | } 10 | 11 | .alignleft { 12 | float: left; 13 | } 14 | 15 | .alignright { 16 | float: right; 17 | } 18 | 19 | a.sourcelink { 20 | color: #888; 21 | } 22 | -------------------------------------------------------------------------------- /docs/img/allennlp-logo-dark.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/docs/img/allennlp-logo-dark.png -------------------------------------------------------------------------------- /docs/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/docs/img/favicon.ico -------------------------------------------------------------------------------- /mkdocs-skeleton.yml: -------------------------------------------------------------------------------- 1 | site_name: AllenNLP Models 2 | site_description: AllenNLP is a .. 3 | site_url: https://allennlp.org/ 4 | 5 | extra_css: 6 | - "css/extra.css" 7 | 8 | theme: 9 | name: material 10 | palette: 11 | primary: deep orange 12 | accent: grey 13 | logo: img/favicon.ico 14 | favicon: img/favicon.ico 15 | highlightjs: true 16 | hljs_languages: 17 | - python 18 | - typescript 19 | - json 20 | 21 | 22 | repo_name: allenai/allennlp-models 23 | 24 | nav: 25 | - Home: README.md 26 | - Repository: https://github.com/allenai/allennlp-models 27 | - Versions: 28 | - Latest: /models/latest/ 29 | - Stable: /models/stable/ 30 | - Commit: /models/main/ 31 | - Models: 'This section is autogenerated, do not edit.' 
32 | - CHANGELOG: CHANGELOG.md 33 | - License: https://raw.githubusercontent.com/allenai/allennlp-models/main/LICENSE 34 | 35 | markdown_extensions: 36 | - toc: 37 | permalink: '#' 38 | - admonition 39 | - extra 40 | - pymdownx.highlight 41 | - pymdownx.superfences 42 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | 4 | include = '\.pyi?$' 5 | 6 | exclude = ''' 7 | ( 8 | __pycache__ 9 | | \bbuild\b 10 | | \.git 11 | | \.mypy_cache 12 | | \.pytest_cache 13 | | \.vscode 14 | | \.venv 15 | | \bdist\b 16 | | \bdoc\b 17 | ) 18 | ''' 19 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | allennlp[all] @ git+https://github.com/allenai/allennlp.git@main 2 | 3 | torch>=1.7.0 4 | 5 | # For structured prediction. 6 | conllu>=4.4.2 7 | 8 | # For RC models 9 | word2number>=1.1 10 | py-rouge==1.1 11 | nltk>=3.6.5 12 | 13 | # For CNN/DailyMail dataset reader 14 | ftfy 15 | 16 | # For sentiment tree bank 17 | datasets 18 | -------------------------------------------------------------------------------- /scripts/.gitignore: -------------------------------------------------------------------------------- 1 | py2md.py 2 | -------------------------------------------------------------------------------- /scripts/ensure_models_found.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Ensures models are automatically found by allennlp. 
5 | """ 6 | import logging 7 | 8 | from allennlp.common.plugins import import_plugins 9 | from allennlp.models import Model 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | 13 | import_plugins() 14 | Model.by_name("copynet_seq2seq") 15 | -------------------------------------------------------------------------------- /scripts/ensure_versions_match.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Ensures allennlp and models versions are the same. 5 | """ 6 | 7 | from allennlp.version import VERSION as CORE_VERSION 8 | from allennlp_models.version import VERSION as MODELS_VERSION 9 | 10 | 11 | assert CORE_VERSION == MODELS_VERSION, f"core: {CORE_VERSION}, models: {MODELS_VERSION}" 12 | -------------------------------------------------------------------------------- /test_fixtures/bert-xsmall-dummy/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertModel" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 20, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 40, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 1, 15 | "num_hidden_layers": 1, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 250 19 | } 20 | -------------------------------------------------------------------------------- /test_fixtures/bert-xsmall-dummy/pytorch_model.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/bert-xsmall-dummy/pytorch_model.bin -------------------------------------------------------------------------------- /test_fixtures/classification/sst.txt: 
-------------------------------------------------------------------------------- 1 | (4 (2 (2 The) (2 actors)) (3 (4 (2 are) (3 fantastic)) (2 .))) 2 | (0 (2 It) (0 (1 (2 was) (0 terrible)) (2 .))) 3 | (2 (2 Chomp) (2 (2 chomp) (2 !))) 4 | -------------------------------------------------------------------------------- /test_fixtures/coref/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/coref/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/coref/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/coref/serialization/vocabulary/tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | . 
3 | Hong 4 | Kong 5 | the 6 | of 7 | , 8 | to 9 | The 10 | a 11 | people 12 | have 13 | - 14 | will 15 | is 16 | resources 17 | In 18 | summer 19 | 2005 20 | picture 21 | that 22 | long 23 | been 24 | looking 25 | forward 26 | started 27 | emerging 28 | with 29 | frequency 30 | in 31 | various 32 | major 33 | media 34 | With 35 | their 36 | unique 37 | charm 38 | these 39 | well 40 | known 41 | cartoon 42 | images 43 | once 44 | again 45 | caused 46 | be 47 | focus 48 | worldwide 49 | attention 50 | world 51 | 's 52 | fifth 53 | Disney 54 | park 55 | soon 56 | open 57 | public 58 | here 59 | area 60 | only 61 | one 62 | thousand 63 | plus 64 | square 65 | kilometers 66 | population 67 | dense 68 | Natural 69 | are 70 | relatively 71 | scarce 72 | However 73 | clever 74 | utilize 75 | all 76 | they 77 | created 78 | for 79 | developing 80 | tourism 81 | industry 82 | -------------------------------------------------------------------------------- /test_fixtures/coref/winobias.sample: -------------------------------------------------------------------------------- 1 | The designer argued with [the developer] and slapped [her] in the face. 2 | [The salesperson] sold (some books) to the librarian because [she] was trying to sell (them). 3 | -------------------------------------------------------------------------------- /test_fixtures/generation/bart/data/cnn_stories/2bd8ada1de6a7b02f59430cc82045eb8d29cf033.story: -------------------------------------------------------------------------------- 1 | (CNN)For the first time in eight years, a TV legend returned to doing what he does best. 2 | 3 | Contestants told to "come on down!" on the April 1 edition of "The Price Is Right" encountered not host Drew Carey but another familiar face in charge of the proceedings. 4 | 5 | Instead, there was Bob Barker, who hosted the TV game show for 35 years before stepping down in 2007. 
6 | 7 | Looking spry at 91, Barker handled the first price-guessing game of the show, the classic "Lucky Seven," before turning hosting duties over to Carey, who finished up. 8 | 9 | Despite being away from the show for most of the past eight years, Barker didn't seem to miss a beat. 10 | 11 | @highlight 12 | 13 | Bob Barker returned to host "The Price Is Right" on Wednesday 14 | 15 | @highlight 16 | 17 | Barker, 91, had retired as host in 2007 -------------------------------------------------------------------------------- /test_fixtures/generation/bart/data/url_lists/all_train.txt: -------------------------------------------------------------------------------- 1 | http://web.archive.org/web/20070716092219id_/http://us.cnn.com:80/2007/US/07/13/btsc.obrien.criminallyinsane/index.html 2 | http://web.archive.org/web/20070804173413id_/http://www.cnn.com:80/2007/SHOWBIZ/Movies/07/23/potter.radcliffe.reut/index.html?iref=newssearch 3 | http://web.archive.org/web/20070817151404id_/http://us.cnn.com:80/2007/US/08/02/bridge.survivors/index.html 4 | http://web.archive.org/web/20070827221123id_/http://www.cnn.com:80/2007/WORLD/meast/08/24/iraq.boyfolo/index.html?iref=topnews 5 | http://web.archive.org/web/20070830082937id_/http://www.cnn.com:80/2007/POLITICS/07/21/bush.colonoscopy/index.html?eref=rss_topstories 6 | http://web.archive.org/web/20070830193806id_/http://www.cnn.com:80/2007/US/law/08/24/michael.vick/index.html?eref=time_us 7 | http://web.archive.org/web/20070902195602id_/http://www.cnn.com:80/2007/WORLD/meast/08/15/iraq.prostitution/index.html?eref=ib_world 8 | http://web.archive.org/web/20070903175945id_/http://www.cnn.com:80/2007/POLITICS/08/31/white.house.snow/index.html?iref=topnews 9 | http://web.archive.org/web/20070903180821id_/http://www.cnn.com:80/2007/POLITICS/08/21/bush.iraq.speech/index.html?iref=topnews 10 | http://web.archive.org/web/20070905072114id_/http://www.cnn.com:80/2007/US/09/01/utah.mine/index.html?iref=mpstoryview 11 | 
-------------------------------------------------------------------------------- /test_fixtures/generation/bart/data/url_lists/all_val.txt: -------------------------------------------------------------------------------- 1 | http://web.archive.org/web/20150401100102id_/http://www.cnn.com/2015/04/01/europe/france-germanwings-plane-crash-main/ 2 | http://web.archive.org/web/20150401123500id_/http://www.cnn.com/2015/04/01/middleeast/palestinians-icc-membership/ 3 | http://web.archive.org/web/20150401232105id_/http://www.cnn.com/2015/03/31/world/amnesty-2014-death-penalty-report/ 4 | http://web.archive.org/web/20150402151325id_/http://edition.cnn.com/2015/04/01/opinions/shetty-end-executions/index.html 5 | http://web.archive.org/web/20150402214555id_/http://www.cnn.com/2015/04/01/europe/anne-frank-date-of-death/ 6 | http://web.archive.org/web/20150403063719id_/http://www.cnn.com/2015/04/02/us/north-carolina-duke-noose/index.html 7 | http://web.archive.org/web/20150403122220id_/http://www.cnn.com/2015/04/02/us/robert-schuller-death/ 8 | http://web.archive.org/web/20150403153913id_/http://edition.cnn.com/2015/04/03/living/buried-dog-survives-feat/index.html 9 | http://web.archive.org/web/20150403153946id_/http://edition.cnn.com/2015/04/03/middleeast/irans-foreign-minister-six-things-to-know/index.html 10 | http://web.archive.org/web/20150403170234id_/http://www.cnn.com/2015/04/01/entertainment/price-is-right-bob-barker-feat 11 | -------------------------------------------------------------------------------- /test_fixtures/generation/bart/experiment.jsonnet: -------------------------------------------------------------------------------- 1 | local model_name = "sshleifer/bart-tiny-random"; 2 | local data_base_url = "test_fixtures/generation/bart/data/"; 3 | 4 | { 5 | "train_data_path": data_base_url + "/url_lists/all_train.txt", 6 | "validation_data_path": data_base_url + "/url_lists/all_val.txt", 7 | "dataset_reader": { 8 | "type": "cnn_dm", 9 | "source_tokenizer": { 10 
| "type": "pretrained_transformer", 11 | "model_name": model_name 12 | }, 13 | "source_token_indexers": { 14 | "tokens": { 15 | "type": "pretrained_transformer", 16 | "model_name": model_name, 17 | "namespace": "tokens" 18 | } 19 | }, 20 | "source_max_tokens": 1022, 21 | "target_max_tokens": 54, 22 | }, 23 | "model": { 24 | "type": "bart", 25 | "model_name": model_name, 26 | "beam_search": { 27 | "max_steps": 140, 28 | "beam_size": 4 29 | }, 30 | }, 31 | "data_loader": { 32 | "batch_size": 2, 33 | "shuffle": true 34 | }, 35 | "trainer": { 36 | "num_epochs": 1, 37 | "optimizer": { 38 | "type": "huggingface_adamw", 39 | "lr": 3e-5, 40 | "betas": [0.9, 0.999], 41 | "eps": 1e-8, 42 | "correct_bias": true 43 | }, 44 | "learning_rate_scheduler": { 45 | "type": "polynomial_decay", 46 | }, 47 | "grad_norm": 1.0, 48 | "run_confidence_checks": false 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /test_fixtures/generation/composed/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/generation/composed/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/generation/composed/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/generation/composed/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/generation/composed/serialization/vocabulary/dependencies.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | NONE 3 | nsubj 4 | ROOT 5 | det 6 | attr 7 | predet 8 | aux 9 | acomp 10 | 
-------------------------------------------------------------------------------- /test_fixtures/generation/composed/serialization/vocabulary/ner.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | 3 | NONE 4 | -------------------------------------------------------------------------------- /test_fixtures/generation/composed/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/generation/composed/serialization/vocabulary/pos.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | NONE 3 | DET 4 | VERB 5 | NOUN 6 | -------------------------------------------------------------------------------- /test_fixtures/generation/composed/serialization/vocabulary/source_tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | @start@ 3 | @end@ 4 | this 5 | is 6 | a 7 | sentence 8 | another 9 | all 10 | these 11 | sentences 12 | should 13 | get 14 | copied 15 | -------------------------------------------------------------------------------- /test_fixtures/generation/composed/serialization/vocabulary/target_tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | @start@ 3 | @end@ 4 | this 5 | is 6 | a 7 | sentence 8 | another 9 | all 10 | these 11 | sentences 12 | should 13 | get 14 | copied 15 | -------------------------------------------------------------------------------- /test_fixtures/generation/copynet/data/copyover.tsv: -------------------------------------------------------------------------------- 1 | these tokens should be copied over : hello world the tokens " hello world " were copied 2 | these should also be copied : copynet is cool the tokens " copynet 
is cool " were copied 3 | -------------------------------------------------------------------------------- /test_fixtures/generation/copynet/data/source_vocab.txt: -------------------------------------------------------------------------------- 1 | these 0 2 | tokens 0 3 | should 0 4 | be 0 5 | copied 0 6 | over 0 7 | : 0 8 | hello 0 9 | word 0 10 | also 0 11 | copynet 0 12 | is 0 13 | cool 0 14 | -------------------------------------------------------------------------------- /test_fixtures/generation/copynet/data/target_vocab.txt: -------------------------------------------------------------------------------- 1 | the 0 2 | tokens 0 3 | " 0 4 | were 0 5 | copied 0 6 | -------------------------------------------------------------------------------- /test_fixtures/generation/copynet/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/generation/copynet/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/generation/copynet/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/generation/copynet/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/generation/copynet/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/generation/copynet/serialization/vocabulary/source_tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | these 3 | should 4 | be 5 | 
copied 6 | : 7 | tokens 8 | over 9 | hello 10 | also 11 | copynet 12 | is 13 | cool 14 | -------------------------------------------------------------------------------- /test_fixtures/generation/copynet/serialization/vocabulary/target_tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | " 3 | the 4 | tokens 5 | were 6 | copied 7 | @COPY@ 8 | -------------------------------------------------------------------------------- /test_fixtures/generation/seq2seq_copy.csv: -------------------------------------------------------------------------------- 1 | "this is a sentence","this is a sentence" 2 | "this is another","this is another" 3 | "all these sentences should get copied","all these sentences should get copied" 4 | -------------------------------------------------------------------------------- /test_fixtures/generation/seq2seq_copy.tsv: -------------------------------------------------------------------------------- 1 | this is a sentence this is a sentence 2 | this is another this is another 3 | all these sentences should get copied all these sentences should get copied 4 | -------------------------------------------------------------------------------- /test_fixtures/generation/seq2seq_max_marginal_likelihood.tsv: -------------------------------------------------------------------------------- 1 | thisisasentence this is a sentence thisis a sentence 2 | thisisanother this is another this isanother this1sanother 3 | allthesesentencesshouldgetcopied all these sentences should get copied 4 | -------------------------------------------------------------------------------- /test_fixtures/generation/simple/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/generation/simple/serialization/best.th 
-------------------------------------------------------------------------------- /test_fixtures/generation/simple/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/generation/simple/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/generation/simple/serialization/vocabulary/dependencies.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | NONE 3 | nsubj 4 | ROOT 5 | det 6 | attr 7 | predet 8 | aux 9 | acomp 10 | -------------------------------------------------------------------------------- /test_fixtures/generation/simple/serialization/vocabulary/ner.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | 3 | NONE 4 | -------------------------------------------------------------------------------- /test_fixtures/generation/simple/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/generation/simple/serialization/vocabulary/pos.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | NONE 3 | DET 4 | VERB 5 | NOUN 6 | -------------------------------------------------------------------------------- /test_fixtures/generation/simple/serialization/vocabulary/source_tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | @start@ 3 | @end@ 4 | this 5 | is 6 | a 7 | sentence 8 | another 9 | all 10 | these 11 | sentences 12 | should 13 | get 14 | copied 15 | 
-------------------------------------------------------------------------------- /test_fixtures/generation/simple/serialization/vocabulary/target_tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | @start@ 3 | @end@ 4 | this 5 | is 6 | a 7 | sentence 8 | another 9 | all 10 | these 11 | sentences 12 | should 13 | get 14 | copied 15 | -------------------------------------------------------------------------------- /test_fixtures/generation/t5/experiment.jsonnet: -------------------------------------------------------------------------------- 1 | local model_name = "patrickvonplaten/t5-tiny-random"; 2 | local data_base_url = "test_fixtures/generation/bart/data/"; 3 | 4 | { 5 | "train_data_path": data_base_url + "/url_lists/all_train.txt", 6 | "validation_data_path": data_base_url + "/url_lists/all_val.txt", 7 | "dataset_reader": { 8 | "type": "cnn_dm", 9 | "source_tokenizer": { 10 | "type": "pretrained_transformer", 11 | "model_name": model_name 12 | }, 13 | "source_token_indexers": { 14 | "tokens": { 15 | "type": "pretrained_transformer", 16 | "model_name": model_name, 17 | "namespace": "tokens" 18 | } 19 | }, 20 | "source_max_tokens": 512, 21 | "target_max_tokens": 54, 22 | }, 23 | "model": { 24 | "type": "t5", 25 | "model_name": model_name 26 | }, 27 | "data_loader": { 28 | "batch_size": 2, 29 | "shuffle": true 30 | }, 31 | "trainer": { 32 | "num_epochs": 1, 33 | "optimizer": { 34 | "type": "huggingface_adamw", 35 | "lr": 3e-5, 36 | "betas": [0.9, 0.999], 37 | "eps": 1e-8, 38 | "correct_bias": true 39 | }, 40 | "learning_rate_scheduler": { 41 | "type": "polynomial_decay", 42 | }, 43 | "grad_norm": 1.0, 44 | "enable_default_callbacks": false 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /test_fixtures/glove.6B.100d.sample.txt.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/glove.6B.100d.sample.txt.gz -------------------------------------------------------------------------------- /test_fixtures/glove.6B.300d.sample.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/glove.6B.300d.sample.txt.gz -------------------------------------------------------------------------------- /test_fixtures/lm/bidirectional_language_model/training_data/sentences1.txt: -------------------------------------------------------------------------------- 1 | I run the unit test thrice. 2 | I run the unit test four times. 3 | -------------------------------------------------------------------------------- /test_fixtures/lm/bidirectional_language_model/training_data/sentences2.txt: -------------------------------------------------------------------------------- 1 | I run the unit test once. 2 | I run the unit test twice. 3 | -------------------------------------------------------------------------------- /test_fixtures/lm/bidirectional_language_model/vocab/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels\n*tags 2 | -------------------------------------------------------------------------------- /test_fixtures/lm/conll2003.txt: -------------------------------------------------------------------------------- 1 | -DOCSTART- -X- -X- O 2 | 3 | U.N. NNP I-NP I-ORG 4 | official NN I-NP O 5 | Ekeus NNP I-NP I-PER 6 | heads VBZ I-VP O 7 | for IN I-PP O 8 | Baghdad NNP I-NP I-LOC 9 | . . O O 10 | 11 | -DOCSTART- -X- -X- O 12 | 13 | AI2 NNP I-NP I-ORG 14 | engineer NN I-NP O 15 | Joel NNP I-NP I-PER 16 | lives VBZ I-VP O 17 | in IN I-PP O 18 | Seattle NNP I-NP I-LOC 19 | . . 
O O 20 | -------------------------------------------------------------------------------- /test_fixtures/lm/elmo/elmo_token_embeddings.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/lm/elmo/elmo_token_embeddings.hdf5 -------------------------------------------------------------------------------- /test_fixtures/lm/elmo/lm_embeddings_0.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/lm/elmo/lm_embeddings_0.hdf5 -------------------------------------------------------------------------------- /test_fixtures/lm/elmo/lm_embeddings_1.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/lm/elmo/lm_embeddings_1.hdf5 -------------------------------------------------------------------------------- /test_fixtures/lm/elmo/lm_embeddings_2.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/lm/elmo/lm_embeddings_2.hdf5 -------------------------------------------------------------------------------- /test_fixtures/lm/elmo/lm_weights.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/lm/elmo/lm_weights.hdf5 -------------------------------------------------------------------------------- /test_fixtures/lm/elmo/options.json: -------------------------------------------------------------------------------- 1 | { 2 | "lstm": { 3 | "cell_clip": 3, 4 | 
"use_skip_connections": true, 5 | "n_layers": 2, 6 | "proj_clip": 3, 7 | "projection_dim": 16, 8 | "dim": 64 9 | }, 10 | "char_cnn": { 11 | "embedding": { 12 | "dim": 4 13 | }, 14 | "filters": [ 15 | [1, 4], 16 | [2, 8], 17 | [3, 16], 18 | [4, 32], 19 | [5, 64] 20 | ], 21 | "n_highway": 2, 22 | "n_characters": 262, 23 | "max_characters_per_token": 50, 24 | "activation": "relu" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/bidirectional_lm_characters_token_embedder.jsonnet: -------------------------------------------------------------------------------- 1 | local config = import "characters_token_embedder.json"; 2 | 3 | config + { 4 | "model"+: { 5 | "text_field_embedder"+: { 6 | "token_embedders"+: { 7 | "elmo"+: { 8 | "type": "bidirectional_lm_token_embedder", 9 | } 10 | } 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/bidirectional_lm_characters_token_embedder_without_bos_eos.jsonnet: -------------------------------------------------------------------------------- 1 | local config = import "bidirectional_lm_characters_token_embedder.jsonnet"; 2 | 3 | config + { 4 | "model"+: { 5 | "text_field_embedder"+: { 6 | "token_embedders"+: { 7 | "elmo"+: { 8 | "bos_eos_tokens": null, 9 | "remove_bos_eos": false 10 | } 11 | } 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/characters_token_embedder_without_bos_eos.jsonnet: -------------------------------------------------------------------------------- 1 | local config = import "characters_token_embedder.json"; 2 | 3 | config + { 4 | "model"+: { 5 | "text_field_embedder"+: { 6 | "token_embedders"+: { 7 | "elmo"+: { 8 | "bos_eos_tokens": null, 9 | "remove_bos_eos": false 10 | } 11 | } 12 | } 13 | } 14 | } 15 | 
-------------------------------------------------------------------------------- /test_fixtures/lm/language_model/experiment.jsonnet: -------------------------------------------------------------------------------- 1 | local config = import "experiment_unsampled.jsonnet"; 2 | 3 | config + { 4 | "model"+: { 5 | "num_samples": 10, 6 | "sparse_embeddings": true 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/experiment_bidirectional.jsonnet: -------------------------------------------------------------------------------- 1 | local config = import "experiment_bidirectional_unsampled.jsonnet"; 2 | 3 | config + { 4 | "model"+: { 5 | "num_samples": 10, 6 | "sparse_embeddings": true 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/experiment_bidirectional_unsampled.jsonnet: -------------------------------------------------------------------------------- 1 | local config = import "experiment_unsampled.jsonnet"; 2 | 3 | config + { 4 | "model"+: { 5 | "type": "bidirectional-language-model", 6 | // Hide the bidirectional field, since the bidirectional_language_model 7 | // does not accept it. 
8 | bidirectional:: super.bidirectional, 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/experiment_transformer.jsonnet: -------------------------------------------------------------------------------- 1 | local config = import "experiment_unsampled.jsonnet"; 2 | 3 | config + { 4 | "model"+: { 5 | "num_samples": 10, 6 | "sparse_embeddings": true, 7 | "contextualizer": { 8 | "type": "bidirectional_language_model_transformer", 9 | "input_dim": 16, 10 | "hidden_dim": 7, 11 | "num_layers": 3, 12 | "dropout": 0.1, 13 | "input_dropout": 0.1 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/experiment_unidirectional.jsonnet: -------------------------------------------------------------------------------- 1 | local config = import "experiment_unidirectional_unsampled.jsonnet"; 2 | 3 | config + { 4 | "model"+: { 5 | "num_samples": 10, 6 | "sparse_embeddings": true 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/experiment_unidirectional_transformer.jsonnet: -------------------------------------------------------------------------------- 1 | local config = import "experiment_unidirectional_unsampled.jsonnet"; 2 | 3 | config + { 4 | "model"+: { 5 | "num_samples": 10, 6 | "sparse_embeddings": true, 7 | "contextualizer": { 8 | "type": "pytorch_transformer", 9 | "input_dim": 16, 10 | "feedforward_hidden_dim": 20, 11 | "num_attention_heads": 4, 12 | "num_layers": 3, 13 | "dropout_prob": 0.1, 14 | "positional_encoding": "sinusoidal" 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/experiment_unidirectional_unsampled.jsonnet: -------------------------------------------------------------------------------- 1 | local config = import 
"experiment_unsampled.jsonnet"; 2 | 3 | config + { 4 | "model"+: { 5 | "bidirectional": false, 6 | "contextualizer" +: { 7 | "bidirectional": false 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/lm/language_model/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/lm/language_model/sentences.txt: -------------------------------------------------------------------------------- 1 | This is the first sentence. 2 | This is yet another sentence. 3 | -------------------------------------------------------------------------------- /test_fixtures/lm/language_modeling/single_sentence.txt: -------------------------------------------------------------------------------- 1 | The U.S. Centers for Disease Control and Prevention initially advised school systems to close if outbreaks occurred , then reversed itself , saying the apparent mildness of the virus meant most schools and day care centers should stay open , even if they had confirmed cases of swine flu . 
2 | -------------------------------------------------------------------------------- /test_fixtures/lm/masked_language_model/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "type": "masked_language_modeling" 4 | }, 5 | "train_data_path": "test_fixtures/lm/language_model/sentences.txt", 6 | "validation_data_path": "test_fixtures/lm/language_model/sentences.txt", 7 | "model": { 8 | "type": "masked_language_model", 9 | "text_field_embedder": { 10 | "token_embedders": { 11 | "tokens": { 12 | "type": "embedding", 13 | "embedding_dim": 4 14 | } 15 | } 16 | }, 17 | "language_model_head": { 18 | "type": "linear", 19 | "input_dim": 4, 20 | "vocab_namespace": "tokens" 21 | }, 22 | "target_namespace": "tokens" 23 | }, 24 | "data_loader": { 25 | "batch_size": 32, 26 | }, 27 | "trainer": { 28 | "num_epochs": 1, 29 | "cuda_device" : -1, 30 | "optimizer": { 31 | "type": "sgd", 32 | "lr": 0.01 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /test_fixtures/lm/masked_language_model/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/lm/masked_language_model/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/lm/masked_language_model/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/lm/masked_language_model/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/lm/masked_language_model/serialization/vocabulary/non_padded_namespaces.txt: 
-------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/lm/masked_language_model/serialization/vocabulary/tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | [MASK] 3 | is 4 | sentence. 5 | This 6 | the 7 | first 8 | yet 9 | another 10 | -------------------------------------------------------------------------------- /test_fixtures/lm/next_token_lm/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "type": "next_token_lm" 4 | }, 5 | "train_data_path": "test_fixtures/lm/language_model/sentences.txt", 6 | "validation_data_path": "test_fixtures/lm/language_model/sentences.txt", 7 | "model": { 8 | "type": "next_token_lm", 9 | "target_namespace": "tokens", 10 | "text_field_embedder": { 11 | "token_embedders": { 12 | "tokens": { 13 | "type": "embedding", 14 | "embedding_dim": 4 15 | } 16 | } 17 | }, 18 | "language_model_head": { 19 | "type": "linear", 20 | "input_dim": 4, 21 | "vocab_namespace": "tokens" 22 | } 23 | }, 24 | "data_loader": { 25 | "batch_size": 32 26 | }, 27 | "trainer": { 28 | "num_epochs": 1, 29 | "cuda_device" : -1, 30 | "optimizer": { 31 | "type": "sgd", 32 | "lr": 0.01 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /test_fixtures/lm/next_token_lm/experiment_transformer.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "type": "next_token_lm", 4 | "tokenizer": { 5 | "type": "pretrained_transformer", 6 | "model_name": "test_fixtures/bert-xsmall-dummy" 7 | }, 8 | "token_indexers": { 9 | "tokens": { 10 | "type": "pretrained_transformer", 11 | "model_name": "test_fixtures/bert-xsmall-dummy" 12 | } 13 | } 14 | }, 15 | "train_data_path": 
"test_fixtures/lm/language_model/sentences.txt", 16 | "validation_data_path": "test_fixtures/lm/language_model/sentences.txt", 17 | "model": { 18 | "type": "next_token_lm", 19 | "target_namespace": "tokens", 20 | "text_field_embedder": { 21 | "token_embedders": { 22 | "tokens": { 23 | "type": "pretrained_transformer", 24 | "model_name": "test_fixtures/bert-xsmall-dummy" 25 | } 26 | } 27 | }, 28 | "language_model_head": { 29 | "type": "bert", 30 | "model_name": "test_fixtures/bert-xsmall-dummy" 31 | }, 32 | "beam_search_generator": { 33 | "type": "transformer", 34 | "beam_search": { 35 | "end_index": 3, 36 | "beam_size": 2, 37 | "max_steps": 5, 38 | } 39 | } 40 | }, 41 | "data_loader": { 42 | "batch_size": 32 43 | }, 44 | "trainer": { 45 | "num_epochs": 1, 46 | "cuda_device" : -1, 47 | "optimizer": { 48 | "type": "sgd", 49 | "lr": 0.01 50 | }, 51 | "run_confidence_checks": false 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /test_fixtures/lm/next_token_lm/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/lm/next_token_lm/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/lm/next_token_lm/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/lm/next_token_lm/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/lm/next_token_lm/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | 
-------------------------------------------------------------------------------- /test_fixtures/lm/next_token_lm/serialization/vocabulary/tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | the 3 | This 4 | is 5 | sentence. 6 | first 7 | yet 8 | another 9 | -------------------------------------------------------------------------------- /test_fixtures/mc/piqa-labels.lst: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 1 4 | 1 5 | 0 6 | 1 7 | 1 8 | 0 9 | 0 10 | 0 11 | -------------------------------------------------------------------------------- /test_fixtures/mc/transformer_mc/experiment.jsonnet: -------------------------------------------------------------------------------- 1 | local transformer_model = "bert-base-uncased"; 2 | 3 | local epochs = 2; 4 | local batch_size = 3; 5 | 6 | { 7 | "dataset_reader": { 8 | "type": "piqa", 9 | "transformer_model_name": transformer_model, 10 | }, 11 | "train_data_path": "test_fixtures/mc/piqa.jsonl", 12 | "validation_data_path": "test_fixtures/mc/piqa.jsonl", 13 | "model": { 14 | "type": "transformer_mc", 15 | "transformer_model": transformer_model 16 | }, 17 | "data_loader": { 18 | "batch_size": batch_size 19 | }, 20 | "trainer": { 21 | "optimizer": { 22 | "type": "huggingface_adamw", 23 | "weight_decay": 0.01, 24 | "parameter_groups": [[["bias", "LayerNorm\\.weight", "layer_norm\\.weight"], {"weight_decay": 0}]], 25 | "lr": 1e-5, 26 | "eps": 1e-8, 27 | "correct_bias": true 28 | }, 29 | "learning_rate_scheduler": { 30 | "type": "linear_with_warmup", 31 | "warmup_steps": 100 32 | }, 33 | // "grad_norm": 1.0, 34 | "num_epochs": epochs, 35 | }, 36 | "random_seed": 42, 37 | "numpy_seed": 42, 38 | "pytorch_seed": 42, 39 | } 40 | -------------------------------------------------------------------------------- /test_fixtures/pair_classification/decomposable_attention/parameters_inspection.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "_aggregate_feedforward": { 3 | "_linear_layers": { 4 | "0": { 5 | "bias": "tunable", 6 | "weight": "tunable" 7 | } 8 | } 9 | }, 10 | "_attend_feedforward": { 11 | "_module": { 12 | "_linear_layers": { 13 | "0": { 14 | "bias": "tunable", 15 | "weight": "tunable" 16 | } 17 | } 18 | } 19 | }, 20 | "_compare_feedforward": { 21 | "_module": { 22 | "_linear_layers": { 23 | "0": { 24 | "bias": "tunable", 25 | "weight": "tunable" 26 | } 27 | } 28 | } 29 | }, 30 | "_text_field_embedder": { 31 | "token_embedder_tokens": { 32 | "_projection": { 33 | "bias": "tunable", 34 | "weight": "tunable" 35 | }, 36 | "weight": "frozen" 37 | } 38 | } 39 | } -------------------------------------------------------------------------------- /test_fixtures/pair_classification/decomposable_attention/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/pair_classification/decomposable_attention/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/pair_classification/decomposable_attention/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/pair_classification/decomposable_attention/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/pair_classification/decomposable_attention/serialization/vocabulary/labels.txt: -------------------------------------------------------------------------------- 1 | neutral 2 | contradiction 3 | entailment 4 | -------------------------------------------------------------------------------- 
/test_fixtures/pair_classification/decomposable_attention/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/pair_classification/decomposable_attention/serialization/vocabulary/tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | a 3 | person 4 | . 5 | horse 6 | on 7 | jumps 8 | over 9 | broken 10 | down 11 | airplane 12 | is 13 | , 14 | training 15 | his 16 | for 17 | competition 18 | at 19 | diner 20 | ordering 21 | an 22 | omelette 23 | outdoors 24 | -------------------------------------------------------------------------------- /test_fixtures/pair_classification/esim/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/pair_classification/esim/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/pair_classification/esim/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/pair_classification/esim/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/pair_classification/esim/serialization/vocabulary/labels.txt: -------------------------------------------------------------------------------- 1 | neutral 2 | contradiction 3 | entailment 4 | -------------------------------------------------------------------------------- /test_fixtures/pair_classification/esim/serialization/vocabulary/non_padded_namespaces.txt: 
-------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/pair_classification/esim/serialization/vocabulary/tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | a 3 | person 4 | . 5 | horse 6 | on 7 | jumps 8 | over 9 | broken 10 | down 11 | airplane 12 | is 13 | , 14 | training 15 | his 16 | for 17 | competition 18 | at 19 | diner 20 | ordering 21 | an 22 | omelette 23 | outdoors 24 | -------------------------------------------------------------------------------- /test_fixtures/pair_classification/quora_paraphrase.tsv: -------------------------------------------------------------------------------- 1 | 1 What should I do to avoid sleeping in class ? How do I not sleep in a boring class ? 50018 2 | 0 Do women support each other more than men do ? Do women need more compliments than men ? 126924 3 | 1 How can one root android devices ? How do I root an Android device ? 391187 4 | -------------------------------------------------------------------------------- /test_fixtures/pair_classification/snli2.jsonl: -------------------------------------------------------------------------------- 1 | {"annotator_labels": ["neutral"],"captionID": "3416050480.jpg#4", "gold_label": "neutral", "pairID": "3416050480.jpg#4r1n", "sentence1": "A person on a horse jumps over a broken down airplane.", "sentence1_binary_parse": "( ( ( A person ) ( on ( a seahorse ) ) ) ( ( jumps ( over ( a ( broken ( down airplane ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN on) (NP (DT a) (NN seahorse)))) (VP (VBZ jumps) (PP (IN over) (NP (DT a) (JJ broken) (JJ down) (NN airplane)))) (. 
.)))", "sentence2": "A person is training his seahorse for a competition.", "sentence2_binary_parse": "( ( A person ) ( ( is ( ( training ( his horse ) ) ( for ( a competition ) ) ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) (VP (VBG training) (NP (PRP$ his) (NN seahorse)) (PP (IN for) (NP (DT a) (NN competition))))) (. .)))"} 2 | -------------------------------------------------------------------------------- /test_fixtures/pair_classification/snli_vocab/labels.txt: -------------------------------------------------------------------------------- 1 | neutral 2 | contradiction 3 | entailment 4 | -------------------------------------------------------------------------------- /test_fixtures/pair_classification/snli_vocab/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *tags 2 | *labels 3 | -------------------------------------------------------------------------------- /test_fixtures/pair_classification/snli_vocab/tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | a 3 | person 4 | . 
5 | horse 6 | on 7 | jumps 8 | over 9 | broken 10 | down 11 | airplane 12 | is 13 | , 14 | training 15 | his 16 | for 17 | competition 18 | at 19 | diner 20 | ordering 21 | an 22 | omelette 23 | outdoors 24 | -------------------------------------------------------------------------------- /test_fixtures/rc/bidaf/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/rc/bidaf/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/rc/bidaf/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/rc/bidaf/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/rc/bidaf/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/rc/dialog_qa/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/rc/dialog_qa/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/rc/dialog_qa/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/rc/dialog_qa/serialization/model.tar.gz 
-------------------------------------------------------------------------------- /test_fixtures/rc/dialog_qa/serialization/vocabulary/answer_tags.txt: -------------------------------------------------------------------------------- 1 | O 2 | <1_in> 3 | <2_in> 4 | <1_start> 5 | <1_end> 6 | <2_start> 7 | <2_end> 8 | -------------------------------------------------------------------------------- /test_fixtures/rc/dialog_qa/serialization/vocabulary/followup_labels.txt: -------------------------------------------------------------------------------- 1 | m 2 | y 3 | n 4 | -------------------------------------------------------------------------------- /test_fixtures/rc/dialog_qa/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *tags 2 | *labels 3 | -------------------------------------------------------------------------------- /test_fixtures/rc/dialog_qa/serialization/vocabulary/yesno_labels.txt: -------------------------------------------------------------------------------- 1 | x 2 | y 3 | -------------------------------------------------------------------------------- /test_fixtures/rc/naqanet/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/rc/naqanet/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/rc/naqanet/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/rc/naqanet/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/rc/naqanet/serialization/vocabulary/non_padded_namespaces.txt: 
-------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/rc/naqanet/serialization/vocabulary/token_characters.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | e 3 | t 4 | a 5 | r 6 | i 7 | n 8 | o 9 | s 10 | h 11 | d 12 | l 13 | g 14 | f 15 | u 16 | c 17 | y 18 | m 19 | w 20 | p 21 | k 22 | C 23 | . 24 | , 25 | - 26 | D 27 | T 28 | v 29 | q 30 | 1 31 | A 32 | W 33 | b 34 | 2 35 | R 36 | I 37 | S 38 | 3 39 | 8 40 | B 41 | K 42 | J 43 | N 44 | 0 45 | P 46 | Q 47 | H 48 | F 49 | G 50 | 4 51 | 6 52 | 5 53 | E 54 | ? 55 | V 56 | L 57 | x 58 | M 59 | 7 60 | ' 61 | — 62 | z 63 | U 64 | j 65 | 9 66 | : 67 | -------------------------------------------------------------------------------- /test_fixtures/rc/orb_sample_predictions.json: -------------------------------------------------------------------------------- 1 | { 2 | "3109101566": [ 3 | "village a from ten thousand years ago to village b" 4 | ], 5 | "3656065124": [ 6 | "village b" 7 | ], 8 | "f37e81fa-ef7b-4583-b671-762fc433faa9": [ 9 | "chaz schilens" 10 | ], 11 | "ac6ba235-3024-4f63-a6ab-730a14def4cb": [ 12 | "3" 13 | ], 14 | "0a92ef8a-1780-4379-8ab8-6f2939fce562": [ 15 | "80" 16 | ], 17 | "56dde0379a695914005b9637": [ 18 | "9th century" 19 | ], 20 | "56dde0379a695914005b9636": [ 21 | "norseman , viking" 22 | ], 23 | "dd938bbe-e16f-11e9-b0fa-107b449211b9": [ 24 | "his brother is being selfish" 25 | ], 26 | "dd938bbf-e16f-11e9-b0fa-107b449211b9": [ 27 | "breaks his hand" 28 | ] 29 | } -------------------------------------------------------------------------------- /test_fixtures/rc/qanet/serialization/best.th: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/rc/qanet/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/rc/qanet/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/rc/qanet/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/rc/qanet/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *tags 2 | *labels 3 | -------------------------------------------------------------------------------- /test_fixtures/rc/qanet/serialization/vocabulary/token_characters.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | e 3 | t 4 | a 5 | r 6 | i 7 | n 8 | o 9 | s 10 | h 11 | l 12 | d 13 | c 14 | u 15 | m 16 | p 17 | f 18 | g 19 | y 20 | w 21 | b 22 | . 23 | , 24 | A 25 | M 26 | 1 27 | I 28 | 8 29 | J 30 | N 31 | 9 32 | B 33 | S 34 | 3 35 | Z 36 | V 37 | G 38 | v 39 | F 40 | D 41 | ' 42 | ( 43 | ) 44 | ? 
45 | C 46 | " 47 | k 48 | L 49 | 5 50 | W 51 | 2 52 | P 53 | 6 54 | O 55 | x 56 | H 57 | T 58 | j 59 | -------------------------------------------------------------------------------- /test_fixtures/rc/quoref_sample_predictions.json: -------------------------------------------------------------------------------- 1 | { 2 | "ba3f052c7a557909526b59713430403dd134e01d": 3 | [ 4 | "Catherine" 5 | ], 6 | "335654892c66647dd8531140c9bcd28e3f7500ec": 7 | [ 8 | "Heseltine" 9 | ], 10 | "2142d85e9eacd549bc6164583d14407383d15692": 11 | [ 12 | "Delius", 13 | "Heseltine" 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /test_fixtures/rc/superglue_rte.jsonl: -------------------------------------------------------------------------------- 1 | {"premise": "No Weapons of Mass Destruction Found in Iraq Yet.", "hypothesis": "Weapons of Mass Destruction Found in Iraq.", "label": "not_entailment", "idx": 0} 2 | {"premise": "A place of sorrow, after Pope John Paul II died, became a place of celebration, as Roman Catholic faithful gathered in downtown Chicago to mark the installation of new Pope Benedict XVI.", "hypothesis": "Pope Benedict XVI is the new leader of the Roman Catholic Church.", "label": "entailment", "idx": 1} 3 | {"premise": "Herceptin was already approved to treat the sickest breast cancer patients, and the company said, Monday, it will discuss with federal regulators the possibility of prescribing the drug for more breast cancer patients.", "hypothesis": "Herceptin can be used to treat breast cancer.", "label": "entailment", "idx": 2} 4 | {"premise": "Judie Vivian, chief executive at ProMedica, a medical service company that helps sustain the 2-year-old Vietnam Heart Institute in Ho Chi Minh City (formerly Saigon), said that so far about 1,500 children have received treatment.", "hypothesis": "The previous name of Ho Chi Minh City was Saigon.", "label": "entailment", "idx": 3} 5 | 
-------------------------------------------------------------------------------- /test_fixtures/rc/superglue_rte_no_labels.jsonl: -------------------------------------------------------------------------------- 1 | {"premise": "No Weapons of Mass Destruction Found in Iraq Yet.", "hypothesis": "Weapons of Mass Destruction Found in Iraq.", "idx": 0} 2 | {"premise": "A place of sorrow, after Pope John Paul II died, became a place of celebration, as Roman Catholic faithful gathered in downtown Chicago to mark the installation of new Pope Benedict XVI.", "hypothesis": "Pope Benedict XVI is the new leader of the Roman Catholic Church.", "idx": 1} 3 | {"premise": "Herceptin was already approved to treat the sickest breast cancer patients, and the company said, Monday, it will discuss with federal regulators the possibility of prescribing the drug for more breast cancer patients.", "hypothesis": "Herceptin can be used to treat breast cancer.", "idx": 2} 4 | {"premise": "Judie Vivian, chief executive at ProMedica, a medical service company that helps sustain the 2-year-old Vietnam Heart Institute in Ho Chi Minh City (formerly Saigon), said that so far about 1,500 children have received treatment.", "hypothesis": "The previous name of Ho Chi Minh City was Saigon.", "idx": 3} 5 | -------------------------------------------------------------------------------- /test_fixtures/rc/transformer_qa/experiment_v2.jsonnet: -------------------------------------------------------------------------------- 1 | local transformer_model = "test_fixtures/bert-xsmall-dummy"; 2 | local epochs = 2; 3 | local batch_size = 3; 4 | 5 | { 6 | "dataset_reader": { 7 | "type": "transformer_squad", 8 | "transformer_model_name": transformer_model, 9 | }, 10 | "train_data_path": "test_fixtures/rc/squad2.json", 11 | "validation_data_path": "test_fixtures/rc/squad2.json", 12 | "model": { 13 | "type": "transformer_qa", 14 | "transformer_model_name": transformer_model, 15 | }, 16 | "data_loader": { 17 | 
"batch_size": batch_size 18 | }, 19 | "trainer": { 20 | "optimizer": { 21 | "type": "huggingface_adamw", 22 | "weight_decay": 0.0, 23 | "parameter_groups": [[["bias", "LayerNorm\\.weight", "layer_norm\\.weight"], {"weight_decay": 0}]], 24 | "lr": 5e-5, 25 | "eps": 1e-8 26 | }, 27 | "grad_clipping": 1.0, 28 | "num_epochs": epochs, 29 | "cuda_device": -1 30 | }, 31 | } 32 | -------------------------------------------------------------------------------- /test_fixtures/rc/triviaqa-sample.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/rc/triviaqa-sample.tgz -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/biaffine_dependency_parser/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader":{ 3 | "type":"universal_dependencies" 4 | }, 5 | "train_data_path": "test_fixtures/structured_prediction/dependencies.conllu", 6 | "validation_data_path": "test_fixtures/structured_prediction/dependencies.conllu", 7 | "model": { 8 | "type": "biaffine_parser", 9 | "text_field_embedder": { 10 | "token_embedders": { 11 | "tokens": { 12 | "type": "embedding", 13 | "embedding_dim": 2, 14 | "trainable": true 15 | } 16 | } 17 | }, 18 | "encoder": { 19 | "type": "lstm", 20 | "input_size": 2, 21 | "hidden_size": 4, 22 | "num_layers": 1 23 | }, 24 | "arc_representation_dim": 3, 25 | "tag_representation_dim": 3 26 | }, 27 | 28 | "data_loader": { 29 | "batch_sampler": { 30 | "type": "bucket", 31 | "batch_size": 5, 32 | "padding_noise": 0.0 33 | } 34 | }, 35 | "trainer": { 36 | "num_epochs": 1, 37 | "grad_norm": 1.0, 38 | "patience": 500, 39 | "cuda_device": -1, 40 | "optimizer": { 41 | "type": "adadelta", 42 | "lr": 0.000001, 43 | "rho": 0.95 44 | } 45 | } 46 | } 47 | 48 | 
-------------------------------------------------------------------------------- /test_fixtures/structured_prediction/biaffine_dependency_parser/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/structured_prediction/biaffine_dependency_parser/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/biaffine_dependency_parser/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/structured_prediction/biaffine_dependency_parser/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/biaffine_dependency_parser/serialization/vocabulary/head_tags.txt: -------------------------------------------------------------------------------- 1 | punct 2 | case 3 | root 4 | obl 5 | compound 6 | advmod 7 | mark 8 | nsubj 9 | advcl 10 | cc 11 | conj 12 | flat 13 | nmod:poss 14 | det 15 | amod 16 | nmod 17 | nummod 18 | nsubj:pass 19 | aux:pass 20 | xcomp 21 | orphan 22 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/biaffine_dependency_parser/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/biaffine_dependency_parser/serialization/vocabulary/pos.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | PROPN 3 | PUNCT 4 | ADP 5 | NOUN 6 | ADV 7 
| PRON 8 | VERB 9 | ADJ 10 | SCONJ 11 | CCONJ 12 | NUM 13 | DET 14 | AUX 15 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/biaffine_dependency_parser/serialization/vocabulary/tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | What 3 | if 4 | Google 5 | ? 6 | - 7 | and 8 | Morphed 9 | Into 10 | GoogleOS 11 | expanded 12 | on 13 | its 14 | search 15 | engine 16 | ( 17 | now 18 | e-mail 19 | ) 20 | wares 21 | into 22 | a 23 | full 24 | fledged 25 | operating 26 | system 27 | [ 28 | via 29 | Microsoft 30 | Watch 31 | from 32 | Mary 33 | Jo 34 | Foley 35 | ] 36 | Over 37 | 300 38 | Iraqis 39 | are 40 | reported 41 | dead 42 | 500 43 | wounded 44 | in 45 | Fallujah 46 | alone 47 | . 48 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/constituency_parser/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader":{ 3 | "type":"ptb_trees", 4 | "use_pos_tags": false 5 | }, 6 | "train_data_path": "test_fixtures/structured_prediction/example_ptb.trees", 7 | "validation_data_path": "test_fixtures/structured_prediction/example_ptb.trees", 8 | "model": { 9 | "type": "constituency_parser", 10 | "text_field_embedder": { 11 | "token_embedders": { 12 | "tokens": { 13 | "type": "embedding", 14 | "embedding_dim": 2, 15 | "trainable": true 16 | } 17 | } 18 | }, 19 | "encoder": { 20 | "type": "lstm", 21 | "input_size": 2, 22 | "hidden_size": 4, 23 | "num_layers": 1 24 | }, 25 | "feedforward": { 26 | "input_dim": 8, 27 | "num_layers": 1, 28 | "hidden_dims": 4, 29 | "activations": "relu" 30 | }, 31 | "span_extractor": { 32 | "type": "endpoint", 33 | "input_dim": 4 34 | } 35 | }, 36 | 37 | "data_loader": { 38 | "batch_sampler": { 39 | "type": "bucket", 40 | "batch_size": 5, 41 | "padding_noise": 0.0 42 | } 43 | }, 44 | 
"trainer": { 45 | "num_epochs": 1, 46 | "grad_norm": 1.0, 47 | "patience": 500, 48 | "cuda_device": -1, 49 | "optimizer": { 50 | "type": "adadelta", 51 | "lr": 0.000001, 52 | "rho": 0.95 53 | } 54 | } 55 | } 56 | 57 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/constituency_parser/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/structured_prediction/constituency_parser/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/constituency_parser/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/structured_prediction/constituency_parser/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/constituency_parser/serialization/vocabulary/labels.txt: -------------------------------------------------------------------------------- 1 | NO-LABEL 2 | NP 3 | VP 4 | S 5 | S-VP 6 | ADVP 7 | SBAR 8 | PP 9 | ADJP 10 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/constituency_parser/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/constituency_parser/serialization/vocabulary/tokens.txt: -------------------------------------------------------------------------------- 1 | @@UNKNOWN@@ 2 | the 3 | to 4 | , 5 | 
UAL 6 | and 7 | other 8 | be 9 | him 10 | . 11 | Also 12 | because 13 | Chairman 14 | Stephen 15 | Wolf 16 | executives 17 | have 18 | joined 19 | pilots 20 | ' 21 | bid 22 | board 23 | might 24 | forced 25 | exclude 26 | from 27 | its 28 | deliberations 29 | in 30 | order 31 | fair 32 | bidders 33 | That 34 | could 35 | cost 36 | chance 37 | influence 38 | outcome 39 | perhaps 40 | join 41 | winning 42 | bidder 43 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/example_ptb.trees: -------------------------------------------------------------------------------- 1 | (VROOT(S(ADVP(RB Also))(, ,)(SBAR-PRP(IN because)(S(NP-SBJ(NP(NNP UAL)(NNP Chairman)(NNP Stephen)(NNP Wolf))(CC and)(NP(JJ other)(NNP UAL)(NNS executives)))(VP(VBP have)(VP(VBN joined)(NP(NP(DT the)(NNS pilots)(POS '))(NN bid))))))(, ,)(NP-SBJ(DT the)(NN board))(VP(MD might)(VP(VB be)(VP(VBN forced)(S(VP(TO to)(VP(VB exclude)(NP(PRP him))(PP-CLR(IN from)(NP(PRP$ its)(NNS deliberations)))(SBAR-PRP(IN in)(NN order)(S(VP(TO to)(VP(VB be)(ADJP-PRD(JJ fair)(PP(TO to)(NP(JJ other)(NNS bidders))))))))))))))(. .))) 2 | (VROOT(S(NP-SBJ(DT That))(VP(MD could)(VP(VB cost)(NP(PRP him))(NP(DT the)(NN chance)(S(VP(TO to)(VP(VP(VB influence)(NP(DT the)(NN outcome)))(CC and)(VP(ADVP(RB perhaps))(VB join)(NP(DT the)(VBG winning)(NN bidder)))))))))(. 
.))) 3 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/semantic_dependencies/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader":{ 3 | "type":"semantic_dependencies" 4 | }, 5 | "train_data_path": "test_fixtures/structured_prediction/semantic_dependencies/dm.sdp", 6 | "validation_data_path": "test_fixtures/structured_prediction/semantic_dependencies/dm.sdp", 7 | "model": { 8 | "type": "graph_parser", 9 | "text_field_embedder": { 10 | "token_embedders": { 11 | "tokens": { 12 | "type": "embedding", 13 | "embedding_dim": 2, 14 | "trainable": true 15 | } 16 | } 17 | }, 18 | "encoder": { 19 | "type": "lstm", 20 | "input_size": 2, 21 | "hidden_size": 4, 22 | "num_layers": 1 23 | }, 24 | "arc_representation_dim": 3, 25 | "tag_representation_dim": 3 26 | }, 27 | 28 | "data_loader": { 29 | "batch_sampler": { 30 | "type": "bucket", 31 | "batch_size": 5, 32 | "padding_noise": 0.0 33 | } 34 | }, 35 | "trainer": { 36 | "num_epochs": 1, 37 | "grad_norm": 1.0, 38 | "patience": 500, 39 | "cuda_device": -1, 40 | "optimizer": { 41 | "type": "adadelta", 42 | "lr": 0.000001, 43 | "rho": 0.95 44 | } 45 | } 46 | } 47 | 48 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/srl/bert/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "vocab_size": 18, 3 | "hidden_size": 12, 4 | "num_hidden_layers": 2, 5 | "num_attention_heads": 3, 6 | "intermediate_size": 6, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "attention_probs_dropout_prob": 0.1, 10 | "max_position_embeddings": 64, 11 | "type_vocab_size": 2, 12 | "initializer_range": 0.02 13 | } 14 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/srl/bert/vocab.txt: 
-------------------------------------------------------------------------------- 1 | [PAD] 2 | [UNK] 3 | the 4 | quick 5 | ##est 6 | brown 7 | fox 8 | ##iest 9 | jumped 10 | over 11 | ##zie 12 | ##st 13 | dog 14 | . 15 | lazy 16 | la 17 | [CLS] 18 | [SEP] 19 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/srl/bert_srl.jsonnet: -------------------------------------------------------------------------------- 1 | local bert_model = "epwalsh/bert-xsmall-dummy"; 2 | 3 | { 4 | "dataset_reader":{ 5 | "type":"srl", 6 | "bert_model_name": bert_model, 7 | }, 8 | "train_data_path": "test_fixtures/structured_prediction/srl", 9 | "validation_data_path": "test_fixtures/structured_prediction/srl", 10 | "model": { 11 | "type": "srl_bert", 12 | "bert_model": bert_model, 13 | "embedding_dropout": 0.0 14 | }, 15 | "data_loader": { 16 | "batch_sampler": { 17 | "type": "bucket", 18 | "batch_size": 5, 19 | "padding_noise": 0.0 20 | } 21 | }, 22 | "trainer": { 23 | "optimizer": { 24 | "type": "adam", 25 | "lr": 0.001 26 | }, 27 | "checkpointer": { 28 | "keep_most_recent_by_count": 1 29 | }, 30 | "num_epochs": 3, 31 | "grad_norm": 10.0, 32 | "patience": 5, 33 | "cuda_device": -1 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/srl/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader":{"type":"srl"}, 3 | "train_data_path": "test_fixtures/structured_prediction/srl", 4 | "validation_data_path": "test_fixtures/structured_prediction/srl", 5 | "model": { 6 | "type": "srl", 7 | "text_field_embedder": { 8 | "token_embedders": { 9 | "tokens": { 10 | "type": "embedding", 11 | "pretrained_file": "test_fixtures/glove.6B.100d.sample.txt.gz", 12 | "embedding_dim": 100, 13 | "trainable": true 14 | } 15 | } 16 | }, 17 | "encoder": { 18 | "type": "lstm", 19 | "input_size": 150, 
20 | "hidden_size": 10, 21 | "num_layers": 1 22 | }, 23 | "binary_feature_dim": 50 24 | }, 25 | "data_loader": { 26 | "batch_sampler": { 27 | "type": "bucket", 28 | "batch_size": 80, 29 | "padding_noise": 0.0 30 | } 31 | }, 32 | 33 | "trainer": { 34 | "num_epochs": 1, 35 | "grad_norm": 1.0, 36 | "patience": 500, 37 | "cuda_device": -1, 38 | "optimizer": { 39 | "type": "adadelta", 40 | "lr": 0.000001, 41 | "rho": 0.9 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/srl/serialization/best.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/structured_prediction/srl/serialization/best.th -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/srl/serialization/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/structured_prediction/srl/serialization/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/structured_prediction/srl/serialization/vocabulary/labels.txt: -------------------------------------------------------------------------------- 1 | O 2 | I-ARG1 3 | B-V 4 | B-ARG1 5 | B-ARG0 6 | I-ARG2 7 | B-ARG2 8 | I-ARGM-TMP 9 | B-ARGM-TMP 10 | B-ARGM-DIS 11 | I-ARG0 12 | B-ARGM-NEG 13 | B-ARGM-MOD 14 | I-ARGM-MNR 15 | I-ARGM-DIS 16 | B-ARGM-MNR 17 | B-ARGM-ADV 18 | I-ARGM-PRP 19 | I-ARGM-ADV 20 | B-ARG3 21 | I-ARG3 22 | B-ARGM-GOL 23 | I-ARGM-GOL 24 | B-R-ARG2 25 | B-ARGM-PRP 26 | B-R-ARG1 27 | B-R-ARG0 28 | -------------------------------------------------------------------------------- 
/test_fixtures/structured_prediction/srl/serialization/vocabulary/non_padded_namespaces.txt: -------------------------------------------------------------------------------- 1 | *labels 2 | *tags 3 | -------------------------------------------------------------------------------- /test_fixtures/tagging/conll2000.txt: -------------------------------------------------------------------------------- 1 | Confidence NN B-NP 2 | in IN B-PP 3 | the DT B-NP 4 | pound NN I-NP 5 | is VBZ B-VP 6 | widely RB I-VP 7 | expected VBN I-VP 8 | to TO I-VP 9 | take VB I-VP 10 | another DT B-NP 11 | sharp JJ I-NP 12 | dive NN I-NP 13 | if IN B-SBAR 14 | trade NN B-NP 15 | figures NNS I-NP 16 | for IN B-PP 17 | September NNP B-NP 18 | , , O 19 | due JJ B-ADJP 20 | for IN B-PP 21 | release NN B-NP 22 | tomorrow NN B-NP 23 | , , O 24 | fail VB B-VP 25 | to TO I-VP 26 | show VB I-VP 27 | a DT B-NP 28 | substantial JJ I-NP 29 | improvement NN I-NP 30 | from IN B-PP 31 | July NNP B-NP 32 | and CC I-NP 33 | August NNP I-NP 34 | 's POS B-NP 35 | near-record JJ I-NP 36 | deficits NNS I-NP 37 | . . O 38 | 39 | Chancellor NNP O 40 | of IN B-PP 41 | the DT B-NP 42 | Exchequer NNP I-NP 43 | Nigel NNP B-NP 44 | Lawson NNP I-NP 45 | 's POS B-NP 46 | restated VBN I-NP 47 | commitment NN I-NP 48 | to TO B-PP 49 | a DT B-NP 50 | firm NN I-NP 51 | monetary JJ I-NP 52 | policy NN I-NP 53 | has VBZ B-VP 54 | helped VBN I-VP 55 | to TO I-VP 56 | prevent VB I-VP 57 | a DT B-NP 58 | freefall NN I-NP 59 | in IN B-PP 60 | sterling NN B-NP 61 | over IN B-PP 62 | the DT B-NP 63 | past JJ I-NP 64 | week NN I-NP 65 | . . O 66 | -------------------------------------------------------------------------------- /test_fixtures/tagging/conll2003.txt: -------------------------------------------------------------------------------- 1 | -DOCSTART- -X- -X- O 2 | 3 | U.N. NNP I-NP I-ORG 4 | official NN I-NP O 5 | Ekeus NNP I-NP I-PER 6 | heads VBZ I-VP O 7 | for IN I-PP O 8 | Baghdad NNP I-NP I-LOC 9 | . . 
O O 10 | 11 | -DOCSTART- -X- -X- O 12 | 13 | AI2 NNP I-NP I-ORG 14 | engineer NN I-NP O 15 | Joel NNP I-NP I-PER 16 | lives VBZ I-VP O 17 | in IN I-PP O 18 | Seattle NNP I-NP I-LOC 19 | . . O O 20 | -------------------------------------------------------------------------------- /test_fixtures/tagging/crf_tagger/experiment_albert.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_reader": { 3 | "type": "conll2003", 4 | "tag_label": "ner", 5 | "token_indexers": { 6 | "transformer": { 7 | "type": "pretrained_transformer_mismatched", 8 | "model_name": "albert-base-v2" 9 | } 10 | } 11 | }, 12 | "train_data_path": "test_fixtures/tagging/conll2003.txt", 13 | "validation_data_path": "test_fixtures/tagging/conll2003.txt", 14 | "model": { 15 | "type": "crf_tagger", 16 | "include_start_end_transitions": false, 17 | "text_field_embedder": { 18 | "token_embedders": { 19 | "transformer": { 20 | "type": "pretrained_transformer_mismatched", 21 | "model_name": "albert-base-v2" 22 | } 23 | } 24 | }, 25 | "encoder": { 26 | "type": "pass_through", 27 | "input_dim": 768 28 | } 29 | }, 30 | "data_loader": {"batch_size": 32}, 31 | "trainer": { 32 | "optimizer": "adam", 33 | "num_epochs": 5, 34 | "cuda_device": -1 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /test_fixtures/vision/flickr30k/sentences/1.txt: -------------------------------------------------------------------------------- 1 | [/EN#221796/people A girl] with [/EN#221804/bodyparts brown hair] sits on [/EN#221799/scene the edge of a cement area] [/EN#221798/scene overlooking water] . 2 | [/EN#221796/people A woman] in [/EN#221797/clothing black] , seen from [/EN#221800/other behind] , sits next to [/EN#221798/scene a body of water] . 3 | [/EN#221796/people A girl] sitting outside on [/EN#221799/other concrete] near [/EN#221798/scene water] in [/EN#221797/clothing a black dress] . 
4 | [/EN#221796/people A small girl] sits on [/EN#221799/other a ledge] by [/EN#221798/scene the water] contemplating [/EN#221802/other life] . 5 | [/EN#221796/people A dark-haired girl] is sitting on [/EN#221798/scene the waters edge] . 6 | -------------------------------------------------------------------------------- /test_fixtures/vision/flickr30k/sentences/2.txt: -------------------------------------------------------------------------------- 1 | [/EN#221796/people A girl] with [/EN#221804/bodyparts brown hair] sits on [/EN#221799/scene the edge of a concrete area] [/EN#221798/scene overlooking water] . 2 | [/EN#221796/people A woman] in [/EN#221797/clothing black] , seen from [/EN#221800/other behind] , sits by [/EN#221798/scene a body of water] . 3 | [/EN#221796/people A girl] sitting outside on [/EN#221799/other cement] near [/EN#221798/scene water] in [/EN#221797/clothing a black dress] . 4 | [/EN#221796/people A small girl] sits on [/EN#221799/other an edge] by [/EN#221798/scene the water] contemplating [/EN#221802/other life] . 5 | [/EN#221796/people A dark-haired girl] is sitting next to [/EN#221798/scene the waters edge] . 6 | -------------------------------------------------------------------------------- /test_fixtures/vision/flickr30k/sentences/3.txt: -------------------------------------------------------------------------------- 1 | [/EN#221796/people A girl] without [/EN#221804/bodyparts brown hair] sits on [/EN#221799/scene the edge of a cement area] [/EN#221798/scene overlooking water] . 2 | [/EN#221796/people A woman] wearing [/EN#221797/clothing black] , seen from [/EN#221800/other behind] , sits next to [/EN#221798/scene a body of water] . 3 | [/EN#221796/people A girl] sitting inside on [/EN#221799/other concrete] near [/EN#221798/scene water] in [/EN#221797/clothing a black dress] . 4 | [/EN#221796/people A small girl] sits on top of [/EN#221799/other a ledge] by [/EN#221798/scene the water] contemplating [/EN#221802/other life] . 
5 | [/EN#221796/people A dark-haired girl] is sitting by [/EN#221798/scene the waters edge] . 6 | -------------------------------------------------------------------------------- /test_fixtures/vision/flickr30k/sentences/4945942737.txt: -------------------------------------------------------------------------------- 1 | [/EN#221796/people A girl] with [/EN#221804/bodyparts brown hair] sits on [/EN#221799/scene the edge of a cement area] [/EN#221798/scene overlooking water] . 2 | [/EN#221796/people A woman] in [/EN#221797/clothing black] , seen from [/EN#221800/other behind] , sits next to [/EN#221798/scene a body of water] . 3 | [/EN#221796/people A girl] sitting outside on [/EN#221799/other concrete] near [/EN#221798/scene water] in [/EN#221797/clothing a black dress] . 4 | [/EN#221796/people A small girl] sits on [/EN#221799/other a ledge] by [/EN#221798/scene the water] contemplating [/EN#221802/other life] . 5 | [/EN#221796/people A dark-haired girl] is sitting on [/EN#221798/scene the waters edge] . 6 | -------------------------------------------------------------------------------- /test_fixtures/vision/flickr30k/sentences/6338542128.txt: -------------------------------------------------------------------------------- 1 | On [/EN#253080/scene a sunny , dry day] , wearing [/EN#253081/other full football gear] , [/EN#253069/people a Texas A&M football player] tries to reach [/EN#253070/people an Iowa State football player] , for [/EN#253072/other the football] during [/EN#253078/other the game] . 2 | [/EN#253070/people An offensive player] running with [/EN#253077/other a football] while [/EN#253069/people a football player] tries to stop [/EN#0/notvisual him] during [/EN#253071/other a football game] . 3 | [/EN#253069/people A football player] from [/EN#253074/scene Iowa State blocks] [/EN#253069/people a player] from [/EN#253075/other Texas A&M] from taking [/EN#253072/other the football] from [/EN#0/notvisual him] . 
4 | [/EN#253070/scene The Iowa State football player blocks] [/EN#253068/people a Texas A&M defenseman] while running with [/EN#253072/other the ball] . 5 | [/EN#253073/other # 8] for [/EN#253083/bodyparts Iowa State stiff arms] [/EN#253069/people a Texas AM player] attempting to tackle [/EN#0/notvisual him] . 6 | -------------------------------------------------------------------------------- /test_fixtures/vision/flickr30k/test.txt: -------------------------------------------------------------------------------- 1 | 6338542128 2 | 4945942737 3 | 1 4 | 2 5 | 3 -------------------------------------------------------------------------------- /test_fixtures/vision/flickr30k/tiny-dev.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 -------------------------------------------------------------------------------- /test_fixtures/vision/gqa/question_dir/questions0.json: -------------------------------------------------------------------------------- 1 | { 2 | "202218649": { 3 | "semantic": [ 4 | { 5 | "operation": "select", 6 | "dependencies": [], 7 | "argument": "chalkboard (0)" 8 | }, 9 | { 10 | "operation": "relate", 11 | "dependencies": [0], 12 | "argument": "_,hanging above,s (12)" 13 | }, 14 | { 15 | "operation": "query", 16 | "dependencies": [1], 17 | "argument": "name" 18 | } 19 | ], 20 | "entailed": ["202218648"], 21 | "equivalent": ["202218649"], 22 | "question": "What is hanging above the chalkboard?", 23 | "imageId": "n578564", 24 | "isBalanced": true, 25 | "groups": { 26 | "global": "thing", 27 | "local": "14-chalkboard_hanging above,s" 28 | }, 29 | "answer": "picture", 30 | "semanticStr": "select: chalkboard (0)->relate: _,hanging above,s (12) [0]->query: name [1]", 31 | "annotations": { 32 | "answer": {"0": "12"}, 33 | "question": {}, 34 | "fullAnswer": {"1": "12", "6": "0"} 35 | }, 36 | "types": { 37 | "detailed": "relS", 38 | "semantic": "rel", 39 | "structural": "query" 40 | }, 41 | "fullAnswer": 
"The picture is hanging above the chalkboard." 42 | } 43 | } -------------------------------------------------------------------------------- /test_fixtures/vision/gqa/questions.json: -------------------------------------------------------------------------------- 1 | { 2 | "202218649": { 3 | "semantic": [ 4 | { 5 | "operation": "select", 6 | "dependencies": [], 7 | "argument": "chalkboard (0)" 8 | }, 9 | { 10 | "operation": "relate", 11 | "dependencies": [0], 12 | "argument": "_,hanging above,s (12)" 13 | }, 14 | { 15 | "operation": "query", 16 | "dependencies": [1], 17 | "argument": "name" 18 | } 19 | ], 20 | "entailed": ["202218648"], 21 | "equivalent": ["202218649"], 22 | "question": "What is hanging above the chalkboard?", 23 | "imageId": "n578564", 24 | "isBalanced": true, 25 | "groups": { 26 | "global": "thing", 27 | "local": "14-chalkboard_hanging above,s" 28 | }, 29 | "answer": "picture", 30 | "semanticStr": "select: chalkboard (0)->relate: _,hanging above,s (12) [0]->query: name [1]", 31 | "annotations": { 32 | "answer": {"0": "12"}, 33 | "question": {}, 34 | "fullAnswer": {"1": "12", "6": "0"} 35 | }, 36 | "types": { 37 | "detailed": "relS", 38 | "semantic": "rel", 39 | "structural": "query" 40 | }, 41 | "fullAnswer": "The picture is hanging above the chalkboard." 
42 | } 43 | } -------------------------------------------------------------------------------- /test_fixtures/vision/images/feature_cache/coordinates: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/feature_cache/coordinates -------------------------------------------------------------------------------- /test_fixtures/vision/images/feature_cache/coordinates-lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/feature_cache/coordinates-lock -------------------------------------------------------------------------------- /test_fixtures/vision/images/feature_cache/features: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/feature_cache/features -------------------------------------------------------------------------------- /test_fixtures/vision/images/feature_cache/features-lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/feature_cache/features-lock -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/1.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/100652400.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/100652400.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/1016887272.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/1016887272.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/101958970.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/101958970.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/2.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/3.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/3359636318.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/3359636318.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/4945942737.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/4945942737.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/6338542128.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/6338542128.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/6959556104.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/6959556104.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/flickr30k/7162685234.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/flickr30k/7162685234.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/gqa/1339.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/gqa/1339.jpg 
-------------------------------------------------------------------------------- /test_fixtures/vision/images/gqa/2331963.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/gqa/2331963.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/gqa/2354786.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/gqa/2354786.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/gqa/2368326.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/gqa/2368326.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/gqa/2375429.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/gqa/2375429.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/gqa/2400861.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/gqa/2400861.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/gqa/2405722.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/gqa/2405722.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/gqa/n166008.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/gqa/n166008.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/gqa/n578564.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/gqa/n578564.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/dev-850-0-img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/dev-850-0-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/dev-850-0-img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/dev-850-0-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/dev-850-1-img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/dev-850-1-img0.png 
-------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/dev-850-1-img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/dev-850-1-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/dev-850-2-img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/dev-850-2-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/dev-850-2-img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/dev-850-2-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/dev-850-3-img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/dev-850-3-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/dev-850-3-img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/dev-850-3-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/test1-0-0-img0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/test1-0-0-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/test1-0-0-img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/test1-0-0-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/test1-0-1-img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/test1-0-1-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/test1-0-1-img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/test1-0-1-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/test1-0-2-img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/test1-0-2-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/test1-0-2-img1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/test1-0-2-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/test1-0-3-img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/test1-0-3-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/test1-0-3-img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/test1-0-3-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/train-10171-0-img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/train-10171-0-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/train-10171-0-img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/train-10171-0-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/train-4100-0-img0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/train-4100-0-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/train-4100-0-img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/train-4100-0-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/train-4933-2-img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/train-4933-2-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/train-4933-2-img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/train-4933-2-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/train-6623-1-img0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/train-6623-1-img0.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/nlvr2/train-6623-1-img1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/nlvr2/train-6623-1-img1.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/vgqa/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vgqa/1.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/vgqa/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vgqa/2.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/vgqa/2415125.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vgqa/2415125.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/vgqa/2415126.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vgqa/2415126.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/vgqa/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vgqa/4.jpg -------------------------------------------------------------------------------- 
/test_fixtures/vision/images/vgqa/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vgqa/5.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/vision_reader/jpeg_example.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vision_reader/jpeg_example.jpeg -------------------------------------------------------------------------------- /test_fixtures/vision/images/vision_reader/jpg_example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vision_reader/jpg_example.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/vision_reader/png_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vision_reader/png_example.png -------------------------------------------------------------------------------- /test_fixtures/vision/images/visual_entailment/1016887272.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/visual_entailment/1016887272.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/visual_entailment/2248275918.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/visual_entailment/2248275918.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/visual_entailment/402978771.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/visual_entailment/402978771.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/visual_entailment/4564320256.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/visual_entailment/4564320256.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/vqav2/COCO_train2014_000000458752.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vqav2/COCO_train2014_000000458752.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/vqav2/COCO_val2014_000000262148.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vqav2/COCO_val2014_000000262148.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/images/vqav2/COCO_val2014_000000393225.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/images/vqav2/COCO_val2014_000000393225.jpg -------------------------------------------------------------------------------- /test_fixtures/vision/vilbert_multitask/dataset.json: -------------------------------------------------------------------------------- 1 | {"task": "vqa", "image": "https://i.imgur.com/UOt9Q4J.jpeg", "question": "What's the color of the pyramid in the foreground?"} 2 | {"task": "vqa", "image": "https://i.imgur.com/9JNTNQd.jpeg", "question": "How many human skulls are there?"} 3 | {"task": "ve", "image": "https://i.imgur.com/FB4749j.jpeg", "hypothesis": "The statues are hugging each other."} 4 | -------------------------------------------------------------------------------- /test_fixtures/vision/vilbert_multitask/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/test_fixtures/vision/vilbert_multitask/model.tar.gz -------------------------------------------------------------------------------- /test_fixtures/vision/vqav2/questions.json: -------------------------------------------------------------------------------- 1 | { 2 | "info": { 3 | "description": "This is v2.0 of the VQA dataset.", 4 | "url": "http://visualqa.org", 5 | "version": "2.0", 6 | "year": 2017, 7 | "contributor": "VQA Team", 8 | "date_created": "2017-04-26 17:07:13" 9 | }, 10 | "task_type": "Open-Ended", 11 | "data_type": "mscoco", 12 | "license": { 13 | "url": "http://creativecommons.org/licenses/by/4.0/", 14 | "name": "Creative Commons Attribution 4.0 International License" 15 | }, 16 | "data_subtype": "train2014", 17 | "questions": [ 18 | { 19 | "image_id": 458752, 20 | "question": "What is this photo taken looking through?", 21 | 
"question_id": 458752000 22 | }, 23 | { 24 | "image_id": 458752, 25 | "question": "What position is this man playing?", 26 | "question_id": 458752001 27 | }, 28 | { 29 | "image_id": 458752, 30 | "question": "What color is the players shirt?", 31 | "question_id": 458752002 32 | } 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | PROJECT_ROOT = (pathlib.Path(__file__).parent / "..").resolve() # pylint: disable=no-member 4 | TESTS_ROOT = PROJECT_ROOT / "tests" 5 | FIXTURES_ROOT = PROJECT_ROOT / "test_fixtures" 6 | -------------------------------------------------------------------------------- /tests/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/classification/__init__.py -------------------------------------------------------------------------------- /tests/classification/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/classification/dataset_readers/__init__.py -------------------------------------------------------------------------------- /tests/classification/interpret/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/classification/interpret/__init__.py -------------------------------------------------------------------------------- /tests/classification/interpret/sst_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def 
test_gradient_visualization(): 5 | from allennlp.predictors.predictor import Predictor 6 | 7 | predictor = Predictor.from_path( 8 | "https://storage.googleapis.com/allennlp-public-models/sst-roberta-large-2020.06.08.tar.gz" 9 | ) 10 | sentence = "a very well-made, funny and entertaining picture." 11 | 12 | inputs = {"sentence": sentence} 13 | from allennlp.interpret.saliency_interpreters import SimpleGradient 14 | 15 | simple_gradient_interpreter = SimpleGradient(predictor) 16 | simple_gradient_interpretation = simple_gradient_interpreter.saliency_interpret_from_json( 17 | inputs 18 | ) 19 | 20 | gradients = simple_gradient_interpretation["instance_1"]["grad_input_1"] 21 | assert max(gradients) - min(gradients) < 0.75 22 | -------------------------------------------------------------------------------- /tests/classification/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/classification/models/__init__.py -------------------------------------------------------------------------------- /tests/coref/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/coref/__init__.py -------------------------------------------------------------------------------- /tests/coref/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/coref/dataset_readers/__init__.py -------------------------------------------------------------------------------- /tests/coref/interpret/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/coref/interpret/__init__.py -------------------------------------------------------------------------------- /tests/coref/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/coref/metrics/__init__.py -------------------------------------------------------------------------------- /tests/coref/metrics/mention_recall_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from allennlp.common.testing import ( 4 | AllenNlpTestCase, 5 | global_distributed_metric, 6 | run_distributed_test, 7 | ) 8 | 9 | from allennlp_models.coref.metrics.mention_recall import MentionRecall 10 | 11 | 12 | class MentionRecallTest(AllenNlpTestCase): 13 | def test_mention_recall(self): 14 | metric = MentionRecall() 15 | 16 | batched_top_spans = torch.tensor([[[2, 4], [1, 3]], [[5, 6], [7, 8]]]) 17 | batched_metadata = [{"clusters": [[(2, 4), (3, 5)]]}, {"clusters": [[(5, 6), (7, 8)]]}] 18 | 19 | metric(batched_top_spans, batched_metadata) 20 | recall = metric.get_metric() 21 | assert recall == 0.75 22 | 23 | def test_distributed_mention_recall(self): 24 | batched_top_spans = [torch.tensor([[[2, 4], [1, 3]]]), torch.tensor([[[5, 6], [7, 8]]])] 25 | batched_metadata = [[{"clusters": [[(2, 4), (3, 5)]]}], [{"clusters": [[(5, 6), (7, 8)]]}]] 26 | 27 | metric_kwargs = { 28 | "batched_top_spans": batched_top_spans, 29 | "batched_metadata": batched_metadata, 30 | } 31 | desired_values = 0.75 32 | run_distributed_test( 33 | [-1, -1], 34 | global_distributed_metric, 35 | MentionRecall(), 36 | metric_kwargs, 37 | desired_values, 38 | exact=True, 39 | ) 40 | -------------------------------------------------------------------------------- /tests/coref/models/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/coref/models/__init__.py -------------------------------------------------------------------------------- /tests/coref/predictors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/coref/predictors/__init__.py -------------------------------------------------------------------------------- /tests/generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/generation/__init__.py -------------------------------------------------------------------------------- /tests/generation/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/generation/dataset_readers/__init__.py -------------------------------------------------------------------------------- /tests/generation/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/generation/models/__init__.py -------------------------------------------------------------------------------- /tests/generation/models/bart_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from allennlp.common import Params 3 | from allennlp.common.testing import ModelTestCase 4 | from allennlp.models import Model 5 | from allennlp_models import generation # noqa: F401 6 | from tests import FIXTURES_ROOT 7 | 
8 | 9 | class BartTest(ModelTestCase): 10 | def setup_method(self): 11 | super().setup_method() 12 | self.set_up_model( 13 | FIXTURES_ROOT / "generation" / "bart" / "experiment.jsonnet", 14 | FIXTURES_ROOT / "generation" / "bart" / "data" / "url_lists" / "all_train.txt", 15 | ) 16 | 17 | def test_backwards_compatibility_with_beam_search_args(self): 18 | # These values are arbitrary but should be different than the config. 19 | beam_size, max_decoding_steps = 100, 1000 20 | params = Params.from_file(self.param_file) 21 | params["model"]["beam_size"] = beam_size 22 | params["model"]["max_decoding_steps"] = max_decoding_steps 23 | # The test harness is set up to treat DeprecationWarning's like errors, so this needs to 24 | # be called within the pytest context manager. 25 | with pytest.raises(DeprecationWarning): 26 | model = Model.from_params(vocab=self.vocab, params=params.get("model")) 27 | assert model._beam_search.beam_size == beam_size 28 | assert model._beam_search.max_steps == max_decoding_steps 29 | 30 | def test_model_can_train_save_load_predict(self): 31 | self.ensure_model_can_train_save_and_load(self.param_file, tolerance=1e-2) 32 | -------------------------------------------------------------------------------- /tests/generation/models/t5_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import ModelTestCase 2 | 3 | from tests import FIXTURES_ROOT 4 | 5 | from allennlp_models import generation # noqa: F401 6 | 7 | 8 | class T5Test(ModelTestCase): 9 | def setup_method(self): 10 | super().setup_method() 11 | self.set_up_model( 12 | FIXTURES_ROOT / "generation" / "t5" / "experiment.jsonnet", 13 | FIXTURES_ROOT / "generation" / "bart" / "data" / "url_lists" / "all_train.txt", 14 | ) 15 | 16 | def test_model_can_train_save_load_predict(self): 17 | self.ensure_model_can_train_save_and_load(self.param_file, tolerance=1e-2) 18 | 
-------------------------------------------------------------------------------- /tests/generation/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/generation/modules/__init__.py -------------------------------------------------------------------------------- /tests/generation/modules/decoder_nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/generation/modules/decoder_nets/__init__.py -------------------------------------------------------------------------------- /tests/generation/modules/seq_decoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/generation/modules/seq_decoders/__init__.py -------------------------------------------------------------------------------- /tests/generation/predictors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/generation/predictors/__init__.py -------------------------------------------------------------------------------- /tests/lm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/lm/__init__.py -------------------------------------------------------------------------------- /tests/lm/dataset_readers/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/lm/dataset_readers/__init__.py -------------------------------------------------------------------------------- /tests/lm/interpret/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/lm/interpret/__init__.py -------------------------------------------------------------------------------- /tests/lm/interpret/lm_hotflip_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import AllenNlpTestCase 2 | from allennlp.interpret.attackers import Hotflip 3 | from allennlp.models.archival import load_archive 4 | from allennlp.predictors import Predictor 5 | 6 | from allennlp_models.lm import LinearLanguageModelHead 7 | from tests import FIXTURES_ROOT 8 | 9 | 10 | class TestHotflip(AllenNlpTestCase): 11 | def test_targeted_attack_from_json(self): 12 | inputs = {"sentence": "The doctor ran to the emergency room to see [MASK] patient."} 13 | 14 | archive = load_archive( 15 | FIXTURES_ROOT / "lm" / "masked_language_model" / "serialization" / "model.tar.gz" 16 | ) 17 | predictor = Predictor.from_archive(archive, "masked_language_model") 18 | 19 | hotflipper = Hotflip(predictor, vocab_namespace="tokens") 20 | hotflipper.initialize() 21 | attack = hotflipper.attack_from_json(inputs, target={"words": ["hi"]}) 22 | assert attack is not None 23 | assert "final" in attack 24 | assert "original" in attack 25 | assert "outputs" in attack 26 | assert len(attack["final"][0]) == len( 27 | attack["original"] 28 | ) # hotflip replaces words without removing 29 | assert attack["final"][0] != attack["original"] 30 | -------------------------------------------------------------------------------- /tests/lm/interpret/simple_gradient_test.py: 
-------------------------------------------------------------------------------- 1 | from allennlp.common.testing import AllenNlpTestCase 2 | from allennlp.interpret.saliency_interpreters import SimpleGradient 3 | from allennlp.models.archival import load_archive 4 | from allennlp.predictors import Predictor 5 | 6 | from tests import FIXTURES_ROOT 7 | 8 | 9 | class TestSimpleGradient(AllenNlpTestCase): 10 | def test_simple_gradient_masked_lm(self): 11 | inputs = { 12 | "sentence": "This is a single string [MASK] about a test . Sometimes it " 13 | "contains coreferent parts ." 14 | } 15 | archive = load_archive( 16 | FIXTURES_ROOT / "lm" / "masked_language_model" / "serialization" / "model.tar.gz" 17 | ) 18 | predictor = Predictor.from_archive(archive, "masked_language_model") 19 | interpreter = SimpleGradient(predictor) 20 | interpretation = interpreter.saliency_interpret_from_json(inputs) 21 | assert interpretation is not None 22 | assert "instance_1" in interpretation 23 | assert "grad_input_1" in interpretation["instance_1"] 24 | grad_input_1 = interpretation["instance_1"]["grad_input_1"] 25 | assert len(grad_input_1) == 16 # 16 words in input 26 | -------------------------------------------------------------------------------- /tests/lm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/lm/models/__init__.py -------------------------------------------------------------------------------- /tests/lm/models/masked_language_model_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import ModelTestCase 2 | 3 | from tests import FIXTURES_ROOT 4 | 5 | 6 | class TestMaskedLanguageModel(ModelTestCase): 7 | def setup_method(self): 8 | super().setup_method() 9 | self.set_up_model( 10 | FIXTURES_ROOT / "lm" / "masked_language_model" / 
"experiment.json", 11 | FIXTURES_ROOT / "lm" / "language_model" / "sentences.txt", 12 | ) 13 | 14 | def test_model_can_train_save_and_load(self): 15 | self.ensure_model_can_train_save_and_load(self.param_file) 16 | -------------------------------------------------------------------------------- /tests/lm/models/next_token_lm_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import ModelTestCase 2 | 3 | from tests import FIXTURES_ROOT 4 | from allennlp_models import lm # noqa: F401 5 | 6 | 7 | class TestNextTokenLanguageModel(ModelTestCase): 8 | def setup_method(self): 9 | super().setup_method() 10 | self.set_up_model( 11 | FIXTURES_ROOT / "lm" / "next_token_lm" / "experiment.json", 12 | FIXTURES_ROOT / "lm" / "language_model" / "sentences.txt", 13 | ) 14 | 15 | def test_model_can_train_save_and_load(self): 16 | self.ensure_model_can_train_save_and_load(self.param_file) 17 | 18 | 19 | class TestNextTokenTransformerLm(ModelTestCase): 20 | def setup_method(self): 21 | super().setup_method() 22 | self.set_up_model( 23 | FIXTURES_ROOT / "lm" / "next_token_lm" / "experiment_transformer.json", 24 | FIXTURES_ROOT / "lm" / "language_model" / "sentences.txt", 25 | ) 26 | 27 | def test_model_can_train_save_and_load(self): 28 | self.ensure_model_can_train_save_and_load( 29 | self.param_file, 30 | tolerance=1e-3, 31 | gradients_to_ignore={ 32 | "_text_field_embedder.token_embedder_tokens.transformer_model.pooler.dense.weight", 33 | "_text_field_embedder.token_embedder_tokens.transformer_model.pooler.dense.bias", 34 | }, 35 | ) 36 | -------------------------------------------------------------------------------- /tests/lm/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/lm/modules/__init__.py 
-------------------------------------------------------------------------------- /tests/lm/modules/language_model_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/lm/modules/language_model_heads/__init__.py -------------------------------------------------------------------------------- /tests/lm/modules/language_model_heads/bert_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from allennlp.common import Params 4 | from allennlp.common.testing.test_case import AllenNlpTestCase 5 | 6 | from allennlp_models.lm.modules.language_model_heads import LanguageModelHead, BertLanguageModelHead 7 | 8 | 9 | class TestBertLanguageModelHead(AllenNlpTestCase): 10 | def test_can_init_and_run(self): 11 | # The LM head code reads a module from somewhere else; we're basically just testing here 12 | # that we can initialize the expected model `from_params`. 
13 | head = LanguageModelHead.from_params( 14 | Params({"type": "bert", "model_name": "bert-base-uncased"}) 15 | ) 16 | assert isinstance(head, BertLanguageModelHead) 17 | assert head.get_input_dim() == 768 18 | assert head.get_output_dim() == 30522 19 | tensor = torch.rand(1, 768) 20 | logits = head(tensor) 21 | assert tuple(logits.size()) == (1, 30522) 22 | -------------------------------------------------------------------------------- /tests/lm/modules/language_model_heads/gpt2_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from allennlp.common import Params 4 | from allennlp.common.testing.test_case import AllenNlpTestCase 5 | 6 | from allennlp_models.lm.modules.language_model_heads import LanguageModelHead, Gpt2LanguageModelHead 7 | 8 | 9 | class TestGpt2LanguageModelHead(AllenNlpTestCase): 10 | def test_can_init_and_run(self): 11 | # The LM head code reads a module from somewhere else; we're basically just testing here 12 | # that we can initialize the expected model `from_params`. 
13 | head = LanguageModelHead.from_params(Params({"type": "gpt2", "model_name": "gpt2"})) 14 | assert isinstance(head, Gpt2LanguageModelHead) 15 | assert head.get_input_dim() == 768 16 | assert head.get_output_dim() == 50257 17 | tensor = torch.rand(1, 768) 18 | logits = head(tensor) 19 | assert tuple(logits.size()) == (1, 50257) 20 | -------------------------------------------------------------------------------- /tests/lm/modules/seq2seq_encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/lm/modules/seq2seq_encoders/__init__.py -------------------------------------------------------------------------------- /tests/lm/modules/token_embedders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/lm/modules/token_embedders/__init__.py -------------------------------------------------------------------------------- /tests/lm/modules/token_embedders/bidirectional_lm_test.py: -------------------------------------------------------------------------------- 1 | from tests import FIXTURES_ROOT 2 | from tests.lm.modules.token_embedders.language_model_test import TestLanguageModelTokenEmbedder 3 | 4 | 5 | class TestBidirectionalLanguageModelTokenEmbedder(TestLanguageModelTokenEmbedder): 6 | def setup_method(self): 7 | super().setup_method() 8 | self.set_up_model( 9 | FIXTURES_ROOT 10 | / "lm" 11 | / "language_model" 12 | / "bidirectional_lm_characters_token_embedder.jsonnet", 13 | FIXTURES_ROOT / "lm" / "conll2003.txt", 14 | ) 15 | 16 | 17 | class TestBidirectionalLanguageModelTokenEmbedderWithoutBosEos(TestLanguageModelTokenEmbedder): 18 | def setup_method(self): 19 | super().setup_method() 20 | self.set_up_model( 21 | FIXTURES_ROOT 22 | / "lm" 23 | / "language_model" 24 | / 
"bidirectional_lm_characters_token_embedder_without_bos_eos.jsonnet", 25 | FIXTURES_ROOT / "lm" / "conll2003.txt", 26 | ) 27 | -------------------------------------------------------------------------------- /tests/lm/predictors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/lm/predictors/__init__.py -------------------------------------------------------------------------------- /tests/lm/predictors/masked_language_model_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import AllenNlpTestCase 2 | from allennlp.models.archival import load_archive 3 | from allennlp.predictors import Predictor 4 | 5 | from tests import FIXTURES_ROOT 6 | 7 | 8 | class TestMaskedLanguageModelPredictor(AllenNlpTestCase): 9 | def test_predictions_to_labeled_instances(self): 10 | inputs = {"sentence": "Eric [MASK] was an intern at [MASK]"} 11 | 12 | archive = load_archive( 13 | FIXTURES_ROOT / "lm" / "masked_language_model" / "serialization" / "model.tar.gz" 14 | ) 15 | predictor = Predictor.from_archive(archive, "masked_language_model") 16 | 17 | instance = predictor._json_to_instance(inputs) 18 | outputs = predictor._model.forward_on_instance(instance) 19 | new_instances = predictor.predictions_to_labeled_instances(instance, outputs) 20 | assert len(new_instances) == 1 21 | assert "target_ids" in new_instances[0] 22 | assert len(new_instances[0]["target_ids"].tokens) == 2 # should have added two words 23 | -------------------------------------------------------------------------------- /tests/lm/predictors/next_token_lm_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import AllenNlpTestCase 2 | from allennlp.models.archival import load_archive 3 | from allennlp.predictors import Predictor 4 
| 5 | from tests import FIXTURES_ROOT 6 | from allennlp_models import lm # noqa: F401 7 | 8 | 9 | class TestNextTokenLMPredictor(AllenNlpTestCase): 10 | def test_predictions_to_labeled_instances(self): 11 | inputs = {"sentence": "Eric Wallace was an intern at"} 12 | 13 | archive = load_archive( 14 | FIXTURES_ROOT / "lm" / "next_token_lm" / "serialization" / "model.tar.gz" 15 | ) 16 | predictor = Predictor.from_archive(archive, "next_token_lm") 17 | 18 | instance = predictor._json_to_instance(inputs) 19 | outputs = predictor._model.forward_on_instance(instance) 20 | new_instances = predictor.predictions_to_labeled_instances(instance, outputs) 21 | assert len(new_instances) == 1 22 | assert "target_ids" in new_instances[0] 23 | assert len(new_instances[0]["target_ids"].tokens) == 1 # should have added one word 24 | -------------------------------------------------------------------------------- /tests/mc/dataset_readers/piqa_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from allennlp.common.util import ensure_list 4 | 5 | from allennlp_models.mc.dataset_readers.piqa import PiqaReader 6 | from tests import FIXTURES_ROOT 7 | 8 | 9 | class TestCommonsenseQaReader: 10 | def test_read_from_file(self): 11 | reader = PiqaReader(transformer_model_name="bert-base-uncased") 12 | instances = ensure_list(reader.read(str(FIXTURES_ROOT / "mc" / "piqa.jsonl"))) 13 | assert len(instances) == 10 14 | 15 | instance = instances[0] 16 | assert len(instance.fields["alternatives"]) == 2 17 | 18 | alternative = instance.fields["alternatives"][0] 19 | token_text = [t.text for t in alternative.tokens] 20 | token_type_ids = [t.type_id for t in alternative.tokens] 21 | 22 | assert token_text[:3] == ["[CLS]", "how", "do"] 23 | assert token_type_ids[:3] == [0, 0, 0] 24 | 25 | assert token_text[-3:] == ["dish", ".", "[SEP]"] 26 | assert token_type_ids[-3:] == [1, 1, 1] 27 | 28 | assert instance.fields["correct_alternative"] == 
0 29 | 30 | def test_length_limit_works(self): 31 | length_limit = 20 32 | 33 | reader = PiqaReader(transformer_model_name="bert-base-uncased", length_limit=length_limit) 34 | instances = ensure_list(reader.read(str(FIXTURES_ROOT / "mc" / "piqa.jsonl"))) 35 | 36 | assert len(instances) == 10 37 | for instance in instances: 38 | for alternative in instance.fields["alternatives"]: 39 | assert len(alternative) <= length_limit 40 | -------------------------------------------------------------------------------- /tests/mc/dataset_readers/swag_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from allennlp.common.util import ensure_list 4 | 5 | from allennlp_models.mc.dataset_readers.swag import SwagReader 6 | from tests import FIXTURES_ROOT 7 | 8 | 9 | class TestSwagReader: 10 | def test_read_from_file(self): 11 | reader = SwagReader(transformer_model_name="bert-base-uncased") 12 | instances = ensure_list(reader.read(FIXTURES_ROOT / "mc" / "swag.csv")) 13 | assert len(instances) == 11 14 | 15 | instance = instances[0] 16 | assert len(instance.fields["alternatives"]) == 4 17 | 18 | alternative = instance.fields["alternatives"][0] 19 | token_text = [t.text for t in alternative.tokens] 20 | token_type_ids = [t.type_id for t in alternative.tokens] 21 | 22 | assert token_text[:3] == ["[CLS]", "students", "lower"] 23 | assert token_type_ids[:3] == [0, 0, 0] 24 | 25 | assert token_text[-3:] == ["someone", ".", "[SEP]"] 26 | assert token_type_ids[-3:] == [1, 1, 1] 27 | 28 | assert instance.fields["correct_alternative"] == 2 29 | 30 | def test_length_limit_works(self): 31 | length_limit = 20 32 | 33 | reader = SwagReader(transformer_model_name="bert-base-uncased", length_limit=length_limit) 34 | instances = ensure_list(reader.read(FIXTURES_ROOT / "mc" / "swag.csv")) 35 | 36 | assert len(instances) == 11 37 | for instance in instances: 38 | for alternative in instance.fields["alternatives"]: 39 | assert 
len(alternative) <= length_limit 40 | -------------------------------------------------------------------------------- /tests/pair_classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/pair_classification/__init__.py -------------------------------------------------------------------------------- /tests/pair_classification/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/pair_classification/dataset_readers/__init__.py -------------------------------------------------------------------------------- /tests/pair_classification/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/pair_classification/models/__init__.py -------------------------------------------------------------------------------- /tests/pair_classification/models/bimpm_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import ModelTestCase 2 | 3 | from tests import FIXTURES_ROOT 4 | 5 | 6 | class TestBiMPM(ModelTestCase): 7 | def setup_method(self): 8 | super().setup_method() 9 | self.set_up_model( 10 | FIXTURES_ROOT / "pair_classification" / "bimpm" / "experiment.json", 11 | FIXTURES_ROOT / "pair_classification" / "quora_paraphrase.tsv", 12 | ) 13 | 14 | def test_forward_pass_runs_correctly(self): 15 | training_tensors = self.dataset.as_tensor_dict() 16 | output_dict = self.model(**training_tensors) 17 | assert "logits" in output_dict and "loss" in output_dict 18 | 19 | def test_model_can_train_save_and_load(self): 20 | 
self.ensure_model_can_train_save_and_load(self.param_file) 21 | 22 | def test_batch_predictions_are_consistent(self): 23 | self.ensure_batch_predictions_are_consistent() 24 | 25 | def test_decode_runs_correctly(self): 26 | training_tensors = self.dataset.as_tensor_dict() 27 | output_dict = self.model(**training_tensors) 28 | decode_output_dict = self.model.make_output_human_readable(output_dict) 29 | assert "label" in decode_output_dict 30 | -------------------------------------------------------------------------------- /tests/pair_classification/models/esim_test.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy.testing import assert_almost_equal 3 | 4 | from allennlp.common.testing import ModelTestCase 5 | 6 | from tests import FIXTURES_ROOT 7 | 8 | 9 | class TestESIM(ModelTestCase): 10 | def setup_method(self): 11 | super().setup_method() 12 | self.set_up_model( 13 | FIXTURES_ROOT / "pair_classification" / "esim" / "experiment.json", 14 | FIXTURES_ROOT / "pair_classification" / "snli.jsonl", 15 | ) 16 | 17 | def test_forward_pass_runs_correctly(self): 18 | training_tensors = self.dataset.as_tensor_dict() 19 | output_dict = self.model(**training_tensors) 20 | assert_almost_equal(numpy.sum(output_dict["label_probs"][0].data.numpy(), -1), 1, decimal=6) 21 | 22 | def test_model_can_train_save_and_load(self): 23 | self.ensure_model_can_train_save_and_load(self.param_file) 24 | 25 | def test_batch_predictions_are_consistent(self): 26 | self.ensure_batch_predictions_are_consistent() 27 | 28 | def test_decode_runs_correctly(self): 29 | training_tensors = self.dataset.as_tensor_dict() 30 | output_dict = self.model(**training_tensors) 31 | decode_output_dict = self.model.make_output_human_readable(output_dict) 32 | assert "label" in decode_output_dict 33 | -------------------------------------------------------------------------------- /tests/pair_classification/task_checklists/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/pair_classification/task_checklists/__init__.py -------------------------------------------------------------------------------- /tests/pair_classification/task_checklists/textual_entailment_suite_test.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import pytest 4 | from allennlp.confidence_checks.task_checklists.textual_entailment_suite import ( 5 | TextualEntailmentSuite, 6 | ) 7 | from allennlp.common.testing import AllenNlpTestCase 8 | from allennlp.models.archival import load_archive 9 | from allennlp.predictors import Predictor 10 | import torch 11 | import numpy 12 | 13 | from allennlp_models.pair_classification.predictors import * # noqa: F403 14 | from tests import FIXTURES_ROOT 15 | 16 | 17 | class TestTextualEntailmentSuite(AllenNlpTestCase): 18 | @pytest.mark.parametrize( 19 | "model", 20 | [ 21 | "decomposable_attention", 22 | "esim", 23 | ], 24 | ) 25 | def test_run(self, model: str): 26 | torch.manual_seed(1) 27 | numpy.random.seed(1) 28 | random.seed(1) 29 | 30 | archive = load_archive( 31 | FIXTURES_ROOT / "pair_classification" / model / "serialization" / "model.tar.gz" 32 | ) 33 | predictor = Predictor.from_archive(archive) 34 | 35 | data = [ 36 | ("Alice and Bob are friends.", "Alice is Bob's friend."), 37 | ("The park had children playing", "The park was empty."), 38 | ] 39 | 40 | suite = TextualEntailmentSuite(probs_key="label_probs", add_default_tests=True, data=data) 41 | suite.run(predictor, max_examples=10) 42 | -------------------------------------------------------------------------------- /tests/rc/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/rc/__init__.py -------------------------------------------------------------------------------- /tests/rc/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/rc/dataset_readers/__init__.py -------------------------------------------------------------------------------- /tests/rc/dataset_readers/qangaroo_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from allennlp.common import Params 4 | from allennlp.common.util import ensure_list 5 | 6 | from allennlp_models.rc import QangarooReader 7 | from tests import FIXTURES_ROOT 8 | 9 | 10 | class TestQangarooReader: 11 | def test_read_from_file(self): 12 | reader = QangarooReader() 13 | instances = ensure_list(reader.read(FIXTURES_ROOT / "rc" / "qangaroo.json")) 14 | assert len(instances) == 2 15 | 16 | assert [t.text for t in instances[0].fields["candidates"][3]] == ["german", "confederation"] 17 | assert [t.text for t in instances[0].fields["query"]] == ["country", "sms", "braunschweig"] 18 | assert [t.text for t in instances[0].fields["supports"][0][:3]] == [ 19 | "The", 20 | "North", 21 | "German", 22 | ] 23 | assert [t.text for t in instances[0].fields["answer"]] == ["german", "empire"] 24 | assert instances[0].fields["answer_index"].sequence_index == 4 25 | 26 | def test_can_build_from_params(self): 27 | reader = QangarooReader.from_params(Params({})) 28 | 29 | assert reader._token_indexers["tokens"].__class__.__name__ == "SingleIdTokenIndexer" 30 | -------------------------------------------------------------------------------- /tests/rc/dataset_readers/utils_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from 
allennlp_models.rc.dataset_readers.utils import char_span_to_token_span 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "token_offsets, character_span, expected_result", 8 | [ 9 | ([(0, 3), (4, 4), (5, 8)], (5, 8), ((2, 2), False)), 10 | ([(0, 3), (4, 4), (5, 8)], (4, 8), ((1, 2), False)), 11 | ([(0, 3), (4, 4), (5, 8)], (0, 8), ((0, 2), False)), 12 | ([(0, 3), (4, 4), (5, 8)], (1, 8), ((0, 2), True)), 13 | ([(0, 3), (4, 4), (5, 8)], (7, 8), ((2, 2), True)), 14 | ([(0, 3), (4, 4), (5, 8)], (7, 9), ((2, 2), True)), 15 | ], 16 | ) 17 | def test_char_span_to_token_span(token_offsets, character_span, expected_result): 18 | assert char_span_to_token_span(token_offsets, character_span) == expected_result 19 | 20 | 21 | def test_char_span_to_token_span_throws(): 22 | with pytest.raises(ValueError): 23 | char_span_to_token_span([(0, 3), (4, 4), (5, 8)], (7, 19)) 24 | -------------------------------------------------------------------------------- /tests/rc/evaluations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/rc/evaluations/__init__.py -------------------------------------------------------------------------------- /tests/rc/interpret/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/rc/interpret/__init__.py -------------------------------------------------------------------------------- /tests/rc/metrics/drop_em_and_f1_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import ( 2 | AllenNlpTestCase, 3 | global_distributed_metric, 4 | run_distributed_test, 5 | ) 6 | 7 | from allennlp_models.rc.metrics import DropEmAndF1 8 | 9 | 10 | class DropEmAndF1Test(AllenNlpTestCase): 11 | def test_drop_em_and_f1(self): 
12 | metric = DropEmAndF1() 13 | 14 | metric( 15 | "this is the best span", [{"spans": ["this is a good span", "something irrelevant"]}] 16 | ) 17 | exact_match, f1_score = metric.get_metric() 18 | assert exact_match == 0.0 19 | assert f1_score == 0.38 20 | 21 | def test_distributed_drop_em_and_f1(self): 22 | prediction = ["this is the best span", "this is another span"] 23 | ground_truths = [ 24 | [{"spans": ["this is a good span", "something irrelevant"]}], 25 | [{"spans": ["this is another span"]}], 26 | ] 27 | 28 | metric_kwargs = {"prediction": prediction, "ground_truths": ground_truths} 29 | desired_values = (1 / 2, 1.38 / 2) 30 | run_distributed_test( 31 | [-1, -1], 32 | global_distributed_metric, 33 | DropEmAndF1(), 34 | metric_kwargs, 35 | desired_values, 36 | exact=True, 37 | ) 38 | -------------------------------------------------------------------------------- /tests/rc/metrics/squad_em_and_f1_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import ( 2 | AllenNlpTestCase, 3 | global_distributed_metric, 4 | run_distributed_test, 5 | ) 6 | 7 | from allennlp_models.rc.metrics import SquadEmAndF1 8 | 9 | 10 | class SquadEmAndF1Test(AllenNlpTestCase): 11 | def test_squad_em_and_f1(self): 12 | metric = SquadEmAndF1() 13 | 14 | metric("this is the best span", ["this is a good span", "something irrelevant"]) 15 | 16 | exact_match, f1_score = metric.get_metric() 17 | assert exact_match == 0.0 18 | assert f1_score == 0.75 19 | 20 | def test_distributed_squad_em_and_f1(self): 21 | best_span_strings = ["this is the best span", "this is another span"] 22 | answer_strings = [ 23 | ["this is a good span", "something irrelevant"], 24 | ["this is another span", "this one is less perfect"], 25 | ] 26 | 27 | metric_kwargs = {"best_span_strings": best_span_strings, "answer_strings": answer_strings} 28 | desired_values = (1 / 2, 1.75 / 2) 29 | run_distributed_test( 30 | [-1, -1], 31 | 
global_distributed_metric, 32 | SquadEmAndF1(), 33 | metric_kwargs, 34 | desired_values, 35 | exact=True, 36 | ) 37 | -------------------------------------------------------------------------------- /tests/rc/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/rc/models/__init__.py -------------------------------------------------------------------------------- /tests/rc/models/dialog_qa_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import ModelTestCase 2 | from allennlp.data import Batch 3 | import torch 4 | 5 | import allennlp_models.rc 6 | from tests import FIXTURES_ROOT 7 | 8 | 9 | class DialogQATest(ModelTestCase): 10 | def setup_method(self): 11 | super().setup_method() 12 | self.set_up_model( 13 | FIXTURES_ROOT / "rc" / "dialog_qa" / "experiment.json", 14 | FIXTURES_ROOT / "rc" / "dialog_qa" / "quac_sample.json", 15 | seed=42, 16 | ) 17 | self.batch = Batch(self.instances) 18 | self.batch.index_instances(self.vocab) 19 | torch.use_deterministic_algorithms(True) 20 | 21 | def teardown_method(self): 22 | super().teardown_method() 23 | torch.use_deterministic_algorithms(False) 24 | 25 | def test_forward_pass_runs_correctly(self): 26 | training_tensors = self.batch.as_tensor_dict() 27 | output_dict = self.model(**training_tensors) 28 | assert "best_span_str" in output_dict and "loss" in output_dict 29 | assert "followup" in output_dict and "yesno" in output_dict 30 | 31 | def test_model_can_train_save_and_load(self): 32 | self.ensure_model_can_train_save_and_load( 33 | self.param_file, tolerance=1e-4, gradients_to_ignore={"_matrix_attention._bias"} 34 | ) 35 | 36 | def test_batch_predictions_are_consistent(self): 37 | self.ensure_batch_predictions_are_consistent() 38 | 
-------------------------------------------------------------------------------- /tests/rc/models/naqanet_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import ModelTestCase 2 | 3 | from tests import FIXTURES_ROOT 4 | 5 | from allennlp_models.rc import NumericallyAugmentedQaNet 6 | 7 | 8 | class NumericallyAugmentedQaNetTest(ModelTestCase): 9 | def setup_method(self): 10 | super().setup_method() 11 | self.set_up_model( 12 | FIXTURES_ROOT / "rc" / "naqanet" / "experiment.json", 13 | FIXTURES_ROOT / "rc" / "drop.json", 14 | ) 15 | 16 | def test_model_can_train_save_and_load(self): 17 | import torch 18 | 19 | torch.autograd.set_detect_anomaly(True) 20 | self.ensure_model_can_train_save_and_load( 21 | self.param_file, 22 | # Due to numerical instability, these scalar tensors might sometimes 23 | # have zero gradients. 24 | gradients_to_ignore={ 25 | "_passage_span_end_predictor._linear_layers.1.bias", 26 | "_question_span_end_predictor._linear_layers.1.bias", 27 | }, 28 | ) 29 | -------------------------------------------------------------------------------- /tests/rc/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/rc/modules/__init__.py -------------------------------------------------------------------------------- /tests/rc/modules/seq2seq_encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/rc/modules/seq2seq_encoders/__init__.py -------------------------------------------------------------------------------- /tests/rc/predictors/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/rc/predictors/__init__.py -------------------------------------------------------------------------------- /tests/rc/task_checklists/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/rc/task_checklists/__init__.py -------------------------------------------------------------------------------- /tests/rc/task_checklists/question_answering_suite_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from allennlp.confidence_checks.task_checklists.question_answering_suite import ( 3 | QuestionAnsweringSuite, 4 | ) 5 | from allennlp.common.testing import AllenNlpTestCase 6 | from allennlp.models.archival import load_archive 7 | from allennlp.predictors import Predictor 8 | 9 | from allennlp_models.rc.predictors import * # noqa: F403 10 | from tests import FIXTURES_ROOT 11 | 12 | 13 | class TestQuestionAnsweringSuite(AllenNlpTestCase): 14 | @pytest.mark.parametrize( 15 | "model", 16 | [ 17 | "bidaf", 18 | ], 19 | ) 20 | def test_run(self, model: str): 21 | archive = load_archive(FIXTURES_ROOT / "rc" / model / "serialization" / "model.tar.gz") 22 | predictor = Predictor.from_archive(archive) 23 | 24 | data = [ 25 | ("Alice is taller than Bob.", "Who is taller?"), 26 | ("Children were playing in the park.", "Was the park empty?"), 27 | ] 28 | suite = QuestionAnsweringSuite(context_key="passage", add_default_tests=True, data=data) 29 | suite.run(predictor, max_examples=10) 30 | -------------------------------------------------------------------------------- /tests/structured_prediction/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/structured_prediction/__init__.py -------------------------------------------------------------------------------- /tests/structured_prediction/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/structured_prediction/dataset_readers/__init__.py -------------------------------------------------------------------------------- /tests/structured_prediction/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/structured_prediction/metrics/__init__.py -------------------------------------------------------------------------------- /tests/structured_prediction/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/structured_prediction/models/__init__.py -------------------------------------------------------------------------------- /tests/structured_prediction/models/graph_parser_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing.model_test_case import ModelTestCase 2 | from tests import FIXTURES_ROOT 3 | 4 | import allennlp_models.structured_prediction 5 | 6 | 7 | class GraphParserTest(ModelTestCase): 8 | def setup_method(self): 9 | super().setup_method() 10 | self.set_up_model( 11 | FIXTURES_ROOT / "structured_prediction" / "semantic_dependencies" / "experiment.json", 12 | FIXTURES_ROOT / "structured_prediction" / "semantic_dependencies" / "dm.sdp", 13 | ) 14 | 15 | def test_graph_parser_can_save_and_load(self): 16 | 
self.ensure_model_can_train_save_and_load(self.param_file) 17 | 18 | def test_batch_predictions_are_consistent(self): 19 | self.ensure_batch_predictions_are_consistent() 20 | 21 | def test_model_can_decode(self): 22 | self.model.eval() 23 | training_tensors = self.dataset.as_tensor_dict() 24 | output_dict = self.model(**training_tensors) 25 | decode_output_dict = self.model.make_output_human_readable(output_dict) 26 | 27 | assert set(decode_output_dict.keys()) == { 28 | "arc_loss", 29 | "tag_loss", 30 | "loss", 31 | "arcs", 32 | "arc_tags", 33 | "arc_tag_probs", 34 | "arc_probs", 35 | "tokens", 36 | "mask", 37 | } 38 | -------------------------------------------------------------------------------- /tests/structured_prediction/predictors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/structured_prediction/predictors/__init__.py -------------------------------------------------------------------------------- /tests/tagging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/tagging/__init__.py -------------------------------------------------------------------------------- /tests/tagging/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/tagging/dataset_readers/__init__.py -------------------------------------------------------------------------------- /tests/tagging/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/tagging/models/__init__.py 
-------------------------------------------------------------------------------- /tests/version_test.py: -------------------------------------------------------------------------------- 1 | from allennlp_models import version 2 | 3 | 4 | class TestVersion: 5 | def test_version_exists(self): 6 | assert version.VERSION.startswith(version.VERSION_SHORT) 7 | -------------------------------------------------------------------------------- /tests/vision/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/vision/__init__.py -------------------------------------------------------------------------------- /tests/vision/dataset_readers/vision_reader_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.lazy import Lazy 2 | from allennlp.common.testing import AllenNlpTestCase 3 | from allennlp.data.image_loader import TorchImageLoader 4 | from allennlp.modules.vision.grid_embedder import NullGridEmbedder 5 | from allennlp.modules.vision.region_detector import RandomRegionDetector 6 | from allennlp_models.vision.dataset_readers.vision_reader import VisionReader 7 | from tests import FIXTURES_ROOT 8 | 9 | 10 | class TestVisionReader(AllenNlpTestCase): 11 | def test_load_images(self): 12 | reader = VisionReader( 13 | image_dir=FIXTURES_ROOT / "vision" / "images" / "vision_reader", 14 | image_loader=TorchImageLoader(), 15 | image_featurizer=Lazy(NullGridEmbedder), 16 | region_detector=Lazy(RandomRegionDetector), 17 | ) 18 | assert len(reader.images) == 3 19 | assert set(reader.images.keys()) == { 20 | "png_example.png", 21 | "jpg_example.jpg", 22 | "jpeg_example.jpeg", 23 | } 24 | -------------------------------------------------------------------------------- /tests/vision/models/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/allennlp-models/b1f372248c17ad12684d344955fbcd98e957e77e/tests/vision/models/__init__.py -------------------------------------------------------------------------------- /tests/vision/models/vilbert_multitask_test.py: -------------------------------------------------------------------------------- 1 | from allennlp.common.testing import ModelTestCase 2 | 3 | from tests import FIXTURES_ROOT 4 | 5 | 6 | class TestVilbertMultitask(ModelTestCase): 7 | def test_predict(self): 8 | from allennlp.models import load_archive 9 | from allennlp.predictors import Predictor 10 | import allennlp_models.vision 11 | 12 | archive = load_archive(FIXTURES_ROOT / "vision" / "vilbert_multitask" / "model.tar.gz") 13 | predictor = Predictor.from_archive(archive) 14 | 15 | with open( 16 | FIXTURES_ROOT / "vision" / "vilbert_multitask" / "dataset.json", "r" 17 | ) as file_input: 18 | json_input = [predictor.load_line(line) for line in file_input if not line.isspace()] 19 | predictions = predictor.predict_batch_json(json_input) 20 | assert all( 21 | "gqa_best_answer" in p or "vqa_best_answer" in p or "ve_entailment_answer" in p 22 | for p in predictions 23 | ) 24 | 25 | def test_model_can_train_save_and_load_small_model(self): 26 | param_file = FIXTURES_ROOT / "vision" / "vilbert_multitask" / "experiment.jsonnet" 27 | 28 | # The VQA weights are going to be zero because the last batch is Visual Entailment only, 29 | # and so the gradients for VQA don't get set. 
30 | self.ensure_model_can_train_save_and_load( 31 | param_file, 32 | gradients_to_ignore={"_heads.vqa.classifier.bias", "_heads.vqa.classifier.weight"}, 33 | ) 34 | -------------------------------------------------------------------------------- /training_config/mc/piqa.jsonnet: -------------------------------------------------------------------------------- 1 | local transformer_model = "roberta-large"; 2 | 3 | local epochs = 20; 4 | local batch_size = 64; 5 | 6 | local gpu_batch_size = 4; 7 | local gradient_accumulation_steps = batch_size / gpu_batch_size; 8 | 9 | { 10 | "dataset_reader": { 11 | "type": "piqa", 12 | "transformer_model_name": transformer_model, 13 | //"max_instances": 2000 // debug setting 14 | }, 15 | "train_data_path": "https://yonatanbisk.com/piqa/data/train.jsonl", 16 | "validation_data_path": "https://yonatanbisk.com/piqa/data/valid.jsonl", 17 | "model": { 18 | "type": "transformer_mc", 19 | "transformer_model": transformer_model 20 | }, 21 | "data_loader": { 22 | "shuffle": true, 23 | "batch_size": gpu_batch_size 24 | }, 25 | "trainer": { 26 | "optimizer": { 27 | "type": "huggingface_adamw", 28 | "weight_decay": 0.01, 29 | "parameter_groups": [[["bias", "LayerNorm\\.weight", "layer_norm\\.weight"], {"weight_decay": 0}]], 30 | "lr": 1e-5, 31 | "eps": 1e-8, 32 | "correct_bias": true 33 | }, 34 | "learning_rate_scheduler": { 35 | "type": "linear_with_warmup", 36 | "warmup_steps": 100 37 | }, 38 | // "grad_norm": 1.0, 39 | "num_epochs": epochs, 40 | "num_gradient_accumulation_steps": gradient_accumulation_steps, 41 | "patience": 3, 42 | "validation_metric": "+acc", 43 | }, 44 | "random_seed": 2, 45 | "numpy_seed": 2, 46 | "pytorch_seed": 2, 47 | } 48 | -------------------------------------------------------------------------------- /training_config/mc/piqa_tt.jsonnet: -------------------------------------------------------------------------------- 1 | local transformer_model = "roberta-large"; 2 | 3 | local epochs = 20; 4 | local batch_size = 
64; 5 | 6 | local gpu_batch_size = 4; 7 | local gradient_accumulation_steps = batch_size / gpu_batch_size; 8 | 9 | { 10 | "dataset_reader": { 11 | "type": "piqa_tt", 12 | "transformer_model_name": transformer_model, 13 | //"max_instances": 2000 // debug setting 14 | }, 15 | "train_data_path": "https://yonatanbisk.com/piqa/data/train.jsonl", 16 | "validation_data_path": "https://yonatanbisk.com/piqa/data/valid.jsonl", 17 | "model": { 18 | "type": "transformer_mc_tt", 19 | "transformer_model": transformer_model 20 | }, 21 | "data_loader": { 22 | "shuffle": true, 23 | "batch_size": gpu_batch_size 24 | }, 25 | "trainer": { 26 | "optimizer": { 27 | "type": "huggingface_adamw", 28 | "weight_decay": 0.01, 29 | "parameter_groups": [[["bias", "LayerNorm\\.weight", "layer_norm\\.weight"], {"weight_decay": 0}]], 30 | "lr": 1e-5, 31 | "eps": 1e-8, 32 | "correct_bias": true 33 | }, 34 | "learning_rate_scheduler": { 35 | "type": "linear_with_warmup", 36 | "warmup_steps": 100 37 | }, 38 | // "grad_norm": 1.0, 39 | "num_epochs": epochs, 40 | "num_gradient_accumulation_steps": gradient_accumulation_steps, 41 | "patience": 3, 42 | "validation_metric": "+acc", 43 | }, 44 | "random_seed": 2, 45 | "numpy_seed": 2, 46 | "pytorch_seed": 2, 47 | } 48 | -------------------------------------------------------------------------------- /training_config/mc/swag.jsonnet: -------------------------------------------------------------------------------- 1 | local transformer_model = "roberta-large"; 2 | 3 | local epochs = 20; 4 | local batch_size = 64; 5 | 6 | local gpu_batch_size = 4; 7 | local gradient_accumulation_steps = batch_size / gpu_batch_size; 8 | 9 | { 10 | "dataset_reader": { 11 | "type": "swag", 12 | "transformer_model_name": transformer_model, 13 | //"max_instances": 200 // debug setting 14 | }, 15 | "train_data_path": "https://raw.githubusercontent.com/rowanz/swagaf/master/data/train.csv", 16 | "validation_data_path": 
"https://raw.githubusercontent.com/rowanz/swagaf/master/data/val.csv", 17 | // "test_data_path": "https://raw.githubusercontent.com/rowanz/swagaf/master/data/test.csv", 18 | "model": { 19 | "type": "transformer_mc", 20 | "transformer_model": transformer_model, 21 | }, 22 | "data_loader": { 23 | "shuffle": true, 24 | "batch_size": gpu_batch_size 25 | }, 26 | "trainer": { 27 | "optimizer": { 28 | "type": "huggingface_adamw", 29 | "weight_decay": 0.01, 30 | "parameter_groups": [[["bias", "LayerNorm\\.weight", "layer_norm\\.weight"], {"weight_decay": 0}]], 31 | "lr": 1e-5, 32 | "eps": 1e-8, 33 | "correct_bias": true 34 | }, 35 | "learning_rate_scheduler": { 36 | "type": "linear_with_warmup", 37 | "warmup_steps": 100 38 | }, 39 | // "grad_norm": 1.0, 40 | "num_epochs": epochs, 41 | "num_gradient_accumulation_steps": gradient_accumulation_steps, 42 | "patience": 3, 43 | "validation_metric": "+acc", 44 | }, 45 | "random_seed": 42, 46 | "numpy_seed": 42, 47 | "pytorch_seed": 42, 48 | } 49 | -------------------------------------------------------------------------------- /training_config/structured_prediction/bert_base_srl.jsonnet: -------------------------------------------------------------------------------- 1 | local bert_model = "bert-base-uncased"; 2 | 3 | { 4 | "dataset_reader": { 5 | "type": "srl", 6 | "bert_model_name": bert_model, 7 | }, 8 | 9 | "data_loader": { 10 | "batch_sampler": { 11 | "type": "bucket", 12 | "batch_size" : 32 13 | } 14 | }, 15 | 16 | // "train_data_path": "/net/nfs.corp/allennlp/data/ontonotes/conll-formatted-ontonotes-5.0/data/train", 17 | // "validation_data_path": "/net/nfs.corp/allennlp/data/ontonotes/conll-formatted-ontonotes-5.0/data/development", 18 | "train_data_path": std.extVar("SRL_TRAIN_DATA_PATH"), 19 | "validation_data_path": std.extVar("SRL_VALIDATION_DATA_PATH"), 20 | 21 | "model": { 22 | "type": "srl_bert", 23 | "embedding_dropout": 0.1, 24 | "bert_model": bert_model, 25 | }, 26 | 27 | "trainer": { 28 | "optimizer": { 29 | 
"type": "huggingface_adamw", 30 | "lr": 5e-5, 31 | "correct_bias": false, 32 | "weight_decay": 0.01, 33 | "parameter_groups": [ 34 | [["bias", "LayerNorm.bias", "LayerNorm.weight", "layer_norm.weight"], {"weight_decay": 0.0}], 35 | ], 36 | }, 37 | 38 | "learning_rate_scheduler": { 39 | "type": "slanted_triangular", 40 | }, 41 | "checkpointer": { 42 | "keep_most_recent_by_count": 2, 43 | }, 44 | "grad_norm": 1.0, 45 | "num_epochs": 15, 46 | "validation_metric": "+f1-measure-overall", 47 | }, 48 | } 49 | -------------------------------------------------------------------------------- /training_config/structured_prediction/srl.jsonnet: -------------------------------------------------------------------------------- 1 | // Configuration for a semantic role labeler model based on: 2 | // He, Luheng et al. “Deep Semantic Role Labeling: What Works and What's Next.” ACL (2017). 3 | { 4 | "dataset_reader":{"type":"srl"}, 5 | "train_data_path": std.extVar("SRL_TRAIN_DATA_PATH"), 6 | "validation_data_path": std.extVar("SRL_VALIDATION_DATA_PATH"), 7 | "model": { 8 | "type": "srl", 9 | "text_field_embedder": { 10 | "token_embedders": { 11 | "tokens": { 12 | "type": "embedding", 13 | "embedding_dim": 100, 14 | "pretrained_file": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.6B.100d.txt.gz", 15 | "trainable": true 16 | } 17 | } 18 | }, 19 | "initializer": { 20 | "regexes": [ 21 | ["tag_projection_layer.*weight", { "type": "orthogonal" }] 22 | ] 23 | }, 24 | "encoder": { 25 | "type": "alternating_lstm", 26 | "input_size": 200, 27 | "hidden_size": 300, 28 | "num_layers": 8, 29 | "recurrent_dropout_probability": 0.1, 30 | "use_highway": true 31 | }, 32 | "binary_feature_dim": 100 33 | }, 34 | "data_loader": { 35 | "batch_sampler": { 36 | "type": "bucket", 37 | "batch_size": 80 38 | } 39 | }, 40 | 41 | "trainer": { 42 | "num_epochs": 500, 43 | "grad_clipping": 1.0, 44 | "patience": 20, 45 | "validation_metric": "+f1-measure-overall", 46 | "optimizer": { 47 | "type": 
"adadelta", 48 | "rho": 0.95 49 | } 50 | } 51 | } 52 | --------------------------------------------------------------------------------