├── .coveragerc
├── .dockerignore
├── .flake8
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   ├── feature_request.md
    │   └── question.md
    ├── pull_request_template.md
    └── workflows
    │   ├── cffconvert.yml
    │   ├── ci.yml
    │   └── issues.yml
├── .gitignore
├── CHANGELOG.md
├── CITATION.cff
├── CONTRIBUTING.md
├── Dockerfile
├── Dockerfile.test
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── RELEASE_PROCESS.md
├── STYLE.md
├── allennlp
    ├── __init__.py
    ├── __main__.py
    ├── commands
    │   ├── __init__.py
    │   ├── _checklist_internal.py
    │   ├── build_vocab.py
    │   ├── cached_path.py
    │   ├── checklist.py
    │   ├── count_instances.py
    │   ├── diff.py
    │   ├── evaluate.py
    │   ├── find_learning_rate.py
    │   ├── predict.py
    │   ├── print_results.py
    │   ├── push_to_hf.py
    │   ├── subcommand.py
    │   ├── test_install.py
    │   └── train.py
    ├── common
    │   ├── __init__.py
    │   ├── cached_transformers.py
    │   ├── checks.py
    │   ├── file_utils.py
    │   ├── from_params.py
    │   ├── lazy.py
    │   ├── logging.py
    │   ├── meta.py
    │   ├── model_card.py
    │   ├── params.py
    │   ├── plugins.py
    │   ├── push_to_hf.py
    │   ├── registrable.py
    │   ├── sequences.py
    │   ├── task_card.py
    │   ├── testing
    │   │   ├── __init__.py
    │   │   ├── checklist_test.py
    │   │   ├── confidence_check_test.py
    │   │   ├── distributed_test.py
    │   │   ├── interpret_test.py
    │   │   ├── model_test_case.py
    │   │   └── test_case.py
    │   ├── tqdm.py
    │   └── util.py
    ├── confidence_checks
    │   ├── __init__.py
    │   ├── normalization_bias_verification.py
    │   ├── task_checklists
    │   │   ├── __init__.py
    │   │   ├── question_answering_suite.py
    │   │   ├── sentiment_analysis_suite.py
    │   │   ├── task_suite.py
    │   │   ├── textual_entailment_suite.py
    │   │   └── utils.py
    │   └── verification_base.py
    ├── data
    │   ├── __init__.py
    │   ├── batch.py
    │   ├── data_loaders
    │   │   ├── __init__.py
    │   │   ├── data_collator.py
    │   │   ├── data_loader.py
    │   │   ├── multiprocess_data_loader.py
    │   │   ├── multitask_data_loader.py
    │   │   ├── multitask_epoch_sampler.py
    │   │   ├── multitask_scheduler.py
    │   │   └── simple_data_loader.py
    │   ├── dataset_readers
    │   │   ├── __init__.py
    │   │   ├── babi.py
    │   │   ├── conll2003.py
    │   │   ├── dataset_reader.py
    │   │   ├── dataset_utils
    │   │   │   ├── __init__.py
    │   │   │   └── span_utils.py
    │   │   ├── interleaving_dataset_reader.py
    │   │   ├── multitask.py
    │   │   ├── sequence_tagging.py
    │   │   ├── sharded_dataset_reader.py
    │   │   └── text_classification_json.py
    │   ├── fields
    │   │   ├── __init__.py
    │   │   ├── adjacency_field.py
    │   │   ├── array_field.py
    │   │   ├── field.py
    │   │   ├── flag_field.py
    │   │   ├── index_field.py
    │   │   ├── label_field.py
    │   │   ├── list_field.py
    │   │   ├── metadata_field.py
    │   │   ├── multilabel_field.py
    │   │   ├── namespace_swapping_field.py
    │   │   ├── sequence_field.py
    │   │   ├── sequence_label_field.py
    │   │   ├── span_field.py
    │   │   ├── tensor_field.py
    │   │   ├── text_field.py
    │   │   └── transformer_text_field.py
    │   ├── image_loader.py
    │   ├── instance.py
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   ├── batch_sampler.py
    │   │   ├── bucket_batch_sampler.py
    │   │   └── max_tokens_batch_sampler.py
    │   ├── token_indexers
    │   │   ├── __init__.py
    │   │   ├── elmo_indexer.py
    │   │   ├── pretrained_transformer_indexer.py
    │   │   ├── pretrained_transformer_mismatched_indexer.py
    │   │   ├── single_id_token_indexer.py
    │   │   ├── spacy_indexer.py
    │   │   ├── token_characters_indexer.py
    │   │   └── token_indexer.py
    │   ├── tokenizers
    │   │   ├── __init__.py
    │   │   ├── character_tokenizer.py
    │   │   ├── letters_digits_tokenizer.py
    │   │   ├── pretrained_transformer_tokenizer.py
    │   │   ├── sentence_splitter.py
    │   │   ├── spacy_tokenizer.py
    │   │   ├── token_class.py
    │   │   ├── tokenizer.py
    │   │   └── whitespace_tokenizer.py
    │   └── vocabulary.py
    ├── evaluation
    │   ├── __init__.py
    │   ├── evaluator.py
    │   ├── postprocessors
    │   │   └── __init__.py
    │   └── serializers
    │   │   ├── __init__.py
    │   │   └── serializers.py
    ├── fairness
    │   ├── __init__.py
    │   ├── adversarial_bias_mitigator.py
    │   ├── bias_direction.py
    │   ├── bias_direction_wrappers.py
    │   ├── bias_metrics.py
    │   ├── bias_mitigator_applicator.py
    │   ├── bias_mitigator_wrappers.py
    │   ├── bias_mitigators.py
    │   ├── bias_utils.py
    │   └── fairness_metrics.py
    ├── interpret
    │   ├── __init__.py
    │   ├── attackers
    │   │   ├── __init__.py
    │   │   ├── attacker.py
    │   │   ├── hotflip.py
    │   │   ├── input_reduction.py
    │   │   └── utils.py
    │   ├── influence_interpreters
    │   │   ├── __init__.py
    │   │   ├── influence_interpreter.py
    │   │   └── simple_influence.py
    │   └── saliency_interpreters
    │   │   ├── __init__.py
    │   │   ├── integrated_gradient.py
    │   │   ├── saliency_interpreter.py
    │   │   ├── simple_gradient.py
    │   │   └── smooth_gradient.py
    ├── models
    │   ├── __init__.py
    │   ├── archival.py
    │   ├── basic_classifier.py
    │   ├── heads
    │   │   ├── __init__.py
    │   │   ├── classifier_head.py
    │   │   └── head.py
    │   ├── model.py
    │   ├── multitask.py
    │   └── simple_tagger.py
    ├── modules
    │   ├── __init__.py
    │   ├── attention
    │   │   ├── __init__.py
    │   │   ├── additive_attention.py
    │   │   ├── attention.py
    │   │   ├── bilinear_attention.py
    │   │   ├── cosine_attention.py
    │   │   ├── dot_product_attention.py
    │   │   ├── linear_attention.py
    │   │   └── scaled_dot_product_attention.py
    │   ├── augmented_lstm.py
    │   ├── backbones
    │   │   ├── __init__.py
    │   │   ├── backbone.py
    │   │   ├── pretrained_transformer_backbone.py
    │   │   └── vilbert_backbone.py
    │   ├── bimpm_matching.py
    │   ├── conditional_random_field
    │   │   ├── __init__.py
    │   │   ├── conditional_random_field.py
    │   │   ├── conditional_random_field_wemission.py
    │   │   ├── conditional_random_field_wlannoy.py
    │   │   └── conditional_random_field_wtrans.py
    │   ├── elmo.py
    │   ├── elmo_lstm.py
    │   ├── encoder_base.py
    │   ├── feedforward.py
    │   ├── gated_sum.py
    │   ├── highway.py
    │   ├── input_variational_dropout.py
    │   ├── layer_norm.py
    │   ├── lstm_cell_with_projection.py
    │   ├── masked_layer_norm.py
    │   ├── matrix_attention
    │   │   ├── __init__.py
    │   │   ├── bilinear_matrix_attention.py
    │   │   ├── cosine_matrix_attention.py
    │   │   ├── dot_product_matrix_attention.py
    │   │   ├── linear_matrix_attention.py
    │   │   ├── matrix_attention.py
    │   │   └── scaled_dot_product_matrix_attention.py
    │   ├── maxout.py
    │   ├── residual_with_layer_dropout.py
    │   ├── sampled_softmax_loss.py
    │   ├── scalar_mix.py
    │   ├── seq2seq_encoders
    │   │   ├── __init__.py
    │   │   ├── compose_encoder.py
    │   │   ├── feedforward_encoder.py
    │   │   ├── gated_cnn_encoder.py
    │   │   ├── pass_through_encoder.py
    │   │   ├── pytorch_seq2seq_wrapper.py
    │   │   ├── pytorch_transformer_wrapper.py
    │   │   └── seq2seq_encoder.py
    │   ├── seq2vec_encoders
    │   │   ├── __init__.py
    │   │   ├── bert_pooler.py
    │   │   ├── boe_encoder.py
    │   │   ├── cls_pooler.py
    │   │   ├── cnn_encoder.py
    │   │   ├── cnn_highway_encoder.py
    │   │   ├── pytorch_seq2vec_wrapper.py
    │   │   └── seq2vec_encoder.py
    │   ├── softmax_loss.py
    │   ├── span_extractors
    │   │   ├── __init__.py
    │   │   ├── bidirectional_endpoint_span_extractor.py
    │   │   ├── endpoint_span_extractor.py
    │   │   ├── max_pooling_span_extractor.py
    │   │   ├── self_attentive_span_extractor.py
    │   │   ├── span_extractor.py
    │   │   └── span_extractor_with_span_width_embedding.py
    │   ├── stacked_alternating_lstm.py
    │   ├── stacked_bidirectional_lstm.py
    │   ├── text_field_embedders
    │   │   ├── __init__.py
    │   │   ├── basic_text_field_embedder.py
    │   │   └── text_field_embedder.py
    │   ├── time_distributed.py
    │   ├── token_embedders
    │   │   ├── __init__.py
    │   │   ├── bag_of_word_counts_token_embedder.py
    │   │   ├── elmo_token_embedder.py
    │   │   ├── embedding.py
    │   │   ├── empty_embedder.py
    │   │   ├── pass_through_token_embedder.py
    │   │   ├── pretrained_transformer_embedder.py
    │   │   ├── pretrained_transformer_mismatched_embedder.py
    │   │   ├── token_characters_encoder.py
    │   │   └── token_embedder.py
    │   ├── transformer
    │   │   ├── __init__.py
    │   │   ├── activation_layer.py
    │   │   ├── attention_module.py
    │   │   ├── bimodal_attention.py
    │   │   ├── bimodal_connection_layer.py
    │   │   ├── bimodal_encoder.py
    │   │   ├── layer_norm.py
    │   │   ├── output_layer.py
    │   │   ├── positional_encoding.py
    │   │   ├── t5.py
    │   │   ├── transformer_embeddings.py
    │   │   ├── transformer_layer.py
    │   │   ├── transformer_module.py
    │   │   ├── transformer_pooler.py
    │   │   ├── transformer_stack.py
    │   │   └── util.py
    │   ├── util.py
    │   └── vision
    │   │   ├── __init__.py
    │   │   ├── grid_embedder.py
    │   │   ├── image2image.py
    │   │   └── region_detector.py
    ├── nn
    │   ├── __init__.py
    │   ├── activations.py
    │   ├── beam_search.py
    │   ├── checkpoint
    │   │   ├── __init__.py
    │   │   ├── checkpoint_wrapper.py
    │   │   └── fairscale_checkpoint_wrapper.py
    │   ├── chu_liu_edmonds.py
    │   ├── initializers.py
    │   ├── module.py
    │   ├── parallel
    │   │   ├── __init__.py
    │   │   ├── ddp_accelerator.py
    │   │   ├── fairscale_fsdp_accelerator.py
    │   │   └── sharded_module_mixin.py
    │   ├── regularizers
    │   │   ├── __init__.py
    │   │   ├── regularizer.py
    │   │   ├── regularizer_applicator.py
    │   │   └── regularizers.py
    │   └── util.py
    ├── predictors
    │   ├── __init__.py
    │   ├── multitask.py
    │   ├── predictor.py
    │   ├── sentence_tagger.py
    │   └── text_classifier.py
    ├── py.typed
    ├── sanity_checks
    │   ├── __init__.py
    │   └── task_checklists
    │   │   └── __init__.py
    ├── tools
    │   ├── EVALB
    │   │   ├── .gitignore
    │   │   ├── COLLINS.prm
    │   │   ├── LICENSE
    │   │   ├── Makefile
    │   │   ├── README
    │   │   ├── bug
    │   │   │   ├── bug.gld
    │   │   │   ├── bug.rsl-new
    │   │   │   ├── bug.rsl-old
    │   │   │   └── bug.tst
    │   │   ├── evalb.c
    │   │   ├── new.prm
    │   │   ├── sample
    │   │   │   ├── sample.gld
    │   │   │   ├── sample.prm
    │   │   │   ├── sample.rsl
    │   │   │   └── sample.tst
    │   │   └── tgrep_proc.prl
    │   ├── __init__.py
    │   ├── archive_surgery.py
    │   ├── create_elmo_embeddings_from_vocab.py
    │   └── inspect_cache.py
    ├── training
    │   ├── __init__.py
    │   ├── callbacks
    │   │   ├── __init__.py
    │   │   ├── backward.py
    │   │   ├── callback.py
    │   │   ├── confidence_checks.py
    │   │   ├── console_logger.py
    │   │   ├── log_writer.py
    │   │   ├── should_validate.py
    │   │   ├── tensorboard.py
    │   │   ├── track_epoch.py
    │   │   └── wandb.py
    │   ├── checkpointer.py
    │   ├── gradient_descent_trainer.py
    │   ├── learning_rate_schedulers
    │   │   ├── __init__.py
    │   │   ├── combined.py
    │   │   ├── cosine.py
    │   │   ├── learning_rate_scheduler.py
    │   │   ├── linear_with_warmup.py
    │   │   ├── noam.py
    │   │   ├── polynomial_decay.py
    │   │   ├── pytorch_lr_schedulers.py
    │   │   └── slanted_triangular.py
    │   ├── metric_tracker.py
    │   ├── metrics
    │   │   ├── __init__.py
    │   │   ├── attachment_scores.py
    │   │   ├── auc.py
    │   │   ├── average.py
    │   │   ├── bleu.py
    │   │   ├── boolean_accuracy.py
    │   │   ├── categorical_accuracy.py
    │   │   ├── covariance.py
    │   │   ├── entropy.py
    │   │   ├── evalb_bracketing_scorer.py
    │   │   ├── f1_measure.py
    │   │   ├── fbeta_measure.py
    │   │   ├── fbeta_multi_label_measure.py
    │   │   ├── fbeta_verbose_measure.py
    │   │   ├── mean_absolute_error.py
    │   │   ├── metric.py
    │   │   ├── pearson_correlation.py
    │   │   ├── perplexity.py
    │   │   ├── rouge.py
    │   │   ├── sequence_accuracy.py
    │   │   ├── span_based_f1_measure.py
    │   │   ├── spearman_correlation.py
    │   │   └── unigram_recall.py
    │   ├── momentum_schedulers
    │   │   ├── __init__.py
    │   │   ├── inverted_triangular.py
    │   │   └── momentum_scheduler.py
    │   ├── moving_average.py
    │   ├── no_op_trainer.py
    │   ├── optimizers.py
    │   ├── scheduler.py
    │   ├── trainer.py
    │   └── util.py
    └── version.py
├── benchmarks
    ├── __init__.py
    ├── data
    │   ├── __init__.py
    │   └── tokenizers
    │   │   ├── __init__.py
    │   │   └── character_tokenizer_bench.py
    ├── nn
    │   └── util_bench.py
    └── pytest.ini
├── codecov.yml
├── constraints.txt
├── dev-requirements.txt
├── docs
    ├── css
    │   └── extra.css
    └── img
    │   ├── allennlp-logo-dark.png
    │   └── favicon.ico
├── mkdocs-skeleton.yml
├── mypy.ini
├── pyproject.toml
├── pytest.ini
├── requirements.in
├── requirements.txt
├── scripts
    ├── 24hr_diff.sh
    ├── ai2_internal
    │   ├── resumable_train.sh
    │   ├── resume_daemon.py
    │   └── run_with_beaker.py
    ├── build_docs.sh
    ├── build_docs_config.py
    ├── check_large_files.sh
    ├── check_links.py
    ├── check_torch_version.py
    ├── close_stale_issues.py
    ├── get_version.py
    ├── ping_issue_assignees.py
    ├── py2md.py
    ├── release_notes.py
    ├── tests
    │   ├── ai2_internal
    │   │   └── resume_daemon_test.py
    │   └── py2md
    │   │   ├── basic_example.py
    │   │   ├── basic_example_expected_output.md
    │   │   └── py2md_test.py
    └── train_fixtures.py
├── setup.py
├── test_fixtures
    ├── __init__.py
    ├── basic_classifier
    │   ├── common.jsonnet
    │   ├── embedding_with_trainable_is_false
    │   │   └── model.tar.gz
    │   ├── experiment_from_archive.jsonnet
    │   ├── experiment_seq2seq.jsonnet
    │   ├── experiment_seq2vec.jsonnet
    │   ├── from_archive_serialization
    │   │   └── model.tar.gz
    │   ├── parameters_inspection.json
    │   └── serialization
    │   │   ├── best.th
    │   │   ├── model.tar.gz
    │   │   └── vocabulary
    │   │       ├── labels.txt
    │   │       ├── non_padded_namespaces.txt
    │   │       └── tokens.txt
    ├── common
    │   ├── .gitignore
    │   ├── external_symlink.tar.gz
    │   └── quote.tar.gz
    ├── data
    │   ├── babi.txt
    │   ├── brown_corpus.txt
    │   ├── conll2003.txt
    │   ├── images
    │   │   └── COCO_train2014_000000458752.jpg
    │   ├── sequence_tagging.tsv
    │   ├── shards
    │   │   ├── sequence_tagging_00.tsv
    │   │   ├── sequence_tagging_01.tsv
    │   │   └── sequence_tagging_02.tsv
    │   ├── text_classification_json
    │   │   ├── ag_news_corpus.jsonl
    │   │   ├── ag_news_corpus_fake_sentiment_labels.jsonl
    │   │   ├── imdb_corpus.jsonl
    │   │   ├── imdb_corpus2.jsonl
    │   │   └── integer_labels.jsonl
    │   ├── vocab.tar.gz
    │   └── vocab.zip
    ├── elmo
    │   ├── config
    │   │   └── characters_token_embedder.json
    │   ├── elmo_token_embeddings.hdf5
    │   ├── lm_embeddings_0.hdf5
    │   ├── lm_embeddings_1.hdf5
    │   ├── lm_embeddings_2.hdf5
    │   ├── lm_weights.hdf5
    │   ├── options.json
    │   ├── sentences.json
    │   └── vocab_test.txt
    ├── embeddings
    │   ├── fake_embeddings.5d.txt
    │   ├── fake_embeddings.5d.txt.bz2
    │   ├── fake_embeddings.5d.txt.gz
    │   ├── fake_embeddings.5d.txt.tar.gz
    │   ├── fake_embeddings.5d.txt.xz
    │   ├── fake_embeddings.5d.txt.zip
    │   ├── glove.6B.100d.sample.txt.gz
    │   ├── glove.6B.300d.sample.txt.gz
    │   ├── multi-file-archive.tar.gz
    │   └── multi-file-archive.zip
    ├── fairness
    │   ├── bias_embeddings.json
    │   ├── definitional_pairs.json
    │   ├── equalize_pairs.json
    │   └── gender_specific_full.json
    ├── plugins
    │   ├── .allennlp_plugins
    │   └── d
    │   │   ├── __init__.py
    │   │   └── d.py
    ├── simple_tagger
    │   ├── experiment.json
    │   ├── experiment_with_regularization.json
    │   ├── model_test_case.jsonnet
    │   ├── serialization
    │   │   ├── best.th
    │   │   ├── model.tar.gz
    │   │   └── vocabulary
    │   │   │   ├── labels.txt
    │   │   │   ├── non_padded_namespaces.txt
    │   │   │   └── tokens.txt
    │   └── serialization_full
    │   │   ├── best.th
    │   │   ├── config.json
    │   │   ├── meta.json
    │   │   ├── metrics.json
    │   │   ├── metrics_epoch_0.json
    │   │   ├── model.tar.gz
    │   │   ├── model_state_e1_b0.th
    │   │   ├── out.log
    │   │   ├── training_state_e1_b0.th
    │   │   └── vocabulary
    │   │       ├── .lock
    │   │       ├── labels.txt
    │   │       ├── non_padded_namespaces.txt
    │   │       └── tokens.txt
    ├── simple_tagger_with_elmo
    │   ├── experiment.json
    │   └── serialization
    │   │   ├── best.th
    │   │   ├── model.tar.gz
    │   │   └── vocabulary
    │   │       ├── labels.txt
    │   │       └── non_padded_namespaces.txt
    ├── simple_tagger_with_span_f1
    │   ├── experiment.json
    │   └── serialization
    │   │   ├── best.th
    │   │   ├── model.tar.gz
    │   │   └── vocabulary
    │   │       ├── labels.txt
    │   │       ├── non_padded_namespaces.txt
    │   │       ├── test_tokens.txt
    │   │       └── tokens.txt
    ├── task_suites
    │   └── fake_suite.tar.gz
    └── utf-8_sample
    │   ├── archives
    │       ├── utf-8.tar.bz2
    │       ├── utf-8.tar.gz
    │       ├── utf-8.tar.xz
    │       └── utf-8.zip
    │   ├── utf-8_sample.txt
    │   ├── utf-8_sample.txt.gz
    │   └── utf-8_sample.txt.zip
└── tests
    ├── __init__.py
    ├── commands
        ├── __init__.py
        ├── build_vocab_test.py
        ├── cached_path_test.py
        ├── checklist_test.py
        ├── diff_test.py
        ├── evaluate_test.py
        ├── find_learning_rate_test.py
        ├── main_test.py
        ├── no_op_train_test.py
        ├── predict_test.py
        ├── print_results_test.py
        ├── test_install_test.py
        └── train_test.py
    ├── common
        ├── __init__.py
        ├── cached_transformers_test.py
        ├── file_utils_test.py
        ├── from_params_test.py
        ├── logging_test.py
        ├── model_card_test.py
        ├── params_test.py
        ├── plugins_test.py
        ├── push_to_hub_test.py
        ├── registrable_test.py
        ├── sequences_test.py
        ├── task_card_test.py
        ├── testing.py
        └── util_test.py
    ├── confidence_checks
        ├── normalization_bias_verification_test.py
        └── task_checklists
        │   ├── __init__.py
        │   ├── sentiment_analysis_suite_test.py
        │   ├── task_suite_test.py
        │   └── utils_test.py
    ├── data
        ├── __init__.py
        ├── data_loaders
        │   ├── __init__.py
        │   ├── multiprocess_data_loader_test.py
        │   ├── multitask_data_loader_test.py
        │   └── multitask_scheduler_test.py
        ├── dataset_readers
        │   ├── __init__.py
        │   ├── babi_reader_test.py
        │   ├── conll2003_test.py
        │   ├── dataset_reader_test.py
        │   ├── dataset_utils
        │   │   └── span_utils_test.py
        │   ├── interleaving_dataset_reader_test.py
        │   ├── sequence_tagging_test.py
        │   ├── sharded_dataset_reader_test.py
        │   └── text_classification_json_test.py
        ├── dataset_test.py
        ├── fields
        │   ├── __init__.py
        │   ├── adjacency_field_test.py
        │   ├── field_test.py
        │   ├── flag_field_test.py
        │   ├── index_field_test.py
        │   ├── label_field_test.py
        │   ├── list_field_test.py
        │   ├── metadata_field_test.py
        │   ├── multilabel_field_test.py
        │   ├── sequence_label_field_test.py
        │   ├── span_field_test.py
        │   ├── tensor_field_test.py
        │   ├── text_field_test.py
        │   └── transformer_text_field_test.py
        ├── image_loader_test.py
        ├── instance_test.py
        ├── samplers
        │   ├── __init__.py
        │   ├── bucket_batch_sampler_test.py
        │   ├── max_tokens_batch_sampler_test.py
        │   └── sampler_test.py
        ├── token_indexers
        │   ├── __init__.py
        │   ├── character_token_indexer_test.py
        │   ├── elmo_indexer_test.py
        │   ├── pretrained_transformer_indexer_test.py
        │   ├── pretrained_transformer_mismatched_indexer_test.py
        │   ├── single_id_token_indexer_test.py
        │   └── spacy_indexer_test.py
        ├── tokenizers
        │   ├── __init__.py
        │   ├── character_tokenizer_test.py
        │   ├── letters_digits_tokenizer_test.py
        │   ├── pretrained_transformer_tokenizer_test.py
        │   ├── sentence_splitter_test.py
        │   └── spacy_tokenizer_test.py
        └── vocabulary_test.py
    ├── evaluation
        ├── __init__.py
        ├── evaluator_tests.py
        └── serializers
        │   ├── __init__.py
        │   └── serializer_test.py
    ├── fairness
        ├── __init__.py
        ├── bias_direction_test.py
        ├── bias_metrics_test.py
        ├── bias_mitigators_test.py
        ├── bias_utils_test.py
        └── fairness_metrics_test.py
    ├── interpret
        ├── __init__.py
        ├── hotflip_test.py
        ├── input_reduction_test.py
        ├── integrated_gradient_test.py
        ├── simple_gradient_test.py
        ├── simple_influence_test.py
        └── smooth_gradient_test.py
    ├── models
        ├── __init__.py
        ├── archival_test.py
        ├── basic_classifier_test.py
        ├── model_test.py
        ├── multitask_test.py
        ├── simple_tagger_test.py
        └── test_model_test_case.py
    ├── modules
        ├── attention
        │   ├── __init__.py
        │   ├── additive_attention_test.py
        │   ├── attention_test.py
        │   ├── bilinear_attention_test.py
        │   ├── cosine_attention_test.py
        │   ├── dot_product_attention_test.py
        │   ├── linear_attention_test.py
        │   └── scaled_dot_product_attention_test.py
        ├── augmented_lstm_test.py
        ├── bimpm_matching_test.py
        ├── conditional_random_field_test.py
        ├── elmo_test.py
        ├── encoder_base_test.py
        ├── feedforward_test.py
        ├── gated_sum_test.py
        ├── highway_test.py
        ├── lstm_cell_with_projection_test.py
        ├── masked_layer_norm_test.py
        ├── matrix_attention
        │   ├── __init__.py
        │   ├── bilinear_matrix_attention_test.py
        │   ├── cosine_matrix_attention_test.py
        │   ├── dot_product_matrix_attention_test.py
        │   ├── linear_matrix_attention_test.py
        │   ├── matrix_attention_test.py
        │   └── scaled_dot_product_matrix_attention_test.py
        ├── maxout_test.py
        ├── residual_with_layer_dropout_test.py
        ├── sampled_softmax_loss_test.py
        ├── scalar_mix_test.py
        ├── seq2seq_encoder_test.py
        ├── seq2seq_encoders
        │   ├── __init__.py
        │   ├── compose_encoder_test.py
        │   ├── feedforward_encoder_test.py
        │   ├── gated_cnn_encoder_test.py
        │   ├── pass_through_encoder_test.py
        │   ├── pytorch_seq2seq_wrapper_test.py
        │   └── pytorch_transformer_wrapper_test.py
        ├── seq2vec_encoder_test.py
        ├── seq2vec_encoders
        │   ├── __init__.py
        │   ├── bert_pooler_test.py
        │   ├── boe_encoder_test.py
        │   ├── cls_pooler_test.py
        │   ├── cnn_encoder_test.py
        │   ├── cnn_highway_encoder_test.py
        │   └── pytorch_seq2vec_wrapper_test.py
        ├── span_extractors
        │   ├── __init__.py
        │   ├── bidirectional_endpoint_span_extractor_test.py
        │   ├── endpoint_span_extractor_test.py
        │   ├── max_pooling_span_extractor_test.py
        │   └── self_attentive_span_extractor_test.py
        ├── stacked_alternating_lstm_test.py
        ├── stacked_bidirectional_lstm_test.py
        ├── stacked_elmo_lstm_test.py
        ├── text_field_embedders
        │   ├── __init__.py
        │   └── basic_text_field_embedder_test.py
        ├── time_distributed_test.py
        ├── token_embedders
        │   ├── __init__.py
        │   ├── bag_of_word_counts_token_embedder_test.py
        │   ├── elmo_token_embedder_test.py
        │   ├── embedding_test.py
        │   ├── pass_through_embedder_test.py
        │   ├── pretrained_transformer_embedder_test.py
        │   ├── pretrained_transformer_mismatched_embedder_test.py
        │   └── token_characters_encoder_test.py
        ├── transformer
        │   ├── activation_layer_test.py
        │   ├── bimodal_attention_test.py
        │   ├── bimodal_encoder_test.py
        │   ├── output_layer_test.py
        │   ├── positional_encoding_test.py
        │   ├── self_attention_test.py
        │   ├── t5_self_attention_test.py
        │   ├── t5_test.py
        │   ├── toolkit_test.py
        │   ├── transformer_embeddings_test.py
        │   ├── transformer_layer_test.py
        │   ├── transformer_module_test.py
        │   ├── transformer_pooler_test.py
        │   └── transformer_stack_test.py
        └── vision
        │   ├── __init__.py
        │   ├── grid_embedder_test.py
        │   └── region_detector_test.py
    ├── nn
        ├── __init__.py
        ├── beam_search_test.py
        ├── checkpoint
        │   ├── __init__.py
        │   ├── checkpoint_wrapper_test.py
        │   └── fairscale_checkpoint_wrapper_test.py
        ├── chu_liu_edmonds_test.py
        ├── initializers_test.py
        ├── parallel
        │   ├── __init__.py
        │   └── fairscale_fsdp_accelerator_test.py
        ├── pretrained_model_initializer_test.py
        ├── regularizers_test.py
        └── util_test.py
    ├── predictors
        ├── __init__.py
        ├── predictor_test.py
        ├── sentence_tagger_test.py
        └── text_classifier_test.py
    ├── training
        ├── __init__.py
        ├── checkpointer_test.py
        ├── learning_rate_schedulers
        │   ├── __init__.py
        │   ├── combined_test.py
        │   ├── cosine_test.py
        │   ├── learning_rate_scheduler_test.py
        │   └── slanted_triangular_test.py
        ├── metrics
        │   ├── __init__.py
        │   ├── attachment_scores_test.py
        │   ├── auc_test.py
        │   ├── average_test.py
        │   ├── bleu_test.py
        │   ├── boolean_accuracy_test.py
        │   ├── categorical_accuracy_test.py
        │   ├── covariance_test.py
        │   ├── entropy_test.py
        │   ├── evalb_bracketing_scorer_test.py
        │   ├── f1_measure_test.py
        │   ├── fbeta_measure_test.py
        │   ├── fbeta_multi_label_measure_test.py
        │   ├── fbeta_verbose_measure_test.py
        │   ├── mean_absolute_error_test.py
        │   ├── pearson_correlation_test.py
        │   ├── rouge_test.py
        │   ├── sequence_accuracy_test.py
        │   ├── span_based_f1_measure_test.py
        │   ├── spearman_correlation_test.py
        │   └── unigram_recall_test.py
        ├── momentum_schedulers
        │   ├── __init__.py
        │   └── inverted_triangular_test.py
        ├── moving_average_test.py
        ├── no_op_trainer_test.py
        ├── optimizer_test.py
        ├── trainer_test.py
        └── util_test.py
    ├── tutorials
        ├── __init__.py
        └── tagger
        │   ├── __init__.py
        │   └── basic_allennlp_test.py
    └── version_test.py


/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit = tests/*
3 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .dockerignore
2 | **.pyc
3 | **/__pycache__
4 | .gitignore
5 | .git
6 | .coverage
7 | .benchmarks
8 | .mypy_cache
9 | 


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | max-line-length = 115
 3 | 
 4 | ignore =
 5 |     # these rules don't play well with black (E203: whitespace before ':', W503: line break before binary operator)
 6 |     E203
 7 |     W503
 8 | 
 9 | exclude =
10 |     build/**
11 |     doc/**
12 |     tutorials/tagger/**
13 | 
14 | per-file-ignores =
15 |     # __init__.py files are allowed to have unused imports and lines-too-long
16 |     */__init__.py:F401
17 |     */**/**/__init__.py:F401,E501
18 | 
19 |     # tests don't have to respect
20 |     #  E731: do not assign a lambda expression, use a def
21 |     tests/**:E731
22 | 
23 |     # scripts don't have to respect
24 |     #  E402: imports not at top of file (because we mess with sys.path)
25 |     scripts/**:E402
26 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: 'Feature request'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Question
 3 | about: Ask a question
 4 | title: ''
 5 | labels: 'question'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | Please ask questions on [Stack Overflow](https://stackoverflow.com/questions/tagged/allennlp) rather than on GitHub. We monitor and triage Stack Overflow questions tagged `allennlp`, and questions there are more easily searchable for others.
11 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | <!-- To ensure we can review your pull request promptly, please complete this template entirely. -->
 2 | 
 3 | <!-- Please reference the issue number here. You can replace "Fixes" with "Closes" if it makes more sense. -->
 4 | Fixes # .
 5 | 
 6 | Changes proposed in this pull request:
 7 | <!-- Please list all changes/additions here. -->
 8 | -
 9 | 
10 | ## Before submitting
11 | 
12 | <!-- Please complete this checklist BEFORE submitting your PR to speed along the review process. -->
13 | - [ ] I've read and followed all steps in the [Making a pull request](https://github.com/allenai/allennlp/blob/main/CONTRIBUTING.md#making-a-pull-request)
14 |     section of the `CONTRIBUTING` docs.
15 | - [ ] I've updated or added any relevant docstrings following the syntax described in the
16 |     [Writing docstrings](https://github.com/allenai/allennlp/blob/main/CONTRIBUTING.md#writing-docstrings) section of the `CONTRIBUTING` docs.
17 | - [ ] If this PR fixes a bug, I've added a test that will fail without my fix.
18 | - [ ] If this PR adds a new feature, I've added tests that sufficiently cover my new functionality.
19 | 
20 | ## After submitting
21 | 
22 | <!-- Please complete this checklist AFTER submitting your PR to speed along the review process. -->
23 | - [ ] All GitHub Actions jobs for my pull request have passed.
24 | - [ ] **`codecov/patch`** reports high test coverage (at least 90%).
25 |     You can find this under the "Actions" tab of the pull request once the other checks have finished.
26 | 


--------------------------------------------------------------------------------
/.github/workflows/cffconvert.yml:
--------------------------------------------------------------------------------
 1 | name: cffconvert
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     paths:
 6 |       - CITATION.cff
 7 |   push:
 8 |     paths:
 9 |       - CITATION.cff
10 | 
11 | jobs:
12 |   validate:
13 |     name: "validate"
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |       - name: Check out a copy of the repository
17 |         uses: actions/checkout@v3
18 | 
19 |       - name: Check whether the citation metadata from CITATION.cff is valid
20 |         uses: citation-file-format/cffconvert-github-action@2.0.0
21 |         with:
22 |           args: "--validate"
23 | 


--------------------------------------------------------------------------------
/.github/workflows/issues.yml:
--------------------------------------------------------------------------------
 1 | name: Issues Bot
 2 | 
 3 | on:
 4 |   # Uncomment this PR trigger for testing.
 5 |   # pull_request:
 6 |   #   branches:
 7 |   #   - main
 8 |   schedule:
 9 |     - cron: '5 16 * * 1,2,3,4,5'  # runs at 16:05 UTC Monday - Friday
10 | 
11 | jobs:
12 |   close_stale_issues:
13 |     name: Close Stale Issues
14 |     if: github.repository == 'allenai/allennlp'
15 |     runs-on: ubuntu-latest
16 |     env:
17 |       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
18 |     steps:
19 |     - uses: actions/checkout@v3
20 | 
21 |     - name: Setup Python
22 |       uses: actions/setup-python@v4
23 |       with:
24 |         python-version: 3.7
25 | 
26 |     - name: Install requirements
27 |       run: |
28 |         pip install PyGithub
29 | 
30 |     - name: Close stale issues
31 |       run: |
32 |         python scripts/close_stale_issues.py
33 | 
34 |   ping_assignees:
35 |     name: Ping Inactive Assignees
36 |     if: github.repository == 'allenai/allennlp'
37 |     runs-on: ubuntu-latest
38 |     env:
39 |       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
40 |     steps:
41 |     - uses: actions/checkout@v3
42 | 
43 |     - name: Setup Python
44 |       uses: actions/setup-python@v4
45 |       with:
46 |         python-version: 3.7
47 | 
48 |     - name: Install requirements
49 |       run: |
50 |         pip install PyGithub
51 | 
52 |     - name: Ping inactive assignees
53 |       run: |
54 |         python scripts/ping_issue_assignees.py
55 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # build artifacts
 2 | 
 3 | .eggs/
 4 | .mypy_cache
 5 | allennlp.egg-info/
 6 | build/
 7 | dist/
 8 | pip-wheel-metadata/
 9 | context.tar.gz
10 | 
11 | 
12 | # dev tools
13 | 
14 | .envrc
15 | .python-version
16 | .idea
17 | .venv/
18 | .vscode/
19 | /*.iml
20 | 
21 | 
22 | # jupyter notebooks
23 | 
24 | .ipynb_checkpoints
25 | 
26 | 
27 | # miscellaneous
28 | 
29 | .cache/
30 | allennlp/tools/EVALB/evalb.dSYM/
31 | doc/_build/
32 | *.swp
33 | .DS_Store
34 | 
35 | 
36 | # python
37 | 
38 | *.pyc
39 | *.pyo
40 | __pycache__
41 | 
42 | 
43 | # testing and continuous integration
44 | 
45 | .coverage
46 | .pytest_cache/
47 | .benchmarks
48 | 
49 | # documentation build artifacts
50 | 
51 | docs/*.md
52 | docs/api
53 | site/
54 | mkdocs.yml
55 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | # YAML 1.2
 2 | ---
 3 | authors: 
 4 |   -
 5 |     affiliation: "Allen Institute for Artificial Intelligence"
 6 |     family-names: Gardner
 7 |     given-names: Matt
 8 |   -
 9 |     affiliation: "Allen Institute for Artificial Intelligence"
10 |     family-names: Grus
11 |     given-names: Joel
12 |   -
13 |     affiliation: "Allen Institute for Artificial Intelligence"
14 |     family-names: Neumann
15 |     given-names: Mark
16 |   -
17 |     affiliation: "Allen Institute for Artificial Intelligence"
18 |     family-names: Tafjord
19 |     given-names: Oyvind
20 |   -
21 |     affiliation: "Allen Institute for Artificial Intelligence"
22 |     family-names: Dasigi
23 |     given-names: Pradeep
24 |   -
25 |     affiliation: "Allen Institute for Artificial Intelligence"
26 |     family-names: Liu
27 |     given-names: Nelson
28 |   -
29 |     affiliation: "Allen Institute for Artificial Intelligence"
30 |     family-names: Peters
31 |     given-names: Matthew
32 |   -
33 |     affiliation: "Allen Institute for Artificial Intelligence"
34 |     family-names: Schmitz
35 |     given-names: Michael
36 |   -
37 |     affiliation: "Allen Institute for Artificial Intelligence"
38 |     family-names: Zettlemoyer
39 |     given-names: Luke
40 | cff-version: "1.2.0"
41 | license: "Apache-2.0"
42 | message: "If you use this software, please cite it using this metadata."
43 | repository-code: "https://github.com/allenai/allennlp"
44 | title: "AllenNLP: A Deep Semantic Natural Language Processing Platform"
45 | version: "2.10.1"
46 | doi: "10.18653/v1/W18-2501"
47 | ...
48 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# This Dockerfile creates an environment suitable for downstream usage of AllenNLP.
# It's built from a wheel installation of allennlp using the base images from
# https://github.com/allenai/docker-images/pkgs/container/pytorch

# TORCH selects the tag of the base image; override it with `--build-arg TORCH=...`.
ARG TORCH=1.12.0-cuda11.3-python3.8
FROM ghcr.io/allenai/pytorch:${TORCH}

WORKDIR /stage/allennlp

# Installing AllenNLP's dependencies is the most time-consuming part of building
# this Docker image, so we make use of layer caching here by adding the minimal files
# necessary to install the dependencies.
COPY allennlp/version.py allennlp/version.py
COPY setup.py .
COPY requirements.txt .
COPY dev-requirements.txt .
COPY constraints.txt .
# The empty `allennlp/__init__.py` and `README.md` are placeholders created so the
# editable install can run before the real sources are copied in.
RUN touch allennlp/__init__.py \
    && touch README.md \
    && pip install --no-cache-dir -c constraints.txt -e .[all]

# Now add the full package source and re-install just the package.
COPY allennlp allennlp
RUN pip install --no-cache-dir --no-deps -e .[all]

# NOTE(review): the `download-extras` target is defined in the Makefile, which is not
# visible here; confirm what it fetches before relying on it.
COPY Makefile .
RUN make download-extras

# From here on, /app/ is the working directory (and the default one for containers).
WORKDIR /app/

# Copy wrapper script to allow beaker to run resumable training workloads.
COPY scripts/ai2_internal/resumable_train.sh .

LABEL maintainer="allennlp-contact@allenai.org"

# Containers run the `allennlp` CLI; arguments given to `docker run` are passed to it.
ENTRYPOINT ["allennlp"]
37 | 


--------------------------------------------------------------------------------
/Dockerfile.test:
--------------------------------------------------------------------------------
 1 | # Used to build an image for running tests.
 2 | 
 3 | ARG TORCH=1.12.0-cuda11.3-python3.8
 4 | FROM ghcr.io/allenai/pytorch:${TORCH}
 5 | 
 6 | # These environment variables are helpful for debugging.
 7 | # See https://pytorch.org/docs/stable/distributed.html#common-environment-variables for more info.
 8 | ENV NCCL_DEBUG INFO
 9 | ENV NCCL_DEBUG_SUBSYS ALL
10 | 
11 | WORKDIR /stage/allennlp
12 | 
13 | # Installing AllenNLP's dependencies is the most time-consuming part of building
14 | # this Docker image, so we make use of layer caching here by adding the minimal files
15 | # necessary to install the dependencies.
16 | COPY allennlp/version.py allennlp/version.py
17 | COPY setup.py .
18 | COPY dev-requirements.txt .
19 | COPY constraints.txt .
20 | RUN touch allennlp/__init__.py \
21 |     && touch README.md \
22 |     && pip install --no-cache-dir -c constraints.txt -e . -r dev-requirements.txt
23 | 
24 | # Now add the full package source and re-install just the package.
25 | COPY . .
26 | RUN pip install --no-cache-dir --no-deps -e .
27 | 
28 | ENTRYPOINT ["make"]
29 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include README.md
3 | recursive-include allennlp *
4 | recursive-include scripts *
5 | global-exclude .DS_Store *.py[cod]
6 | prune **/__pycache__
7 | prune **/.mypy_cache
8 | 


--------------------------------------------------------------------------------
/allennlp/__init__.py:
--------------------------------------------------------------------------------
 1 | # We get a lot of these spurious warnings,
 2 | # see https://github.com/ContinuumIO/anaconda-issues/issues/6678
 3 | import warnings  # noqa
 4 | 
 5 | warnings.filterwarnings("ignore", message="numpy.dtype size changed")
 6 | warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
 7 | 
 8 | try:
 9 |     # On some systems this prevents the dreaded
10 |     # ImportError: dlopen: cannot load any more object with static TLS
11 |     import transformers, spacy, torch, numpy  # noqa
12 | 
13 | except ModuleNotFoundError:
14 |     print(
15 |         "Using AllenNLP requires the python packages Spacy, "
16 |         "Pytorch and Numpy to be installed. Please see "
17 |         "https://github.com/allenai/allennlp for installation instructions."
18 |     )
19 |     raise
20 | 
21 | from allennlp.version import VERSION as __version__  # noqa
22 | 


--------------------------------------------------------------------------------
/allennlp/__main__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import logging
 3 | import os
 4 | import sys
 5 | 
 6 | 
 7 | if os.environ.get("ALLENNLP_DEBUG"):
 8 |     LEVEL = logging.DEBUG
 9 | else:
10 |     level_name = os.environ.get("ALLENNLP_LOG_LEVEL", "INFO")
11 |     LEVEL = logging._nameToLevel.get(level_name, logging.INFO)
12 | 
13 | sys.path.insert(0, os.path.dirname(os.path.abspath(os.path.join(__file__, os.pardir))))
14 | logging.basicConfig(format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", level=LEVEL)
15 | 
16 | # filelock emits too many messages, so tell it to be quiet unless it has something
17 | # important to say.
18 | logging.getLogger("filelock").setLevel(logging.WARNING)
19 | 
20 | 
# transformers emits an annoying log message every time it's imported, so we filter that
# one message out specifically.
def _transformers_log_filter(record):
    """
    Logging filter that drops records whose message starts with "PyTorch version".

    Returns `False` (drop the record) for such messages and `True` (keep it) for
    everything else, including records whose `msg` is not a string.
    """
    message = record.msg
    # `record.msg` is whatever object was handed to the logging call, so it is not
    # guaranteed to be a `str`; check the type instead of letting `.startswith`
    # raise `AttributeError` from inside the filter.
    if isinstance(message, str) and message.startswith("PyTorch version"):
        return False
    return True


logging.getLogger("transformers.file_utils").addFilter(_transformers_log_filter)
30 | 
31 | 
def run():
    """
    Run the `allennlp` command line program.

    Installs the SIGTERM handler and then calls `allennlp.commands.main` with
    `prog="allennlp"`. Both imports happen inside the function, i.e. only when it
    is called.
    """
    from allennlp.commands import main  # noqa
    from allennlp.common.util import install_sigterm_handler

    # We want to be able to catch SIGTERM signals in addition to SIGINT (keyboard interrupt).
    install_sigterm_handler()

    main(prog="allennlp")


# Only start the CLI when this file is executed as the main module.
if __name__ == "__main__":
    run()
44 | 


--------------------------------------------------------------------------------
/allennlp/commands/checklist.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The `checklist` subcommand allows you to conduct behavioural
 3 | testing for your model's predictions using a trained model and its
 4 | [`Predictor`](../predictors/predictor.md#predictor) wrapper.
 5 | 
 6 | It is based on the optional checklist package; if this is not
 7 | available, the command will be replaced by a dummy.
 8 | """
 9 | 
10 | import argparse
11 | import logging
12 | 
13 | from allennlp.commands.subcommand import Subcommand
14 | 
15 | logger = logging.getLogger(__name__)
16 | 
try:
    from allennlp.commands._checklist_internal import CheckList
except ImportError:
    # The real subcommand could not be imported, so register a stand-in whose only
    # job is to tell users how to install the necessary dependency.

    def _dummy_output(args: argparse.Namespace):
        """Log installation instructions for the optional checklist integration."""
        logger.info(
            "The checklist integration of allennlp is optional; if you're using conda, "
            "it can be installed with `conda install allennlp-checklist`, "
            "otherwise use `pip install allennlp[checklist]`."
        )

    # need to work around https://github.com/python/mypy/issues/1153
    @Subcommand.register("checklist")
    class CheckList(Subcommand):  # type: ignore
        """Placeholder `checklist` subcommand used when the real one cannot be imported."""

        def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.ArgumentParser:
            """Add the placeholder parser and route it to `_dummy_output`."""
            placeholder = parser.add_parser(
                self.name,
                description="""Dummy command because checklist is not installed.""",
                help="Run a trained model through a checklist suite.",
            )
            placeholder.set_defaults(func=_dummy_output)
            return placeholder
42 | 


--------------------------------------------------------------------------------
/allennlp/common/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.common.from_params import FromParams
2 | from allennlp.common.lazy import Lazy
3 | from allennlp.common.params import Params
4 | from allennlp.common.registrable import Registrable
5 | from allennlp.common.tqdm import Tqdm
6 | from allennlp.common.util import JsonDict
7 | from allennlp.common.meta import Meta
8 | from allennlp.common.push_to_hf import push_to_hf
9 | 


--------------------------------------------------------------------------------
/allennlp/common/meta.py:
--------------------------------------------------------------------------------
 1 | from os import PathLike
 2 | from dataclasses import dataclass, asdict
 3 | import json
 4 | import logging
 5 | from typing import Union
 6 | 
 7 | from allennlp.version import VERSION
 8 | 
 9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | 
13 | META_NAME = "meta.json"
14 | 
15 | 
@dataclass
class Meta:
    """
    Meta data stored alongside a trained AllenNLP model, both in the
    serialization directory and in the archive.
    """

    # Version string recorded for the model.
    version: str

    @classmethod
    def new(cls) -> "Meta":
        """Build a `Meta` stamped with the module-level `VERSION`."""
        return cls(version=VERSION)

    def to_file(self, path: Union[PathLike, str]) -> None:
        """Write the fields of this object to `path` as a JSON object."""
        with open(path, "w") as handle:
            json.dump(asdict(self), handle)

    @classmethod
    def from_path(cls, path: Union[PathLike, str]) -> "Meta":
        """Read a JSON object from `path` and pass its entries to the constructor."""
        with open(path) as handle:
            return cls(**json.load(handle))
38 | 


--------------------------------------------------------------------------------
/allennlp/common/testing/checklist_test.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | from checklist.test_suite import TestSuite
 3 | from checklist.test_types import MFT as MinimumFunctionalityTest
 4 | from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite
 5 | 
 6 | 
 7 | @TaskSuite.register("fake-task-suite")
 8 | class FakeTaskSuite(TaskSuite):
 9 |     """
10 |     Fake checklist suite for testing purpose.
11 |     """
12 | 
13 |     def __init__(
14 |         self,
15 |         suite: Optional[TestSuite] = None,
16 |         fake_arg1: Optional[int] = None,
17 |         fake_arg2: Optional[int] = None,
18 |     ):
19 |         self._fake_arg1 = fake_arg1
20 |         self._fake_arg2 = fake_arg2
21 | 
22 |         if not suite:
23 |             suite = TestSuite()
24 | 
25 |         # Adding a simple checklist test.
26 |         test = MinimumFunctionalityTest(
27 |             ["sentence 1", "sentence 2"],
28 |             labels=0,
29 |             name="fake test 1",
30 |             capability="fake capability",
31 |             description="Test's description",
32 |         )
33 |         suite.add(test)
34 | 
35 |         super().__init__(suite)
36 | 


--------------------------------------------------------------------------------
/allennlp/common/testing/confidence_check_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from allennlp.data.vocabulary import Vocabulary
 3 | from allennlp.models.model import Model
 4 | 
 5 | 
 6 | class FakeModelForTestingNormalizationBiasVerification(Model):
 7 |     def __init__(self, use_bias=True):
 8 |         super().__init__(vocab=Vocabulary())
 9 |         self.conv = torch.nn.Conv2d(3, 5, kernel_size=1, bias=use_bias)
10 |         self.bn = torch.nn.BatchNorm2d(5)
11 | 
12 |     def forward(self, x):
13 |         # x: (B, 3, H, W)
14 |         out = self.bn(self.conv(x))
15 |         return {"loss": out.sum()}
16 | 


--------------------------------------------------------------------------------
/allennlp/confidence_checks/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.confidence_checks.verification_base import VerificationBase
2 | from allennlp.confidence_checks.normalization_bias_verification import NormalizationBiasVerification
3 | 


--------------------------------------------------------------------------------
/allennlp/confidence_checks/task_checklists/__init__.py:
--------------------------------------------------------------------------------
 1 | from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite
 2 | from allennlp.confidence_checks.task_checklists.sentiment_analysis_suite import (
 3 |     SentimentAnalysisSuite,
 4 | )
 5 | from allennlp.confidence_checks.task_checklists.question_answering_suite import (
 6 |     QuestionAnsweringSuite,
 7 | )
 8 | from allennlp.confidence_checks.task_checklists.textual_entailment_suite import (
 9 |     TextualEntailmentSuite,
10 | )
11 | 


--------------------------------------------------------------------------------
/allennlp/data/__init__.py:
--------------------------------------------------------------------------------
 1 | from allennlp.data.data_loaders import (
 2 |     DataLoader,
 3 |     TensorDict,
 4 |     allennlp_collate,
 5 | )
 6 | from allennlp.data.dataset_readers.dataset_reader import DatasetReader, DatasetReaderInput
 7 | from allennlp.data.fields.field import DataArray, Field
 8 | from allennlp.data.fields.text_field import TextFieldTensors
 9 | from allennlp.data.instance import Instance
10 | from allennlp.data.samplers import BatchSampler
11 | from allennlp.data.token_indexers.token_indexer import TokenIndexer, IndexedTokenList
12 | from allennlp.data.tokenizers import Token, Tokenizer
13 | from allennlp.data.vocabulary import Vocabulary
14 | from allennlp.data.batch import Batch
15 | from allennlp.data.image_loader import ImageLoader, TorchImageLoader
16 | 


--------------------------------------------------------------------------------
/allennlp/data/data_loaders/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.data.data_loaders.data_loader import DataLoader, TensorDict
2 | from allennlp.data.data_loaders.multiprocess_data_loader import MultiProcessDataLoader, WorkerError
3 | from allennlp.data.data_loaders.multitask_data_loader import MultiTaskDataLoader
4 | from allennlp.data.data_loaders.simple_data_loader import SimpleDataLoader
5 | from allennlp.data.data_loaders.data_collator import allennlp_collate
6 | 


--------------------------------------------------------------------------------
/allennlp/data/dataset_readers/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A :class:`~allennlp.data.dataset_readers.dataset_reader.DatasetReader`
 3 | reads a file and converts it to a collection of
 4 | :class:`~allennlp.data.instance.Instance` s.
 5 | The various subclasses know how to read specific filetypes
 6 | and produce datasets in the formats required by specific models.
 7 | """
 8 | 
 9 | 
10 | from allennlp.data.dataset_readers.dataset_reader import (
11 |     DatasetReader,
12 |     WorkerInfo,
13 |     DatasetReaderInput,
14 | )
15 | from allennlp.data.dataset_readers.babi import BabiReader
16 | from allennlp.data.dataset_readers.conll2003 import Conll2003DatasetReader
17 | from allennlp.data.dataset_readers.interleaving_dataset_reader import InterleavingDatasetReader
18 | from allennlp.data.dataset_readers.multitask import MultiTaskDatasetReader
19 | from allennlp.data.dataset_readers.sequence_tagging import SequenceTaggingDatasetReader
20 | from allennlp.data.dataset_readers.sharded_dataset_reader import ShardedDatasetReader
21 | from allennlp.data.dataset_readers.text_classification_json import TextClassificationJsonReader
22 | 


--------------------------------------------------------------------------------
/allennlp/data/dataset_readers/dataset_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.data.dataset_readers.dataset_utils.span_utils import enumerate_spans
2 | from allennlp.data.dataset_readers.dataset_utils.span_utils import bio_tags_to_spans
3 | from allennlp.data.dataset_readers.dataset_utils.span_utils import to_bioul, iob1_to_bioul
4 | from allennlp.data.dataset_readers.dataset_utils.span_utils import bioul_tags_to_spans
5 | 


--------------------------------------------------------------------------------
/allennlp/data/fields/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A :class:`~allennlp.data.fields.field.Field` is some piece of a data instance
 3 | that ends up as an array in a model.
 4 | """
 5 | 
 6 | from allennlp.data.fields.field import Field
 7 | from allennlp.data.fields.adjacency_field import AdjacencyField
 8 | from allennlp.data.fields.tensor_field import TensorField
 9 | from allennlp.data.fields.flag_field import FlagField
10 | from allennlp.data.fields.index_field import IndexField
11 | from allennlp.data.fields.label_field import LabelField
12 | from allennlp.data.fields.list_field import ListField
13 | from allennlp.data.fields.metadata_field import MetadataField
14 | from allennlp.data.fields.multilabel_field import MultiLabelField
15 | from allennlp.data.fields.namespace_swapping_field import NamespaceSwappingField
16 | from allennlp.data.fields.sequence_field import SequenceField
17 | from allennlp.data.fields.sequence_label_field import SequenceLabelField
18 | from allennlp.data.fields.span_field import SpanField
19 | from allennlp.data.fields.text_field import TextField
20 | from allennlp.data.fields.array_field import ArrayField
21 | from allennlp.data.fields.transformer_text_field import TransformerTextField
22 | 


--------------------------------------------------------------------------------
/allennlp/data/fields/array_field.py:
--------------------------------------------------------------------------------
1 | from allennlp.data.fields.tensor_field import TensorField
2 | 
3 | ArrayField = TensorField
4 | """For backwards compatibility, we keep the name `ArrayField`."""
5 | 


--------------------------------------------------------------------------------
/allennlp/data/fields/flag_field.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict, List
 2 | 
 3 | 
 4 | from allennlp.data.fields.field import Field
 5 | 
 6 | 
 7 | class FlagField(Field[Any]):
 8 |     """
 9 |     A class representing a flag, which must be constant across all instances in a batch.
10 |     This will be passed to a `forward` method as a single value of whatever type you pass in.
11 |     """
12 | 
13 |     __slots__ = ["flag_value"]
14 | 
15 |     def __init__(self, flag_value: Any) -> None:
16 |         self.flag_value = flag_value
17 | 
18 |     def get_padding_lengths(self) -> Dict[str, int]:
19 |         return {}
20 | 
21 |     def as_tensor(self, padding_lengths: Dict[str, int]) -> Any:
22 |         return self.flag_value
23 | 
24 |     def empty_field(self):
25 |         # Because this has to be constant across all instances in a batch, we need to keep the same
26 |         # value.
27 |         return FlagField(self.flag_value)
28 | 
29 |     def __str__(self) -> str:
30 |         return f"FlagField({self.flag_value})"
31 | 
32 |     def __len__(self) -> int:
33 |         return 1
34 | 
35 |     def batch_tensors(self, tensor_list: List[Any]) -> Any:
36 |         if len(set(tensor_list)) != 1:
37 |             raise ValueError(
38 |                 f"Got different values in a FlagField when trying to batch them: {tensor_list}"
39 |             )
40 |         return tensor_list[0]
41 | 
42 |     def human_readable_repr(self) -> Any:
43 |         if hasattr(self.flag_value, "human_readable_repr"):
44 |             return self.flag_value.human_readable_repr()
45 |         return self.flag_value
46 | 


--------------------------------------------------------------------------------
/allennlp/data/fields/sequence_field.py:
--------------------------------------------------------------------------------
 1 | from allennlp.data.fields.field import DataArray, Field
 2 | 
 3 | 
class SequenceField(Field[DataArray]):
    """
    A `SequenceField` represents a sequence of things.  This class just adds a method onto
    `Field`: :func:`sequence_length`.  It exists so that `SequenceLabelField`, `IndexField` and other
    similar `Fields` can have a single type to require, with a consistent API, whether they are
    pointing to words in a `TextField`, items in a `ListField`, or something else.
    """

    # This class declares no slots of its own.
    __slots__ = []  # type: ignore

    def sequence_length(self) -> int:
        """
        How many elements are there in this sequence?

        Not implemented on this base class: calling it here raises `NotImplementedError`.
        """
        raise NotImplementedError

    def empty_field(self) -> "SequenceField":
        """
        Not implemented on this base class: calling it here raises `NotImplementedError`.
        The annotation states that the result is itself a `SequenceField`.
        """
        raise NotImplementedError
22 | 


--------------------------------------------------------------------------------
/allennlp/data/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.data.samplers.batch_sampler import BatchSampler
2 | from allennlp.data.samplers.bucket_batch_sampler import BucketBatchSampler
3 | from allennlp.data.samplers.max_tokens_batch_sampler import MaxTokensBatchSampler
4 | 


--------------------------------------------------------------------------------
/allennlp/data/samplers/batch_sampler.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Iterable, Sequence, Optional
 2 | 
 3 | from allennlp.common.registrable import Registrable
 4 | from allennlp.data.instance import Instance
 5 | 
 6 | 
class BatchSampler(Registrable):
    """
    Base class for batch samplers.

    `get_batch_indices` and `get_num_batches` raise `NotImplementedError` here;
    `get_batch_size` has a default that returns `None`.
    """

    def get_batch_indices(self, instances: Sequence[Instance]) -> Iterable[List[int]]:
        """
        Return an iterable of integer lists, one list per batch.

        Presumably each integer is an index into `instances`; confirm against a
        concrete sampler. Not implemented on this base class.
        """
        raise NotImplementedError

    def get_num_batches(self, instances: Sequence[Instance]) -> int:
        """
        Return the number of batches for `instances`. Not implemented on this base class.
        """
        raise NotImplementedError

    def get_batch_size(self) -> Optional[int]:
        """
        Not all `BatchSamplers` define a consistent `batch_size`, but those that
        do should override this method.
        """
        return None
20 | 


--------------------------------------------------------------------------------
/allennlp/data/token_indexers/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A `TokenIndexer` determines how string tokens get represented as arrays of indices in a model.
 3 | """
 4 | 
 5 | from allennlp.data.token_indexers.single_id_token_indexer import SingleIdTokenIndexer
 6 | from allennlp.data.token_indexers.token_characters_indexer import TokenCharactersIndexer
 7 | from allennlp.data.token_indexers.token_indexer import TokenIndexer
 8 | from allennlp.data.token_indexers.elmo_indexer import ELMoTokenCharactersIndexer
 9 | from allennlp.data.token_indexers.spacy_indexer import SpacyTokenIndexer
10 | from allennlp.data.token_indexers.pretrained_transformer_indexer import PretrainedTransformerIndexer
11 | from allennlp.data.token_indexers.pretrained_transformer_mismatched_indexer import (
12 |     PretrainedTransformerMismatchedIndexer,
13 | )
14 | 


--------------------------------------------------------------------------------
/allennlp/data/tokenizers/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module contains various classes for performing
 3 | tokenization.
 4 | """
 5 | 
 6 | from allennlp.data.tokenizers.token_class import Token
 7 | from allennlp.data.tokenizers.tokenizer import Tokenizer
 8 | from allennlp.data.tokenizers.spacy_tokenizer import SpacyTokenizer
 9 | from allennlp.data.tokenizers.letters_digits_tokenizer import LettersDigitsTokenizer
10 | from allennlp.data.tokenizers.pretrained_transformer_tokenizer import PretrainedTransformerTokenizer
11 | from allennlp.data.tokenizers.character_tokenizer import CharacterTokenizer
12 | from allennlp.data.tokenizers.sentence_splitter import SentenceSplitter
13 | from allennlp.data.tokenizers.whitespace_tokenizer import WhitespaceTokenizer
14 | 


--------------------------------------------------------------------------------
/allennlp/data/tokenizers/letters_digits_tokenizer.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import List
 3 | 
 4 | 
 5 | from allennlp.data.tokenizers.token_class import Token
 6 | from allennlp.data.tokenizers.tokenizer import Tokenizer
 7 | 
 8 | 
 9 | @Tokenizer.register("letters_digits")
10 | class LettersDigitsTokenizer(Tokenizer):
11 |     """
12 |     A `Tokenizer` which keeps runs of (unicode) letters and runs of digits together, while
13 |     every other non-whitespace character becomes a separate word.
14 | 
15 |     Registered as a `Tokenizer` with name "letters_digits".
16 |     """
17 | 
18 |     def tokenize(self, text: str) -> List[Token]:
19 |         # We use the [^\W\d_] pattern as a trick to match unicode letters
20 |         tokens = [Token(m.group(), idx=m.start()) for m in re.finditer(r"[^\W\d_]+|\d+|\S", text)]
21 |         return tokens
22 | 


--------------------------------------------------------------------------------
/allennlp/data/tokenizers/whitespace_tokenizer.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Dict, Any
 2 | 
 3 | 
 4 | from allennlp.data.tokenizers.token_class import Token
 5 | from allennlp.data.tokenizers.tokenizer import Tokenizer
 6 | 
 7 | 
 8 | @Tokenizer.register("whitespace")
 9 | @Tokenizer.register("just_spaces")
10 | class WhitespaceTokenizer(Tokenizer):
11 |     """
12 |     A `Tokenizer` that assumes you've already done your own tokenization somehow and have
13 |     separated the tokens by spaces.  We just split the input string on whitespace and return the
14 |     resulting list.
15 | 
16 |     Note that we use `text.split()`, which means that the amount of whitespace between the
17 |     tokens does not matter.  This will never result in spaces being included as tokens.
18 | 
19 |     Registered as a `Tokenizer` with name "whitespace" and "just_spaces".
20 |     """
21 | 
22 |     def tokenize(self, text: str) -> List[Token]:
23 |         return [Token(t) for t in text.split()]
24 | 
25 |     def _to_params(self) -> Dict[str, Any]:
26 |         return {"type": "whitespace"}
27 | 


--------------------------------------------------------------------------------
/allennlp/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.evaluation.evaluator import Evaluator, SimpleEvaluator
2 | from allennlp.evaluation.serializers.serializers import Serializer
3 | 


--------------------------------------------------------------------------------
/allennlp/evaluation/postprocessors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/allennlp/evaluation/postprocessors/__init__.py


--------------------------------------------------------------------------------
/allennlp/evaluation/serializers/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.evaluation.serializers.serializers import SimpleSerializer
2 | 


--------------------------------------------------------------------------------
/allennlp/interpret/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.interpret.attackers.attacker import Attacker
2 | from allennlp.interpret.saliency_interpreters.saliency_interpreter import SaliencyInterpreter
3 | from allennlp.interpret.influence_interpreters.influence_interpreter import InfluenceInterpreter
4 | 


--------------------------------------------------------------------------------
/allennlp/interpret/attackers/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.interpret.attackers.attacker import Attacker
2 | from allennlp.interpret.attackers.input_reduction import InputReduction
3 | from allennlp.interpret.attackers.hotflip import Hotflip
4 | 


--------------------------------------------------------------------------------
/allennlp/interpret/influence_interpreters/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.interpret.influence_interpreters.influence_interpreter import InfluenceInterpreter
2 | from allennlp.interpret.influence_interpreters.simple_influence import SimpleInfluence
3 | 


--------------------------------------------------------------------------------
/allennlp/interpret/saliency_interpreters/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.interpret.saliency_interpreters.saliency_interpreter import SaliencyInterpreter
2 | from allennlp.interpret.saliency_interpreters.simple_gradient import SimpleGradient
3 | from allennlp.interpret.saliency_interpreters.integrated_gradient import IntegratedGradient
4 | from allennlp.interpret.saliency_interpreters.smooth_gradient import SmoothGradient
5 | 


--------------------------------------------------------------------------------
/allennlp/models/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | These submodules contain the classes for AllenNLP models,
 3 | all of which are subclasses of `Model`.
 4 | """
 5 | 
 6 | from allennlp.models.model import Model
 7 | from allennlp.models.archival import archive_model, load_archive, Archive
 8 | from allennlp.models.basic_classifier import BasicClassifier
 9 | from allennlp.models.multitask import MultiTaskModel
10 | from allennlp.models.simple_tagger import SimpleTagger
11 | 


--------------------------------------------------------------------------------
/allennlp/models/heads/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.models.heads.head import Head
2 | from allennlp.models.heads.classifier_head import ClassifierHead
3 | 


--------------------------------------------------------------------------------
/allennlp/models/heads/head.py:
--------------------------------------------------------------------------------
 1 | from allennlp.models.model import Model
 2 | 
 3 | 
 4 | class Head(Model):
 5 |     """
 6 |     A `Head` is a `Model` that takes _already encoded input_ and typically does simple computation
 7 |     before returning a loss.
 8 | 
 9 |     There isn't currently any difference in API between a `Model` and a `Head`, but we have this
10 |     separate type as both a signaling mechanism for what to expect when looking at a `Head` class,
11 |     and so that we can use this as a more informative type annotation when building models that use
12 |     `Heads` as inputs.
13 | 
14 |     One additional consideration in a `Head` is that `make_output_human_readable` needs to account
15 |     for the case where it gets called without first having `forward` be called on the head.  This is
16 |     because at the point where we call `make_output_human_readable`, we don't know which heads were
17 |     used in `forward`, and trying to save the state is messy.  So just make sure that you always
18 |     have conditional logic in `make_output_human_readable` when you implement a `Head`.
19 |     """
20 | 
21 |     pass
22 | 


--------------------------------------------------------------------------------
/allennlp/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Custom PyTorch
 3 | `Module <https://pytorch.org/docs/master/nn.html#torch.nn.Module>`_ s
 4 | that are used as components in AllenNLP `Model` s.
 5 | """
 6 | 
 7 | from allennlp.modules.attention import Attention
 8 | from allennlp.modules.backbones import Backbone
 9 | from allennlp.modules.bimpm_matching import BiMpmMatching
10 | from allennlp.modules.conditional_random_field import ConditionalRandomField
11 | from allennlp.modules.elmo import Elmo
12 | from allennlp.modules.feedforward import FeedForward
13 | from allennlp.modules.gated_sum import GatedSum
14 | from allennlp.modules.highway import Highway
15 | from allennlp.modules.input_variational_dropout import InputVariationalDropout
16 | from allennlp.modules.layer_norm import LayerNorm
17 | from allennlp.modules.matrix_attention import MatrixAttention
18 | from allennlp.modules.maxout import Maxout
19 | from allennlp.modules.residual_with_layer_dropout import ResidualWithLayerDropout
20 | from allennlp.modules.scalar_mix import ScalarMix
21 | from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder
22 | from allennlp.modules.seq2vec_encoders import Seq2VecEncoder
23 | from allennlp.modules.text_field_embedders import TextFieldEmbedder
24 | from allennlp.modules.time_distributed import TimeDistributed
25 | from allennlp.modules.token_embedders import TokenEmbedder, Embedding
26 | from allennlp.modules.softmax_loss import SoftmaxLoss
27 | 


--------------------------------------------------------------------------------
/allennlp/modules/attention/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.modules.attention.attention import Attention
2 | from allennlp.modules.attention.bilinear_attention import BilinearAttention
3 | from allennlp.modules.attention.additive_attention import AdditiveAttention
4 | from allennlp.modules.attention.cosine_attention import CosineAttention
5 | from allennlp.modules.attention.dot_product_attention import DotProductAttention
6 | from allennlp.modules.attention.linear_attention import LinearAttention
7 | from allennlp.modules.attention.scaled_dot_product_attention import ScaledDotProductAttention
8 | 


--------------------------------------------------------------------------------
/allennlp/modules/attention/cosine_attention.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.modules.attention.attention import Attention
 4 | from allennlp.nn import util
 5 | 
 6 | 
 7 | @Attention.register("cosine")
 8 | class CosineAttention(Attention):
 9 |     """
10 |     Computes attention between a vector and a matrix using cosine similarity.
11 | 
12 |     Registered as an `Attention` with name "cosine".
13 |     """
14 | 
15 |     def _forward_internal(self, vector: torch.Tensor, matrix: torch.Tensor) -> torch.Tensor:
16 |         a_norm = vector / (
17 |             vector.norm(p=2, dim=-1, keepdim=True) + util.tiny_value_of_dtype(vector.dtype)
18 |         )
19 |         b_norm = matrix / (
20 |             matrix.norm(p=2, dim=-1, keepdim=True) + util.tiny_value_of_dtype(matrix.dtype)
21 |         )
22 |         return torch.bmm(a_norm.unsqueeze(dim=1), b_norm.transpose(-1, -2)).squeeze(1)
23 | 


--------------------------------------------------------------------------------
/allennlp/modules/attention/dot_product_attention.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.modules.attention.attention import Attention
 4 | 
 5 | 
 6 | @Attention.register("dot_product")
 7 | class DotProductAttention(Attention):
 8 |     """
 9 |     Computes attention between a vector and a matrix using dot product.
10 | 
11 |     Reference: [Attention Is All You Need (Vaswani et al, 2017)]
12 |     (https://api.semanticscholar.org/CorpusID:13756489)
13 | 
14 |     Registered as an `Attention` with name "dot_product".
15 |     """
16 | 
17 |     def _forward_internal(self, vector: torch.Tensor, matrix: torch.Tensor) -> torch.Tensor:
18 |         return matrix.bmm(vector.unsqueeze(-1)).squeeze(-1)
19 | 


--------------------------------------------------------------------------------
/allennlp/modules/attention/scaled_dot_product_attention.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | from typing import Optional
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | from allennlp.modules.attention.dot_product_attention import DotProductAttention
 8 | from allennlp.modules.attention.attention import Attention
 9 | 
10 | 
@Attention.register("scaled_dot_product")
class ScaledDotProductAttention(DotProductAttention):
    """
    Computes attention between two tensors using scaled dot product.

    Reference: [Attention Is All You Need (Vaswani et al, 2017)]
    (https://api.semanticscholar.org/CorpusID:13756489)

    Registered as an `Attention` with name "scaled_dot_product".

    # Parameters

    scaling_factor : `Optional[int]`, optional (default=`None`)
        The similarity score is divided by the square root of `scaling_factor`.  When it is
        `None` (or otherwise falsy), the size of the last dimension of `matrix` is used instead.
    normalize : `bool`, optional (default=`True`)
        If true, we normalize the computed similarities with a softmax, to return a probability
        distribution for your attention.  If false, this is just computing a similarity score.
    """

    def __init__(self, scaling_factor: Optional[int] = None, normalize: bool = True) -> None:
        super().__init__(normalize)
        self.scaling_factor = scaling_factor

    def _forward_internal(self, vector: torch.Tensor, matrix: torch.Tensor) -> torch.Tensor:
        """
        Return the dot-product scores of the parent class divided by `sqrt(scaling_factor)`.
        """
        # Fall back to the embedding size when no explicit scaling factor was configured.
        divisor = self.scaling_factor if self.scaling_factor else matrix.size(-1)
        raw_scores = super()._forward_internal(vector, matrix)
        return raw_scores / math.sqrt(divisor)
38 | 


--------------------------------------------------------------------------------
/allennlp/modules/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.modules.backbones.backbone import Backbone
2 | from allennlp.modules.backbones.pretrained_transformer_backbone import PretrainedTransformerBackbone
3 | from allennlp.modules.backbones.vilbert_backbone import VilbertBackbone
4 | 


--------------------------------------------------------------------------------
/allennlp/modules/conditional_random_field/__init__.py:
--------------------------------------------------------------------------------
 1 | from allennlp.modules.conditional_random_field.conditional_random_field import (
 2 |     ConditionalRandomField,
 3 |     allowed_transitions,
 4 | )
 5 | from allennlp.modules.conditional_random_field.conditional_random_field_wemission import (
 6 |     ConditionalRandomFieldWeightEmission,
 7 | )
 8 | from allennlp.modules.conditional_random_field.conditional_random_field_wtrans import (
 9 |     ConditionalRandomFieldWeightTrans,
10 | )
11 | from allennlp.modules.conditional_random_field.conditional_random_field_wlannoy import (
12 |     ConditionalRandomFieldWeightLannoy,
13 | )
14 | 


--------------------------------------------------------------------------------
/allennlp/modules/gated_sum.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.nn import Activation
 4 | 
 5 | 
 6 | class GatedSum(torch.nn.Module):
 7 |     """
 8 |     This `Module` represents a gated sum of two tensors `a` and `b`. Specifically:
 9 |     ```
10 |     f = activation(W [a; b])
11 |     out = f * a + (1 - f) * b
12 |     ```
13 | 
14 |     # Parameters
15 | 
16 |     input_dim : `int`, required
17 |         The dimensionality of the input. We assume the input have shape `(..., input_dim)`.
18 |     activation : `Activation`, optional (default = `torch.nn.Sigmoid()`)
19 |         The activation function to use.
20 |     """
21 | 
22 |     def __init__(self, input_dim: int, activation: Activation = torch.nn.Sigmoid()) -> None:
23 |         super().__init__()
24 |         self.input_dim = input_dim
25 |         self._gate = torch.nn.Linear(input_dim * 2, 1)
26 |         self._activation = activation
27 | 
28 |     def get_input_dim(self):
29 |         return self.input_dim
30 | 
31 |     def get_output_dim(self):
32 |         return self.input_dim
33 | 
34 |     def forward(self, input_a: torch.Tensor, input_b: torch.Tensor) -> torch.Tensor:
35 |         if input_a.size() != input_b.size():
36 |             raise ValueError("The input must have the same size.")
37 |         if input_a.size(-1) != self.input_dim:
38 |             raise ValueError("Input size must match `input_dim`.")
39 |         gate_value = self._activation(self._gate(torch.cat([input_a, input_b], -1)))
40 |         return gate_value * input_a + (1 - gate_value) * input_b
41 | 


--------------------------------------------------------------------------------
/allennlp/modules/input_variational_dropout.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class InputVariationalDropout(torch.nn.Dropout):
 5 |     """
 6 |     Apply the dropout technique in Gal and Ghahramani, [Dropout as a Bayesian Approximation:
 7 |     Representing Model Uncertainty in Deep Learning](https://arxiv.org/abs/1506.02142) to a
 8 |     3D tensor.
 9 | 
10 |     This module accepts a 3D tensor of shape `(batch_size, num_timesteps, embedding_dim)`
11 |     and samples a single dropout mask of shape `(batch_size, embedding_dim)` and applies
12 |     it to every time step.
13 |     """
14 | 
15 |     def forward(self, input_tensor):
16 | 
17 |         """
18 |         Apply dropout to input tensor.
19 | 
20 |         # Parameters
21 | 
22 |         input_tensor : `torch.FloatTensor`
23 |             A tensor of shape `(batch_size, num_timesteps, embedding_dim)`
24 | 
25 |         # Returns
26 | 
27 |         output : `torch.FloatTensor`
28 |             A tensor of shape `(batch_size, num_timesteps, embedding_dim)` with dropout applied.
29 |         """
30 |         ones = input_tensor.data.new_ones(input_tensor.shape[0], input_tensor.shape[-1])
31 |         dropout_mask = torch.nn.functional.dropout(ones, self.p, self.training, inplace=False)
32 |         if self.inplace:
33 |             input_tensor *= dropout_mask.unsqueeze(1)
34 |             return None
35 |         else:
36 |             return dropout_mask.unsqueeze(1) * input_tensor
37 | 


--------------------------------------------------------------------------------
/allennlp/modules/layer_norm.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.nn import util
 4 | 
 5 | 
 6 | class LayerNorm(torch.nn.Module):
 7 | 
 8 |     """
 9 |     An implementation of [Layer Normalization](
10 |     https://www.semanticscholar.org/paper/Layer-Normalization-Ba-Kiros/97fb4e3d45bb098e27e0071448b6152217bd35a5).
11 | 
12 |     Layer Normalization stabilises the training of deep neural networks by
13 |     normalising the outputs of neurons from a particular layer. It computes:
14 | 
15 |     output = (gamma * (tensor - mean) / (std + eps)) + beta
16 | 
17 |     # Parameters
18 | 
19 |     dimension : `int`, required.
20 |         The dimension of the layer output to normalize.
21 | 
22 |     # Returns
23 | 
24 |     The normalized layer output.
25 |     """  # noqa
26 | 
27 |     def __init__(self, dimension: int) -> None:
28 |         super().__init__()
29 |         self.gamma = torch.nn.Parameter(torch.ones(dimension))
30 |         self.beta = torch.nn.Parameter(torch.zeros(dimension))
31 | 
32 |     def forward(self, tensor: torch.Tensor):
33 |         mean = tensor.mean(-1, keepdim=True)
34 |         std = tensor.std(-1, unbiased=False, keepdim=True)
35 |         return (
36 |             self.gamma * (tensor - mean) / (std + util.tiny_value_of_dtype(std.dtype)) + self.beta
37 |         )
38 | 


--------------------------------------------------------------------------------
/allennlp/modules/masked_layer_norm.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.nn import util
 4 | 
 5 | 
 6 | class MaskedLayerNorm(torch.nn.Module):
 7 |     """
 8 |     See LayerNorm for details.
 9 | 
10 |     Note, however, that unlike LayerNorm this norm includes a batch component.
11 |     """
12 | 
13 |     def __init__(self, size: int, gamma0: float = 0.1) -> None:
14 |         super().__init__()
15 |         self.gamma = torch.nn.Parameter(torch.ones(1, 1, size) * gamma0)
16 |         self.beta = torch.nn.Parameter(torch.zeros(1, 1, size))
17 |         self.size = size
18 | 
19 |     def forward(self, tensor: torch.Tensor, mask: torch.BoolTensor) -> torch.Tensor:
20 | 
21 |         broadcast_mask = mask.unsqueeze(-1)
22 |         num_elements = broadcast_mask.sum() * self.size
23 |         mean = (tensor * broadcast_mask).sum() / num_elements
24 |         masked_centered = (tensor - mean) * broadcast_mask
25 |         std = torch.sqrt(
26 |             (masked_centered * masked_centered).sum() / num_elements
27 |             + util.tiny_value_of_dtype(tensor.dtype)
28 |         )
29 |         return (
30 |             self.gamma * (tensor - mean) / (std + util.tiny_value_of_dtype(tensor.dtype))
31 |             + self.beta
32 |         )
33 | 


--------------------------------------------------------------------------------
/allennlp/modules/matrix_attention/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
2 | from allennlp.modules.matrix_attention.bilinear_matrix_attention import BilinearMatrixAttention
3 | from allennlp.modules.matrix_attention.cosine_matrix_attention import CosineMatrixAttention
4 | from allennlp.modules.matrix_attention.dot_product_matrix_attention import DotProductMatrixAttention
5 | from allennlp.modules.matrix_attention.scaled_dot_product_matrix_attention import (
6 |     ScaledDotProductMatrixAttention,
7 | )
8 | from allennlp.modules.matrix_attention.linear_matrix_attention import LinearMatrixAttention
9 | 


--------------------------------------------------------------------------------
/allennlp/modules/matrix_attention/cosine_matrix_attention.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
 5 | from allennlp.nn import util
 6 | 
 7 | 
 8 | @MatrixAttention.register("cosine")
 9 | class CosineMatrixAttention(MatrixAttention):
10 |     """
11 |     Computes attention between every entry in matrix_1 with every entry in matrix_2 using cosine
12 |     similarity.
13 | 
14 |     Registered as a `MatrixAttention` with name "cosine".
15 |     """
16 | 
17 |     def forward(self, matrix_1: torch.Tensor, matrix_2: torch.Tensor) -> torch.Tensor:
18 |         a_norm = matrix_1 / (
19 |             matrix_1.norm(p=2, dim=-1, keepdim=True) + util.tiny_value_of_dtype(matrix_1.dtype)
20 |         )
21 |         b_norm = matrix_2 / (
22 |             matrix_2.norm(p=2, dim=-1, keepdim=True) + util.tiny_value_of_dtype(matrix_2.dtype)
23 |         )
24 |         return torch.bmm(a_norm, b_norm.transpose(-1, -2))
25 | 


--------------------------------------------------------------------------------
/allennlp/modules/matrix_attention/dot_product_matrix_attention.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
 5 | 
 6 | 
 7 | @MatrixAttention.register("dot_product")
 8 | class DotProductMatrixAttention(MatrixAttention):
 9 |     """
10 |     Computes attention between every entry in matrix_1 with every entry in matrix_2 using a dot
11 |     product.
12 | 
13 |     Registered as a `MatrixAttention` with name "dot_product".
14 |     """
15 | 
16 |     def forward(self, matrix_1: torch.Tensor, matrix_2: torch.Tensor) -> torch.Tensor:
17 |         return matrix_1.matmul(matrix_2.transpose(-1, -2))
18 | 


--------------------------------------------------------------------------------
/allennlp/modules/matrix_attention/matrix_attention.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.common.registrable import Registrable
 4 | 
 5 | 
 6 | class MatrixAttention(torch.nn.Module, Registrable):
 7 |     """
 8 |     `MatrixAttention` takes two matrices as input and returns a matrix of attentions.
 9 | 
10 |     We compute the similarity between each row in each matrix and return unnormalized similarity
11 |     scores. Because these scores are unnormalized, we don't take a mask as input; it's up to the
12 |     caller to deal with masking properly when this output is used.
13 | 
14 |     Input:
15 |         - matrix_1 : `(batch_size, num_rows_1, embedding_dim_1)`
16 |         - matrix_2 : `(batch_size, num_rows_2, embedding_dim_2)`
17 | 
18 |     Output:
19 |         - `(batch_size, num_rows_1, num_rows_2)`
20 |     """
21 | 
22 |     def forward(self, matrix_1: torch.Tensor, matrix_2: torch.Tensor) -> torch.Tensor:
23 |         raise NotImplementedError
24 | 


--------------------------------------------------------------------------------
/allennlp/modules/matrix_attention/scaled_dot_product_matrix_attention.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import torch
 4 | 
 5 | 
 6 | from allennlp.modules.matrix_attention.dot_product_matrix_attention import DotProductMatrixAttention
 7 | from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
 8 | 
 9 | 
@MatrixAttention.register("scaled_dot_product")
class ScaledDotProductMatrixAttention(DotProductMatrixAttention):
    """
    Computes attention between every entry in matrix_1 with every entry in matrix_2 using a dot
    product. Scales the result by the size of the embeddings: the scores are divided by the
    square root of the last dimension of `matrix_1`.

    Registered as a `MatrixAttention` with name "scaled_dot_product".
    """

    def forward(self, matrix_1: torch.Tensor, matrix_2: torch.Tensor) -> torch.Tensor:
        """Return the parent's dot-product scores divided by `sqrt(matrix_1.size(-1))`."""
        unscaled = super().forward(matrix_1, matrix_2)
        return unscaled / math.sqrt(matrix_1.size(-1))
21 | 


--------------------------------------------------------------------------------
/allennlp/modules/seq2seq_encoders/feedforward_encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | from allennlp.modules.feedforward import FeedForward
 5 | from allennlp.modules.seq2seq_encoders.seq2seq_encoder import Seq2SeqEncoder
 6 | 
 7 | 
 8 | @Seq2SeqEncoder.register("feedforward")
 9 | class FeedForwardEncoder(Seq2SeqEncoder):
10 |     """
11 |     This class applies the `FeedForward` to each item in sequences.
12 | 
13 |     Registered as a `Seq2SeqEncoder` with name "feedforward".
14 |     """
15 | 
16 |     def __init__(self, feedforward: FeedForward) -> None:
17 |         super().__init__()
18 |         self._feedforward = feedforward
19 | 
20 |     def get_input_dim(self) -> int:
21 |         return self._feedforward.get_input_dim()
22 | 
23 |     def get_output_dim(self) -> int:
24 |         return self._feedforward.get_output_dim()
25 | 
26 |     def is_bidirectional(self) -> bool:
27 |         return False
28 | 
29 |     def forward(self, inputs: torch.Tensor, mask: torch.BoolTensor = None) -> torch.Tensor:
30 |         """
31 |         # Parameters
32 | 
33 |         inputs : `torch.Tensor`, required.
34 |             A tensor of shape (batch_size, timesteps, input_dim)
35 |         mask : `torch.BoolTensor`, optional (default = `None`).
36 |             A tensor of shape (batch_size, timesteps).
37 | 
38 |         # Returns
39 | 
40 |         A tensor of shape (batch_size, timesteps, output_dim).
41 |         """
42 |         if mask is None:
43 |             return self._feedforward(inputs)
44 |         else:
45 |             outputs = self._feedforward(inputs)
46 |             return outputs * mask.unsqueeze(dim=-1)
47 | 


--------------------------------------------------------------------------------
/allennlp/modules/seq2vec_encoders/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Modules that transform a sequence of input vectors
 3 | into a single output vector.
 4 | Some are just basic wrappers around existing PyTorch modules,
 5 | others are AllenNLP modules.
 6 | 
 7 | The available Seq2Vec encoders are
 8 | 
 9 | * `"gru"` https://pytorch.org/docs/master/nn.html#torch.nn.GRU
10 | * `"lstm"` https://pytorch.org/docs/master/nn.html#torch.nn.LSTM
11 | * `"rnn"` https://pytorch.org/docs/master/nn.html#torch.nn.RNN
12 | * `"cnn"` allennlp.modules.seq2vec_encoders.cnn_encoder.CnnEncoder
13 | * `"augmented_lstm"` allennlp.modules.augmented_lstm.AugmentedLstm
14 | * `"alternating_lstm"` allennlp.modules.stacked_alternating_lstm.StackedAlternatingLstm
15 | * `"stacked_bidirectional_lstm"` allennlp.modules.stacked_bidirectional_lstm.StackedBidirectionalLstm
16 | """
17 | 
18 | from allennlp.modules.seq2vec_encoders.bert_pooler import BertPooler
19 | from allennlp.modules.seq2vec_encoders.boe_encoder import BagOfEmbeddingsEncoder
20 | from allennlp.modules.seq2vec_encoders.cls_pooler import ClsPooler
21 | from allennlp.modules.seq2vec_encoders.cnn_encoder import CnnEncoder
22 | from allennlp.modules.seq2vec_encoders.cnn_highway_encoder import CnnHighwayEncoder
23 | from allennlp.modules.seq2vec_encoders.pytorch_seq2vec_wrapper import (
24 |     AugmentedLstmSeq2VecEncoder,
25 |     GruSeq2VecEncoder,
26 |     LstmSeq2VecEncoder,
27 |     PytorchSeq2VecWrapper,
28 |     RnnSeq2VecEncoder,
29 |     StackedAlternatingLstmSeq2VecEncoder,
30 |     StackedBidirectionalLstmSeq2VecEncoder,
31 | )
32 | from allennlp.modules.seq2vec_encoders.seq2vec_encoder import Seq2VecEncoder
33 | 


--------------------------------------------------------------------------------
/allennlp/modules/seq2vec_encoders/seq2vec_encoder.py:
--------------------------------------------------------------------------------
 1 | from allennlp.modules.encoder_base import _EncoderBase
 2 | from allennlp.common import Registrable
 3 | 
 4 | 
 5 | class Seq2VecEncoder(_EncoderBase, Registrable):
 6 |     """
 7 |     A `Seq2VecEncoder` is a `Module` that takes as input a sequence of vectors and returns a
 8 |     single vector.  Input shape : `(batch_size, sequence_length, input_dim)`; output shape:
 9 |     `(batch_size, output_dim)`.
10 | 
11 |     We add two methods to the basic `Module` API: `get_input_dim()` and `get_output_dim()`.
12 |     You might need this if you want to construct a `Linear` layer using the output of this encoder,
13 |     or to raise sensible errors for mis-matching input dimensions.
14 |     """
15 | 
16 |     def get_input_dim(self) -> int:
17 |         """
18 |         Returns the dimension of the vector input for each element in the sequence input
19 |         to a `Seq2VecEncoder`. This is `not` the shape of the input tensor, but the
20 |         last element of that shape.
21 |         """
22 |         raise NotImplementedError
23 | 
24 |     def get_output_dim(self) -> int:
25 |         """
26 |         Returns the dimension of the final vector output by this `Seq2VecEncoder`.  This is `not`
27 |         the shape of the returned tensor, but the last element of that shape.
28 |         """
29 |         raise NotImplementedError
30 | 


--------------------------------------------------------------------------------
/allennlp/modules/span_extractors/__init__.py:
--------------------------------------------------------------------------------
 1 | from allennlp.modules.span_extractors.span_extractor import SpanExtractor
 2 | from allennlp.modules.span_extractors.endpoint_span_extractor import EndpointSpanExtractor
 3 | from allennlp.modules.span_extractors.self_attentive_span_extractor import (
 4 |     SelfAttentiveSpanExtractor,
 5 | )
 6 | from allennlp.modules.span_extractors.bidirectional_endpoint_span_extractor import (
 7 |     BidirectionalEndpointSpanExtractor,
 8 | )
 9 | from allennlp.modules.span_extractors.max_pooling_span_extractor import MaxPoolingSpanExtractor
10 | 


--------------------------------------------------------------------------------
/allennlp/modules/text_field_embedders/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | A `TextFieldEmbedder` is a `Module` that takes as input the `dict` of NumPy arrays
3 | produced by a `TextField` and returns as output an embedded representation of the tokens in that field.
4 | """
5 | 
6 | from allennlp.modules.text_field_embedders.text_field_embedder import TextFieldEmbedder
7 | from allennlp.modules.text_field_embedders.basic_text_field_embedder import BasicTextFieldEmbedder
8 | 


--------------------------------------------------------------------------------
/allennlp/modules/token_embedders/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A `TokenEmbedder` is a `Module` that
 3 | embeds one-hot-encoded tokens as vectors.
 4 | """
 5 | 
 6 | from allennlp.modules.token_embedders.token_embedder import TokenEmbedder
 7 | from allennlp.modules.token_embedders.embedding import Embedding
 8 | from allennlp.modules.token_embedders.token_characters_encoder import TokenCharactersEncoder
 9 | from allennlp.modules.token_embedders.elmo_token_embedder import ElmoTokenEmbedder
10 | from allennlp.modules.token_embedders.empty_embedder import EmptyEmbedder
11 | from allennlp.modules.token_embedders.bag_of_word_counts_token_embedder import (
12 |     BagOfWordCountsTokenEmbedder,
13 | )
14 | from allennlp.modules.token_embedders.pass_through_token_embedder import PassThroughTokenEmbedder
15 | from allennlp.modules.token_embedders.pretrained_transformer_embedder import (
16 |     PretrainedTransformerEmbedder,
17 | )
18 | from allennlp.modules.token_embedders.pretrained_transformer_mismatched_embedder import (
19 |     PretrainedTransformerMismatchedEmbedder,
20 | )
21 | 


--------------------------------------------------------------------------------
/allennlp/modules/token_embedders/empty_embedder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from allennlp.modules.token_embedders.token_embedder import TokenEmbedder
 3 | 
 4 | 
 5 | @TokenEmbedder.register("empty")
 6 | class EmptyEmbedder(TokenEmbedder):
 7 |     """
 8 |     Assumes you want to completely ignore the output of a `TokenIndexer` for some reason, and does
 9 |     not return anything when asked to embed it.
10 | 
11 |     You should almost never need to use this; normally you would just not use a particular
12 |     `TokenIndexer`. It's only in very rare cases, like simplicity in data processing for language
13 |     modeling (where we use just one `TextField` to handle input embedding and computing target ids),
14 |     where you might want to use this.
15 | 
16 |     Registered as a `TokenEmbedder` with name "empty".
17 |     """
18 | 
19 |     def __init__(self) -> None:
20 |         super().__init__()
21 | 
22 |     def get_output_dim(self):
23 |         return 0
24 | 
25 |     def forward(self, *inputs, **kwargs) -> torch.Tensor:
26 |         return None
27 | 


--------------------------------------------------------------------------------
/allennlp/modules/token_embedders/pass_through_token_embedder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from allennlp.modules.token_embedders.token_embedder import TokenEmbedder
 3 | 
 4 | 
 5 | @TokenEmbedder.register("pass_through")
 6 | class PassThroughTokenEmbedder(TokenEmbedder):
 7 |     """
 8 |     Assumes that the input is already vectorized in some way,
 9 |     and just returns it.
10 | 
11 |     Registered as a `TokenEmbedder` with name "pass_through".
12 | 
13 |     # Parameters
14 | 
15 |     hidden_dim : `int`, required.
16 | 
17 |     """
18 | 
19 |     def __init__(self, hidden_dim: int) -> None:
20 |         self.hidden_dim = hidden_dim
21 |         super().__init__()
22 | 
23 |     def get_output_dim(self):
24 |         return self.hidden_dim
25 | 
26 |     def forward(self, tokens: torch.Tensor) -> torch.Tensor:
27 |         return tokens
28 | 


--------------------------------------------------------------------------------
/allennlp/modules/token_embedders/token_embedder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.common import Registrable
 4 | 
 5 | 
 6 | class TokenEmbedder(torch.nn.Module, Registrable):
 7 |     """
 8 |     A `TokenEmbedder` is a `Module` that takes as input a tensor with integer ids that have
 9 |     been output from a [`TokenIndexer`](/api/data/token_indexers/token_indexer.md) and outputs
10 |     a vector per token in the input.  The input typically has shape `(batch_size, num_tokens)`
11 |     or `(batch_size, num_tokens, num_characters)`, and the output is of shape `(batch_size, num_tokens,
12 |     output_dim)`.  The simplest `TokenEmbedder` is just an embedding layer, but for
13 |     character-level input, it could also be some kind of character encoder.
14 | 
15 |     We add a single method to the basic `Module` API: `get_output_dim()`.  This lets us
16 |     more easily compute output dimensions for the
17 |     [`TextFieldEmbedder`](/api/modules/text_field_embedders/text_field_embedder.md),
18 |     which we might need when defining model parameters such as LSTMs or linear layers, which need
19 |     to know their input dimension before the layers are called.
20 |     """
21 | 
22 |     default_implementation = "embedding"
23 | 
24 |     def get_output_dim(self) -> int:
25 |         """
26 |         Returns the final output dimension that this `TokenEmbedder` uses to represent each
27 |         token.  This is `not` the shape of the returned tensor, but the last element of that shape.
28 |         """
29 |         raise NotImplementedError
30 | 


--------------------------------------------------------------------------------
/allennlp/modules/transformer/activation_layer.py:
--------------------------------------------------------------------------------
 1 | from typing import Union
 2 | import torch
 3 | 
 4 | from allennlp.common import FromParams
 5 | 
 6 | from allennlp.modules.transformer.transformer_module import TransformerModule
 7 | 
 8 | from transformers.models.bert.modeling_bert import ACT2FN
 9 | 
10 | 
class ActivationLayer(TransformerModule, FromParams):
    """
    A linear projection followed by an activation function, optionally applied only to
    the first position along dimension 1 of the input.

    # Parameters

    hidden_size : `int`
        Input size of the linear projection.
    intermediate_size : `int`
        Output size of the linear projection.
    activation : `Union[str, torch.nn.Module]`
        Either a key into `ACT2FN` or a callable module used as-is.
    pool : `bool`, optional (default = `False`)
        If `True`, `forward()` keeps only index 0 of dimension 1 before projecting.
    """

    def __init__(
        self,
        hidden_size: int,
        intermediate_size: int,
        activation: Union[str, torch.nn.Module],
        pool: bool = False,
    ):
        super().__init__()
        self.dense = torch.nn.Linear(hidden_size, intermediate_size)
        # Strings are resolved through the `ACT2FN` lookup table; anything else is
        # assumed to already be callable.
        self.act_fn = ACT2FN[activation] if isinstance(activation, str) else activation
        self.pool = pool

    def get_output_dim(self) -> int:
        """
        Returns the output size of the linear projection.
        """
        return self.dense.out_features

    def forward(self, hidden_states):
        """
        Projects `hidden_states` (after optional pooling) and applies the activation.
        """
        # NOTE(review): presumably index 0 is the first (e.g. [CLS]) token - confirm.
        selected = hidden_states[:, 0] if self.pool else hidden_states
        return self.act_fn(self.dense(selected))
36 | 


--------------------------------------------------------------------------------
/allennlp/modules/transformer/layer_norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from allennlp.modules.transformer.transformer_module import TransformerModule
4 | 
5 | 
class LayerNorm(torch.nn.LayerNorm, TransformerModule):
    """
    `torch.nn.LayerNorm` combined with `TransformerModule`.
    """

    # NOTE(review): presumably consumed by `TransformerModule` to rename the pretrained
    # parameter names "gamma"/"beta" to torch's "weight"/"bias" - confirm there.
    _pretrained_mapping = {
        "gamma": "weight",
        "beta": "bias",
    }
8 | 


--------------------------------------------------------------------------------
/allennlp/modules/transformer/output_layer.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.common import FromParams
 4 | 
 5 | from allennlp.modules.transformer.transformer_module import TransformerModule
 6 | from allennlp.modules.transformer.layer_norm import LayerNorm
 7 | 
 8 | 
 9 | class OutputLayer(TransformerModule, FromParams):
10 | 
11 |     _pretrained_mapping = {"LayerNorm": "layer_norm"}
12 | 
13 |     def __init__(self, input_size: int, hidden_size: int, dropout: float):
14 |         super().__init__()
15 |         self.dense = torch.nn.Linear(input_size, hidden_size)
16 |         self.layer_norm = LayerNorm(hidden_size, eps=1e-12)
17 |         self.dropout = torch.nn.Dropout(dropout)
18 | 
19 |     def get_output_dim(self) -> int:
20 |         return self.dense.out_features
21 | 
22 |     def forward(self, hidden_states, input_tensor):
23 |         dense_output = self.dense(hidden_states)
24 |         dropout_output = self.dropout(dense_output)
25 |         output = self.layer_norm(dropout_output + input_tensor)
26 |         return output
27 | 


--------------------------------------------------------------------------------
/allennlp/modules/transformer/transformer_pooler.py:
--------------------------------------------------------------------------------
 1 | from typing import Union, TYPE_CHECKING
 2 | 
 3 | import torch
 4 | 
 5 | from allennlp.common import FromParams
 6 | from allennlp.modules.transformer.activation_layer import ActivationLayer
 7 | 
 8 | if TYPE_CHECKING:
 9 |     from transformers.configuration_utils import PretrainedConfig
10 | 
11 | 
class TransformerPooler(ActivationLayer, FromParams):
    """
    An `ActivationLayer` that always runs with `pool=True`.

    # Parameters

    hidden_size : `int`
        Input size of the linear projection.
    intermediate_size : `int`
        Output size of the linear projection.
    activation : `Union[str, torch.nn.Module]`, optional (default = `"relu"`)
        Activation applied after the projection.
    """

    _pretrained_relevant_module = ["pooler", "bert.pooler", "roberta.pooler"]

    def __init__(
        self,
        hidden_size: int,
        intermediate_size: int,
        activation: Union[str, torch.nn.Module] = "relu",
    ):
        super().__init__(
            hidden_size=hidden_size,
            intermediate_size=intermediate_size,
            activation=activation,
            pool=True,
        )

    @classmethod
    def _from_config(cls, config: "PretrainedConfig", **kwargs):
        """
        Builds a pooler whose input and output sizes both equal `config.hidden_size`.
        Extra keyword arguments are accepted but not used.
        """
        size = config.hidden_size
        # BERT has this hardcoded
        return cls(size, size, "tanh")
27 | 


--------------------------------------------------------------------------------
/allennlp/modules/util.py:
--------------------------------------------------------------------------------
 1 | from copy import deepcopy
 2 | import torch
 3 | 
 4 | 
 5 | def replicate_layers(layer: torch.nn.Module, num_copies: int):
 6 |     """
 7 |     # Parameters
 8 |             layer (torch.nn.Module) - The torch layer that needs to be replicated.
 9 |             num_copies (int) - Number of copies to create.
10 | 
11 |     # Returns
12 |             A ModuleList that contains `num_copies` of the `layer`.
13 |     """
14 |     return torch.nn.ModuleList([deepcopy(layer) for _ in range(num_copies)])
15 | 


--------------------------------------------------------------------------------
/allennlp/modules/vision/__init__.py:
--------------------------------------------------------------------------------
 1 | from allennlp.modules.vision.grid_embedder import GridEmbedder, ResnetBackbone
 2 | from allennlp.modules.vision.image2image import (
 3 |     Image2ImageModule,
 4 |     NormalizeImage,
 5 | )
 6 | from allennlp.modules.vision.region_detector import (
 7 |     RegionDetector,
 8 |     FasterRcnnRegionDetector,
 9 | )
10 | 


--------------------------------------------------------------------------------
/allennlp/modules/vision/image2image.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn, FloatTensor, IntTensor
 3 | from typing import List
 4 | 
 5 | from allennlp.common.registrable import Registrable
 6 | 
 7 | 
 8 | class Image2ImageModule(nn.Module, Registrable):
 9 |     """
10 |     An `Image2ImageModule` takes a batch of images as a tensor with the dimensions
11 |     `(batch_size, color_channels, height, width)`, and returns a tensor in the same format,
12 |     after applying some transformation on the images.
13 |     """
14 | 
15 |     def forward(self, images: FloatTensor, sizes: IntTensor):
16 |         raise NotImplementedError()
17 | 
18 | 
@Image2ImageModule.register("normalize")
class NormalizeImage(Image2ImageModule):
    """
    Per-channel image normalization: subtracts each channel's mean and divides by
    that channel's standard deviation.

    # Parameters

    means : `List[float]`
        One mean per channel.
    stds : `List[float]`
        One standard deviation per channel; must have the same length as `means`.
    """

    def __init__(self, means: List[float], stds: List[float]):
        super().__init__()
        assert len(means) == len(stds)
        self.means = torch.tensor(means, dtype=torch.float32)
        self.stds = torch.tensor(stds, dtype=torch.float32)

    def forward(self, images: FloatTensor, sizes: IntTensor):
        """
        Normalizes `images` channel by channel; `sizes` is not used.
        """
        assert images.size(1) == self.means.size(0)

        # The statistics are plain tensor attributes, so they are moved to the input's
        # device here (and kept there for subsequent calls).
        target_device = images.device
        self.means = self.means.to(target_device)
        self.stds = self.stds.to(target_device)

        # Swap the channel dimension into last place so the per-channel statistics
        # broadcast over it, then swap it back afterwards.
        channels_last = images.transpose(1, -1)
        normalized = (channels_last - self.means) / self.stds
        return normalized.transpose(-1, 1)
40 | 


--------------------------------------------------------------------------------
/allennlp/nn/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.nn.module import Module
2 | from allennlp.nn.activations import Activation
3 | from allennlp.nn.initializers import Initializer, InitializerApplicator
4 | from allennlp.nn.regularizers import RegularizerApplicator
5 | 


--------------------------------------------------------------------------------
/allennlp/nn/checkpoint/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.nn.checkpoint.checkpoint_wrapper import CheckpointWrapper, TorchCheckpointWrapper
2 | from allennlp.nn.checkpoint.fairscale_checkpoint_wrapper import FairScaleCheckpointWrapper
3 | 


--------------------------------------------------------------------------------
/allennlp/nn/checkpoint/fairscale_checkpoint_wrapper.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | from fairscale.nn.checkpoint import checkpoint_wrapper
 4 | 
 5 | import torch.nn as nn
 6 | 
 7 | from allennlp.nn.checkpoint.checkpoint_wrapper import CheckpointWrapper
 8 | 
 9 | 
@CheckpointWrapper.register("fairscale")
class FairScaleCheckpointWrapper(CheckpointWrapper):
    """
    Provides [FairScale]
    (https://fairscale.readthedocs.io/en/latest/api/nn/checkpoint/checkpoint_activations.html)'s
    activation/gradient checkpointing functionality.

    The parameters and their defaults mirror FairScale's, and each of them can be
    overridden on a per-module basis by passing the corresponding parameter
    to `.wrap_module()`.

    This can also be used in conjunction with the
    :class:`allennlp.nn.parallel.fairscale_fsdp_accelerator.FairScaleFsdpAccelerator`.
    See the [T5 implementation](/api/modules/transformer/t5/) for an example
    of how to use the two together.
    """

    def __init__(self, offload_to_cpu: Optional[bool] = True) -> None:
        self._offload_to_cpu = offload_to_cpu

    def wrap_module(
        self,
        module: nn.Module,
        **kwargs,
    ) -> nn.Module:
        """
        Wraps `module` with FairScale's `checkpoint_wrapper`, forwarding `kwargs`.

        The instance-level `offload_to_cpu` setting is applied only when it is not
        `None` and the caller did not pass `offload_to_cpu` explicitly.
        """
        if self._offload_to_cpu is not None:
            kwargs.setdefault("offload_to_cpu", self._offload_to_cpu)
        return checkpoint_wrapper(module, **kwargs)
38 | 


--------------------------------------------------------------------------------
/allennlp/nn/parallel/__init__.py:
--------------------------------------------------------------------------------
 1 | from allennlp.nn.parallel.sharded_module_mixin import ShardedModuleMixin
 2 | from allennlp.nn.parallel.ddp_accelerator import (
 3 |     DdpAccelerator,
 4 |     DdpWrappedModel,
 5 |     TorchDdpAccelerator,
 6 | )
 7 | from allennlp.nn.parallel.fairscale_fsdp_accelerator import (
 8 |     FairScaleFsdpAccelerator,
 9 |     FairScaleFsdpWrappedModel,
10 | )
11 | 


--------------------------------------------------------------------------------
/allennlp/nn/parallel/sharded_module_mixin.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class ShardedModuleMixin:
 5 |     """
 6 |     Mixin class for sharded data parallel wrappers. Subclasses should implement
 7 |     `get_original_module()` which returns a reference the original inner wrapped module.
 8 |     """
 9 | 
10 |     def get_original_module(self) -> torch.nn.Module:
11 |         """
12 |         Get the original
13 |         """
14 |         raise NotImplementedError
15 | 


--------------------------------------------------------------------------------
/allennlp/nn/regularizers/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module contains classes representing regularization schemes
 3 | as well as a class for applying regularization to parameters.
 4 | """
 5 | 
 6 | from allennlp.nn.regularizers.regularizer import Regularizer
 7 | from allennlp.nn.regularizers.regularizers import L1Regularizer
 8 | from allennlp.nn.regularizers.regularizers import L2Regularizer
 9 | from allennlp.nn.regularizers.regularizer_applicator import RegularizerApplicator
10 | 


--------------------------------------------------------------------------------
/allennlp/nn/regularizers/regularizer.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.common import Registrable
 4 | 
 5 | 
 6 | class Regularizer(Registrable):
 7 |     """
 8 |     An abstract class representing a regularizer. It must implement
 9 |     call, returning a scalar tensor.
10 |     """
11 | 
12 |     default_implementation = "l2"
13 | 
14 |     def __call__(self, parameter: torch.Tensor) -> torch.Tensor:
15 |         raise NotImplementedError
16 | 


--------------------------------------------------------------------------------
/allennlp/nn/regularizers/regularizer_applicator.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import List, Tuple
 3 | 
 4 | import torch
 5 | 
 6 | from allennlp.common import FromParams
 7 | from allennlp.nn.regularizers.regularizer import Regularizer
 8 | 
 9 | 
class RegularizerApplicator(FromParams):
    """
    Applies regularizers to the parameters of a Module based on regex matches.
    """

    def __init__(self, regexes: List[Tuple[str, Regularizer]] = None) -> None:
        """
        # Parameters

        regexes : `List[Tuple[str, Regularizer]]`, optional (default = `None`)
            A sequence of pairs (regex, Regularizer).  For every parameter, only the
            first pair whose regex matches the parameter's name is applied; later
            pairs are ignored for that parameter.
        """
        self._regularizers = regexes or []

    def __call__(self, module: torch.nn.Module) -> torch.Tensor:
        """
        Sums the penalties of all trainable parameters of `module` that match a regex.

        # Parameters

        module : `torch.nn.Module`, required
            The module to regularize.

        # Returns

        The accumulated penalty.  When no parameter is matched this is the plain
        Python float `0.0` rather than a tensor.
        """
        total_penalty = 0.0
        for param_name, param in module.named_parameters():
            # Parameters that receive no gradient updates are never regularized.
            if not param.requires_grad:
                continue
            for pattern, regularizer in self._regularizers:
                if not re.search(pattern, param_name):
                    continue
                # First match wins: apply it and move on to the next parameter.
                total_penalty = total_penalty + regularizer(param)
                break
        return total_penalty
44 | 


--------------------------------------------------------------------------------
/allennlp/nn/regularizers/regularizers.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.nn.regularizers.regularizer import Regularizer
 4 | 
 5 | 
 6 | @Regularizer.register("l1")
 7 | class L1Regularizer(Regularizer):
 8 |     """
 9 |     Represents a penalty proportional to the sum of the absolute values of the parameters
10 | 
11 |     Registered as a `Regularizer` with name "l1".
12 |     """
13 | 
14 |     def __init__(self, alpha: float = 0.01) -> None:
15 |         self.alpha = alpha
16 | 
17 |     def __call__(self, parameter: torch.Tensor) -> torch.Tensor:
18 |         return self.alpha * torch.sum(torch.abs(parameter))
19 | 
20 | 
@Regularizer.register("l2")
class L2Regularizer(Regularizer):
    """
    A penalty proportional to the sum of the squared values of the parameters.

    Registered as a `Regularizer` with name "l2".

    # Parameters

    alpha : `float`, optional (default = `0.01`)
        Factor the summed squares are multiplied by.
    """

    def __init__(self, alpha: float = 0.01) -> None:
        self.alpha = alpha

    def __call__(self, parameter: torch.Tensor) -> torch.Tensor:
        squared_sum = parameter.pow(2).sum()
        return self.alpha * squared_sum
34 | 


--------------------------------------------------------------------------------
/allennlp/predictors/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A `Predictor` is
 3 | a wrapper for an AllenNLP `Model`
 4 | that makes JSON predictions using JSON inputs. If you
 5 | want to serve up a model through the web service
 6 | (or using `allennlp.commands.predict`), you'll need
 7 | a `Predictor` that wraps it.
 8 | """
 9 | from allennlp.predictors.predictor import Predictor
10 | from allennlp.predictors.sentence_tagger import SentenceTaggerPredictor
11 | from allennlp.predictors.text_classifier import TextClassifierPredictor
12 | from allennlp.predictors.multitask import MultiTaskPredictor
13 | 


--------------------------------------------------------------------------------
/allennlp/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/allennlp/py.typed


--------------------------------------------------------------------------------
/allennlp/sanity_checks/__init__.py:
--------------------------------------------------------------------------------
 1 | from allennlp.confidence_checks.verification_base import VerificationBase
 2 | from allennlp.confidence_checks.normalization_bias_verification import NormalizationBiasVerification
 3 | 
 4 | import warnings
 5 | 
 6 | warnings.warn(
 7 |     "Module 'sanity_checks' is deprecated, please use 'confidence_checks' instead.",
 8 |     DeprecationWarning,
 9 | )
10 | 


--------------------------------------------------------------------------------
/allennlp/sanity_checks/task_checklists/__init__.py:
--------------------------------------------------------------------------------
 1 | from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite
 2 | from allennlp.confidence_checks.task_checklists.sentiment_analysis_suite import (
 3 |     SentimentAnalysisSuite,
 4 | )
 5 | from allennlp.confidence_checks.task_checklists.question_answering_suite import (
 6 |     QuestionAnsweringSuite,
 7 | )
 8 | from allennlp.confidence_checks.task_checklists.textual_entailment_suite import (
 9 |     TextualEntailmentSuite,
10 | )
11 | 


--------------------------------------------------------------------------------
/allennlp/tools/EVALB/.gitignore:
--------------------------------------------------------------------------------
1 | evalb
2 | 


--------------------------------------------------------------------------------
/allennlp/tools/EVALB/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <https://unlicense.org/>
25 | 


--------------------------------------------------------------------------------
/allennlp/tools/EVALB/Makefile:
--------------------------------------------------------------------------------
# `all` and `clean` name actions, not files.  Declaring them phony keeps them
# working even if a file called `all` or `clean` appears in this directory.
.PHONY: all clean

# Default target: always rebuild the `evalb` binary from scratch.
all: clean evalb

# Remove the compiled binary.
clean:
	rm -f evalb

# Compile the scorer with warnings and debug symbols enabled.
evalb: evalb.c
	gcc -Wall -g -o evalb evalb.c
8 | 


--------------------------------------------------------------------------------
/allennlp/tools/EVALB/sample/sample.gld:
--------------------------------------------------------------------------------
 1 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
 2 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
 3 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
 4 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
 5 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
 6 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
 7 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
 8 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
 9 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
10 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
11 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
12 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
13 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
14 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
15 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
16 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
17 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
18 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
19 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
20 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
21 | (S (A-SBJ-1 (P this)) (B-WHATEVER (Q is) (A (R a) (T test))))
22 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))))
23 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (-NONE- *))
24 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (: *))
25 | 


--------------------------------------------------------------------------------
/allennlp/tools/EVALB/sample/sample.tst:
--------------------------------------------------------------------------------
 1 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
 2 | (S (A (P this)) (B (Q is) (C (R a) (T test))))
 3 | (S (A (P this)) (B (Q is) (A (R a) (U test))))
 4 | (S (C (P this)) (B (Q is) (A (R a) (U test))))
 5 | (S (A (P this)) (B (Q is) (R a) (A (T test))))
 6 | (S (A (P this) (Q is)) (A (R a) (T test)))
 7 | (S (P this) (Q is) (R a) (T test))
 8 | (P this) (Q is) (R a) (T test)
 9 | (S (A (P this)) (B (Q is) (A (A (R a) (T test)))))
10 | (S (A (P this)) (B (Q is) (A (A (A (A (A (R a) (T test))))))))
11 | 
12 | (S (A (P this)) (B (Q was) (A (A (R a) (T test)))))
13 | (S (A (P this)) (B (Q is) (U not) (A (A (R a) (T test)))))
14 | 
15 | (TOP (S (A (P this)) (B (Q is) (A (R a) (T test)))))
16 | (S (A (P this)) (NONE *) (B (Q is) (A (R a) (T test))))
17 | (S (A (P this)) (S (NONE abc) (A (NONE *))) (B (Q is) (A (R a) (T test))))
18 | (S (A (P this)) (B (Q is) (A (R a) (TT test))))
19 | (S (A (P This)) (B (Q is) (A (R a) (T test))))
20 | (S (A (P That)) (B (Q is) (A (R a) (T test))))
21 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
22 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))))
23 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (-NONE- *))
24 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (: *))
25 | 


--------------------------------------------------------------------------------
/allennlp/tools/EVALB/tgrep_proc.prl:
--------------------------------------------------------------------------------
#!/usr/local/bin/perl

# Filter script: reads lines from the files named on the command line (or from
# standard input when none are given) and prints only the lines containing "TOP".
while(<>)
{
    if(m/TOP/) # keep only lines containing "TOP"; every other line (blank ones included) is dropped
    {
	print;
    } 
}
10 | 


--------------------------------------------------------------------------------
/allennlp/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/allennlp/tools/__init__.py


--------------------------------------------------------------------------------
/allennlp/tools/inspect_cache.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from allennlp.common.file_utils import CACHE_DIRECTORY
 4 | from allennlp.common.file_utils import filename_to_url
 5 | 
 6 | 
 7 | def main():
 8 |     print(f"Looking for datasets in {CACHE_DIRECTORY}...")
 9 |     if not os.path.exists(CACHE_DIRECTORY):
10 |         print("Directory does not exist.")
11 |         print("No cached datasets found.")
12 | 
13 |     cached_files = os.listdir(CACHE_DIRECTORY)
14 | 
15 |     if not cached_files:
16 |         print("Directory is empty.")
17 |         print("No cached datasets found.")
18 | 
19 |     for filename in cached_files:
20 |         if not filename.endswith("json"):
21 |             url, etag = filename_to_url(filename)
22 |             print("Filename: %s" % filename)
23 |             print("Url: %s" % url)
24 |             print("ETag: %s" % etag)
25 |             print()
26 | 
27 | 
28 | if __name__ == "__main__":
29 |     main()
30 | 


--------------------------------------------------------------------------------
/allennlp/training/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.training.checkpointer import Checkpointer
2 | from allennlp.training.no_op_trainer import NoOpTrainer
3 | from allennlp.training.callbacks import TrainerCallback
4 | from allennlp.training.trainer import Trainer
5 | from allennlp.training.gradient_descent_trainer import GradientDescentTrainer
6 | 


--------------------------------------------------------------------------------
/allennlp/training/callbacks/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.training.callbacks.callback import TrainerCallback
2 | from allennlp.training.callbacks.console_logger import ConsoleLoggerCallback
3 | from allennlp.training.callbacks.confidence_checks import ConfidenceChecksCallback
4 | from allennlp.training.callbacks.tensorboard import TensorBoardCallback
5 | from allennlp.training.callbacks.track_epoch import TrackEpochCallback
6 | from allennlp.training.callbacks.wandb import WandBCallback
7 | from allennlp.training.callbacks.backward import MixedPrecisionBackwardCallback, OnBackwardException
8 | from allennlp.training.callbacks.should_validate import ShouldValidateCallback
9 | 


--------------------------------------------------------------------------------
/allennlp/training/callbacks/backward.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, TYPE_CHECKING
 2 | import torch
 3 | 
 4 | from allennlp.training.callbacks.callback import TrainerCallback
 5 | 
 6 | if TYPE_CHECKING:
 7 |     from allennlp.training.gradient_descent_trainer import GradientDescentTrainer
 8 | 
 9 | 
10 | @TrainerCallback.register("mixed_precision_backward")
11 | class MixedPrecisionBackwardCallback(TrainerCallback):
12 |     """
13 |     Performs backpropagation for mixed precision training.
14 |     """
15 | 
16 |     def on_backward(
17 |         self,
18 |         trainer: "GradientDescentTrainer",
19 |         batch_outputs: Dict[str, torch.Tensor],
20 |         backward_called: bool,
21 |         **kwargs
22 |     ) -> bool:
23 |         if backward_called:
24 |             raise OnBackwardException()
25 |         trainer._scaler.scale(batch_outputs["loss"]).backward()  # type: ignore
26 |         return True
27 | 
28 | 
29 | class OnBackwardException(Exception):
30 |     """
31 |     The exception type raised if an `on_backward` callback
32 |     attempts to call `backward` when `backward_called` is `True`.
33 |     """
34 | 
35 |     def __init__(self, message="") -> None:
36 |         super().__init__(
37 |             "Backpropagation has already been performed"
38 |             "and the computation graph has been erased, so"
39 |             "calling `loss.backward` is not permitted. " + message
40 |         )
41 | 


--------------------------------------------------------------------------------
/allennlp/training/callbacks/track_epoch.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Any, TYPE_CHECKING
 2 | 
 3 | from allennlp.training.callbacks.callback import TrainerCallback
 4 | 
 5 | if TYPE_CHECKING:
 6 |     from allennlp.training.gradient_descent_trainer import GradientDescentTrainer
 7 | 
 8 | 
 9 | @TrainerCallback.register("track_epoch_callback")
10 | class TrackEpochCallback(TrainerCallback):
11 |     """
12 |     A callback that you can pass to the `GradientDescentTrainer` to access the current epoch number
13 |     in your model during training. This callback sets `model.epoch`, which can be read inside of
14 |     `model.forward()`. We set `model.epoch = epoch + 1` which now denotes the number of
15 |     completed epochs at a given training state.
16 |     """
17 | 
18 |     def on_start(
19 |         self, trainer: "GradientDescentTrainer", is_primary: bool = True, **kwargs
20 |     ) -> None:
21 |         super().on_start(trainer, is_primary)
22 |         trainer.model.epoch = 0  # type: ignore[assignment]
23 | 
24 |     def on_epoch(
25 |         self,
26 |         trainer: "GradientDescentTrainer",
27 |         metrics: Dict[str, Any],
28 |         epoch: int,
29 |         is_primary: bool = True,
30 |         **kwargs,
31 |     ) -> None:
32 |         trainer.model.epoch = epoch + 1  # type: ignore[assignment]
33 | 


--------------------------------------------------------------------------------
/allennlp/training/metrics/average.py:
--------------------------------------------------------------------------------
 1 | from allennlp.training.metrics.metric import Metric
 2 | from allennlp.nn.util import dist_reduce_sum
 3 | 
 4 | 
 5 | @Metric.register("average")
 6 | class Average(Metric):
 7 |     """
 8 |     This [`Metric`](./metric.md) breaks with the typical `Metric` API and just stores values that were
 9 |     computed in some fashion outside of a `Metric`.  If you have some external code that computes
10 |     the metric for you, for instance, you can use this to report the average result using our
11 |     `Metric` API.
12 |     """
13 | 
14 |     def __init__(self) -> None:
15 |         self._total_value = 0.0
16 |         self._count = 0
17 | 
18 |     def __call__(self, value):
19 |         """
20 |         # Parameters
21 | 
22 |         value : `float`
23 |             The value to average.
24 |         """
25 |         self._count += dist_reduce_sum(1)
26 |         self._total_value += dist_reduce_sum(float(list(self.detach_tensors(value))[0]))
27 | 
28 |     def get_metric(self, reset: bool = False):
29 |         """
30 |         # Returns
31 | 
32 |         The average of all values that were passed to `__call__`.
33 |         """
34 | 
35 |         average_value = self._total_value / self._count if self._count > 0 else 0.0
36 |         if reset:
37 |             self.reset()
38 |         return float(average_value)
39 | 
40 |     def reset(self):
41 |         self._total_value = 0.0
42 |         self._count = 0
43 | 


--------------------------------------------------------------------------------
/allennlp/training/metrics/perplexity.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | from allennlp.training.metrics.average import Average
 4 | from allennlp.training.metrics.metric import Metric
 5 | 
 6 | 
 7 | @Metric.register("perplexity")
 8 | class Perplexity(Average):
 9 |     """
10 |     Perplexity is a common metric used for evaluating how well a language model
11 |     predicts a sample.
12 | 
13 |     Notes
14 |     -----
15 |     Assumes negative log likelihood loss of each batch (base e). Reports the
16 |     exponential of the average batch loss, not an average of per-batch perplexities.
17 |     """
18 | 
19 |     def get_metric(self, reset: bool = False):
20 |         """
21 |         # Returns
22 | 
23 |         The accumulated perplexity, or `0.0` if the average loss is exactly 0.
24 |         """
25 |         average_loss = super().get_metric(reset)
26 |         if average_loss == 0:
27 |             return 0.0
28 | 
29 |         # Exponentiate the average loss to compute perplexity
30 |         return math.exp(average_loss)
31 | 


--------------------------------------------------------------------------------
/allennlp/training/momentum_schedulers/__init__.py:
--------------------------------------------------------------------------------
1 | from allennlp.training.momentum_schedulers.momentum_scheduler import MomentumScheduler
2 | from allennlp.training.momentum_schedulers.inverted_triangular import InvertedTriangular
3 | 


--------------------------------------------------------------------------------
/allennlp/training/momentum_schedulers/momentum_scheduler.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.common.registrable import Registrable
 4 | from allennlp.training.scheduler import Scheduler
 5 | 
 6 | 
 7 | class MomentumScheduler(Scheduler, Registrable):
 8 |     def __init__(self, optimizer: torch.optim.Optimizer, last_epoch: int = -1) -> None:
 9 |         super().__init__(optimizer, "momentum", last_epoch)
10 | 
11 |     def get_values(self) -> None:
12 |         raise NotImplementedError
13 | 


--------------------------------------------------------------------------------
/allennlp/version.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | _MAJOR = "2"
 4 | _MINOR = "10"
 5 | # On main and in a nightly release the patch should be one ahead of the last
 6 | # released build.
 7 | _PATCH = "1"
 8 | # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 9 | # https://semver.org/#is-v123-a-semantic-version for the semantics.
10 | _SUFFIX = os.environ.get("ALLENNLP_VERSION_SUFFIX", "")
11 | 
12 | VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
13 | VERSION = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX)
14 | 


--------------------------------------------------------------------------------
/benchmarks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/benchmarks/__init__.py


--------------------------------------------------------------------------------
/benchmarks/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/benchmarks/data/__init__.py


--------------------------------------------------------------------------------
/benchmarks/data/tokenizers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/benchmarks/data/tokenizers/__init__.py


--------------------------------------------------------------------------------
/benchmarks/data/tokenizers/character_tokenizer_bench.py:
--------------------------------------------------------------------------------
 1 | from allennlp.data.tokenizers import CharacterTokenizer
 2 | 
 3 | 
 4 | tokenizer = CharacterTokenizer()
 5 | passage = (
 6 |     "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor "
 7 |     "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis "
 8 |     "nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. "
 9 |     "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu "
10 |     "fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in "
11 |     "culpa qui officia deserunt mollit anim id est laborum."
12 | )
13 | 
14 | 
15 | def bench_character_tokenizer(benchmark):
16 |     benchmark(tokenizer.tokenize, passage)
17 | 


--------------------------------------------------------------------------------
/benchmarks/nn/util_bench.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.nn import util
 4 | from allennlp.common.testing import requires_gpu
 5 | 
 6 | 
 7 | @requires_gpu
 8 | def bench_add_sentence_boundary_token_ids(benchmark):
 9 |     device = torch.device("cuda")
10 |     # shape: (32, 50)
11 |     tensor = torch.tensor([[3] * 50] * 32, device=device)
12 |     # shape: (32, 50)
13 |     mask = torch.tensor([[True] * 50, [True] * 30 + [False] * 20] * 16, device=device)
14 |     begin_token = 1
15 |     end_token = 2
16 |     benchmark(util.add_sentence_boundary_token_ids, tensor, mask, begin_token, end_token)
17 | 
18 | 
19 | @requires_gpu
20 | def bench_remove_sentence_boundaries(benchmark):
21 |     device = torch.device("cuda")
22 |     # shape: (32, 50, 1)
23 |     tensor = torch.tensor([[3] * 50] * 32, device=device).unsqueeze(-1)
24 |     # shape: (32, 50)
25 |     mask = torch.tensor([[True] * 50, [True] * 30 + [False] * 20] * 16, device=device)
26 |     benchmark(util.remove_sentence_boundaries, tensor, mask)
27 | 
28 | 
29 | @requires_gpu
30 | def bench_create_tensor_then_send_to_device(benchmark):
31 |     device = torch.device("cuda:0")
32 | 
33 |     def create_tensor():
34 |         return torch.rand((32, 50)).to(device)
35 | 
36 |     benchmark(create_tensor)
37 | 
38 | 
39 | @requires_gpu
40 | def bench_create_tensor_directly_on_device(benchmark):
41 |     device = torch.device("cuda:0")
42 | 
43 |     def create_tensor():
44 |         return torch.rand((32, 50), device=device)
45 | 
46 |     benchmark(create_tensor)
47 | 


--------------------------------------------------------------------------------
/benchmarks/pytest.ini:
--------------------------------------------------------------------------------
 1 | # We use pytest to run benchmarks, which is weird, but so far the best benchmarking
 2 | # framework we've found is only available as a pytest plugin.
 3 | # That said, we like to organize our benchmarks seperately and with different naming
 4 | # conventions from our tests, which requires using a seperate pytest configuration.
 5 | [pytest]
 6 | python_files = *_bench.py
 7 | python_functions = bench_* *_bench
 8 | python_classes = 
 9 | markers =
10 |     gpu: marks tests that need at least one GPU
11 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | coverage:
 2 |   precision: 0
 3 |   round: down
 4 |   status:
 5 |     patch:
 6 |       default:
 7 |         target: 90
 8 |         informational: true
 9 |     project:
10 |       default:
11 |         threshold: 1%
12 |         informational: true
13 |     changes: false
14 | comment: false
15 | ignore:
16 |   - "tests/"
17 | github_checks:
18 |   # TODO(epwalsh): re-enable if there is a way to only enable annotations
19 |   # on diff lines for a PR.
20 |   annotations: false
21 | 


--------------------------------------------------------------------------------
/constraints.txt:
--------------------------------------------------------------------------------
 1 | ################################
 2 | ###### Core dependencies #######
 3 | ################################
 4 | torch<1.13.0
 5 | torchvision<0.14.0
 6 | cached-path<1.2.0
 7 | spacy<3.4
 8 | transformers<4.21
 9 | filelock<3.8
10 | wandb<0.13.0
11 | 
12 | # Protobuf is a dependency of wandb and tensorboard, but they are missing this pin.
13 | protobuf<4.0.0
14 | 
15 | # Required so pip-compile can properly resolve the pydantic version
16 | inflect<6.0
17 | 
18 | ##################################################
19 | ###### Extra dependencies for integrations #######
20 | ##################################################
21 | # NOTE: we use a special trailing comment on each line to denote which extras
22 | # each package is needed by. For example, checklist is needed by the 'checklist' extra
23 | # that you install with 'pip install allennlp[checklist]'.
24 | checklist==0.0.11  # needed by: checklist
25 | 


--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
 1 | #### TESTING-RELATED PACKAGES ####
 2 | 
 3 | # Checks style, syntax, and other useful errors.
 4 | flake8>=4.0.1
 5 | 
 6 | # Static type checking
 7 | mypy==0.961
 8 | 
 9 | # Automatic code formatting
10 | black==22.6.0
11 | 
12 | # Allows generation of coverage reports with pytest.
13 | pytest-cov>=3.0.0
14 | 
15 | # Allows codecov to generate coverage reports
16 | coverage[toml]>=6.4
17 | codecov>=2.1.12
18 | 
19 | # Optional dependencies, which we install for testing purposes.
20 | matplotlib>=2.2.3
21 | 
22 | # For mocking HTTP requests/responses.
23 | responses>=0.21
24 | 
25 | # For running tests that aren't 100% reliable.
26 | flaky>=3.7.0
27 | 
28 | # For running benchmarks.
29 | pytest-benchmark>=3.4.1
30 | 
31 | #### DOC-RELATED PACKAGES ####
32 | 
33 | # YAML manipulation
34 | ruamel.yaml>=0.17.17
35 | 
36 | # Generating markdown files from Python modules.
37 | pydoc-markdown<4.4.0
38 | databind.core<=1.5.3
39 | databind-json<=1.5.3
40 | docspec<1.2.0,>1.0.1
41 | docspec-python<1.2.0,>1.0.1
42 | 
43 | mkdocs==1.3.0
44 | mkdocs-material>=5.5.0,<8.4.0
45 | markdown-include==0.6.0
46 | 
47 | # Narrowing constraints
48 | pymdown-extensions>=9.5
49 | 
50 | #### PACKAGE-UPLOAD PACKAGES ####
51 | 
52 | # Pypi uploads
53 | twine>=1.11.0,<5.0.0
54 | setuptools
55 | wheel
56 | 


--------------------------------------------------------------------------------
/docs/css/extra.css:
--------------------------------------------------------------------------------
 1 | h4 {
 2 |     font-size: 0.9rem !important;
 3 |     font-weight: 400 !important;
 4 |     margin-top: 1.2em !important;
 5 | }
 6 | 
 7 | h2, h3, h4 {
 8 |     color: #213744;
 9 | }
10 | 
11 | .alignleft {
12 | 	float: left;
13 | }
14 | 
15 | .alignright {
16 | 	float: right;
17 | }
18 | 
19 | a.sourcelink {
20 |     color: #888;
21 | }
22 | 


--------------------------------------------------------------------------------
/docs/img/allennlp-logo-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/docs/img/allennlp-logo-dark.png


--------------------------------------------------------------------------------
/docs/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/docs/img/favicon.ico


--------------------------------------------------------------------------------
/mkdocs-skeleton.yml:
--------------------------------------------------------------------------------
 1 | site_name: AllenNLP
 2 | site_description: AllenNLP is a ..
 3 | site_url: https://allennlp.org/
 4 | 
 5 | extra_css:
 6 |   - "css/extra.css"
 7 | 
 8 | theme:
 9 |   name: material
10 |   palette:
11 |     primary: blue
12 |     accent: grey
13 |   logo: img/favicon.ico
14 |   favicon: img/favicon.ico
15 |   highlightjs: true
16 |   hljs_languages:
17 |   - python
18 |   - typescript
19 |   - json
20 | 
21 | 
22 | repo_name: allenai/allennlp
23 | # TODO(markn): Consider adding GA here, if we care about it.
24 | 
25 | nav:
26 | - Home: README.md
27 | - Repository: https://github.com/allenai/allennlp
28 | - Versions:
29 |   - Latest: /latest/
30 |   - Stable: /stable/
31 |   - Commit: /main/
32 | - API: 'This section is autogenerated, do not edit.'
33 | - Contributing: CONTRIBUTING.md
34 | - CHANGELOG: CHANGELOG.md
35 | - License: https://raw.githubusercontent.com/allenai/allennlp/main/LICENSE
36 | 
37 | markdown_extensions:
38 | - toc:
39 |     permalink: true
40 |     toc_depth: 3
41 | - markdown.extensions.codehilite:
42 |     guess_lang: true
43 | - admonition
44 | - codehilite
45 | - extra
46 | - pymdownx.highlight
47 | - pymdownx.superfences
48 | 


--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | ignore_missing_imports = true
3 | no_site_packages = true
4 | 
5 | [mypy-tests.*]
6 | strict_optional = false
7 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.black]
 2 | line-length = 100
 3 | 
 4 | include = '\.pyi?$'
 5 | 
 6 | exclude = '''
 7 | (
 8 |       __pycache__
 9 |     | \btutorials\b
10 |     | \bbuild\b
11 |     | \.git
12 |     | \.mypy_cache
13 |     | \.pytest_cache
14 |     | \.vscode
15 |     | \.venv
16 |     | \bdist\b
17 |     | \bdoc\b
18 | )
19 | '''
20 | 
21 | [build-system]
22 | requires = ["setuptools", "wheel"]
23 | build-backend = "setuptools.build_meta"
24 | 


--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
1 | -c constraints.txt
2 | -r dev-requirements.txt
3 | -r requirements.txt
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | ################################
 2 | ###### Core dependencies #######
 3 | ################################
 4 | torch>=1.10.0
 5 | torchvision>=0.8.1
 6 | cached-path>=1.1.3
 7 | fairscale==0.4.6
 8 | jsonnet>=0.10.0 ; sys.platform != 'win32'
 9 | nltk>=3.6.5
10 | spacy>=2.1.0
11 | numpy>=1.21.4
12 | tensorboardX>=1.2
13 | requests>=2.28
14 | tqdm>=4.62
15 | h5py>=3.6.0
16 | scikit-learn>=1.0.1
17 | scipy>=1.7.3
18 | pytest>=6.2.5
19 | transformers>=4.1
20 | sentencepiece>=0.1.96
21 | dataclasses;python_version<'3.7'
22 | filelock>=3.3
23 | lmdb>=1.2.1
24 | more-itertools>=8.12.0
25 | termcolor==1.1.0
26 | wandb>=0.10.0
27 | huggingface_hub>=0.0.16
28 | dill>=0.3.4
29 | base58>=2.1.1
30 | 
31 | # sacremoses should be a dependency of transformers, but it is missing, so we add it manually.
32 | sacremoses
33 | 
34 | # Spacy depends on typer, and typer had a bug. This is how we make sure we get the fixed version of typer.
35 | typer>=0.4.1
36 | 
37 | # Protobuf is a dependency of wandb and tensorboard, but they are missing this pin.
38 | protobuf>=3.12.0
39 | 
40 | # We need this for building the Docker image
41 | traitlets>5.1.1
42 | 
43 | ##################################################
44 | ###### Extra dependencies for integrations #######
45 | ##################################################
46 | # NOTE: we use a special trailing comment on each line to denote which extras
47 | # each package is needed by. For example, checklist is needed by the 'checklist' extra
48 | # that you install with 'pip install allennlp[checklist]'.
49 | checklist>=0.0.11  # needed by: checklist
50 | 


--------------------------------------------------------------------------------
/scripts/24hr_diff.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # A small script which checks if there have been any commits in the past 24 hours.
4 | # Uses `git log --no-merges` because `git whatchanged` is deprecated in recent Git.
5 | if [[ $(git log --no-merges --since 'one day ago' --oneline) ]]; then
6 |   exit 0
7 | fi
8 | exit 1
9 | 


--------------------------------------------------------------------------------
/scripts/ai2_internal/resumable_train.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Dispatches to allennlp train. Recovers if the serialization directory is
 4 | # found and is non-empty, trains from scratch otherwise.
 5 | #
 6 | # Usage:
 7 | # resumable_train.sh serialization_dir [train_arg ...]
 8 | 
 9 | serialization_dir=$1
10 | shift
11 | 
12 | # If $serialization_dir exists and is non-empty we are resuming (quoted so paths with spaces work)
13 | if [ -d "$serialization_dir" ] && [ "$(ls -A "$serialization_dir")" ]; then
14 |     echo "Recovering state from $serialization_dir"
15 |     allennlp train -r -s "$serialization_dir" "$@"
16 | else
17 |     echo "No recovery state found. Starting from scratch."
18 |     allennlp train -s "$serialization_dir" "$@"
19 | fi
20 | 
21 | 


--------------------------------------------------------------------------------
/scripts/build_docs.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -Eeuo pipefail
4 | 
5 | make clean
6 | make build-docs
7 | 


--------------------------------------------------------------------------------
/scripts/check_large_files.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # if any command inside script returns error, exit and return that error
 4 | set -e
 5 | 
 6 | # magic line to ensure that we're always inside the root of our application,
 7 | # no matter from which directory we'll run script
 8 | # thanks to it we can just enter `./scripts/check_large_files.sh <size-in-MB>`
 9 | cd "${0%/*}/.."
10 | 
11 | # Announce the check (banner text kept unchanged for anything reading the log)
12 | echo "Running tests"
13 | echo "............................"
14 | # Size threshold in MB (required first argument); abort with a usage hint if missing.
15 | SIZE=${1:?usage: $0 <max-size-in-MB>}
16 | 
17 | # Get the current branch.
18 | # branch=$(git branch | grep \* | cut -d ' ' -f2)
19 | declare -a large_files=()
20 | # Get all changed files (compared to main branch), skipping deleted ones
21 | for path in $(git diff --name-only --diff-filter=d main | sed -e 's/A[[:space:]]//');
22 | do
23 |     # Record the file if its size is greater than $SIZE MB
24 |     large_files+=($(du -m "$path" | awk -v size="$SIZE" '{if ($1 > size) print $2}'))
25 | done
26 | 
27 | # Result
28 | if [ "${#large_files[@]}" -gt 0 ];
29 | then
30 |     # Display result
31 |     echo "Found ${#large_files[@]} files have size bigger than $SIZE MB"
32 |     echo "--------------------------------------------------------"
33 |     for file in "${large_files[@]}";
34 |     do
35 |         echo "$file"
36 |     done
37 |     echo "--------------------------------------------------------"
38 |     echo "Please reduce file size before commit."
39 |     echo "Failed to commit!" && exit 1
40 | else
41 |     echo "Passed" && exit 0
42 | fi
43 | 


--------------------------------------------------------------------------------
/scripts/close_stale_issues.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime as dt
 2 | import os
 3 | 
 4 | from github import Github
 5 | 
 6 | 
 7 | LABELS_TO_EXEMPT = ["contributions welcome", "merge when ready", "under development", "help wanted"]
 8 | 
 9 | 
10 | def main():
11 |     g = Github(os.environ["GITHUB_TOKEN"])
12 |     repo = g.get_repo("allenai/allennlp")
13 |     open_issues = repo.get_issues(state="open")
14 | 
15 |     for issue in open_issues:
16 |         if (
17 |             issue.milestone is None
18 |             and not issue.assignees
19 |             and issue.pull_request is None
20 |             and (dt.utcnow() - issue.updated_at).days > 7
21 |             and (dt.utcnow() - issue.created_at).days >= 14
22 |             and not any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels())
23 |         ):
24 |             print("Closing", issue)
25 |             issue.create_comment(
26 |                 "This issue is being closed due to lack of activity. "
27 |                 "If you think it still needs to be addressed, please comment on this thread 👇"
28 |             )
29 |             issue.add_to_labels("stale")
30 |             issue.edit(state="closed")
31 | 
32 | 
33 | if __name__ == "__main__":
34 |     main()
35 | 


--------------------------------------------------------------------------------
/scripts/get_version.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import argparse
 4 | from typing import Dict
 5 | 
 6 | import requests
 7 | 
 8 | 
 9 | def parse_args():
10 |     parser = argparse.ArgumentParser()
11 |     parser.add_argument("version_type", choices=["stable", "latest", "current"])
12 |     return parser.parse_args()
13 | 
14 | 
15 | def get_current_version() -> str:
16 |     VERSION: Dict[str, str] = {}
17 |     with open("allennlp/version.py", "r") as version_file:
18 |         exec(version_file.read(), VERSION)
19 |     return "v" + VERSION["VERSION"]
20 | 
21 | 
22 | def get_latest_version() -> str:
23 |     resp = requests.get("https://api.github.com/repos/allenai/allennlp/tags")
24 |     return resp.json()[0]["name"]
25 | 
26 | 
27 | def get_stable_version() -> str:
28 |     resp = requests.get("https://api.github.com/repos/allenai/allennlp/releases/latest")
29 |     return resp.json()["tag_name"]
30 | 
31 | 
32 | def main() -> None:
33 |     opts = parse_args()
34 |     if opts.version_type == "stable":
35 |         print(get_stable_version())
36 |     elif opts.version_type == "latest":
37 |         print(get_latest_version())
38 |     elif opts.version_type == "current":
39 |         print(get_current_version())
40 |     else:
41 |         raise NotImplementedError
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     main()
46 | 


--------------------------------------------------------------------------------
/scripts/ping_issue_assignees.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime as dt
 2 | import os
 3 | 
 4 | from github import Github
 5 | 
 6 | 
 7 | def main():
 8 |     g = Github(os.environ["GITHUB_TOKEN"])
 9 |     repo = g.get_repo("allenai/allennlp")
10 |     open_issues = repo.get_issues(state="open")
11 | 
12 |     for issue in open_issues:
13 |         if (
14 |             issue.milestone is None
15 |             and issue.assignees
16 |             and issue.pull_request is None
17 |             and (dt.utcnow() - issue.updated_at).days >= 14
18 |         ):
19 |             assignees = ", ".join([f"@{user.login}" for user in issue.assignees])
20 |             print(f"Pinging {assignees} for {issue}")
21 |             issue.create_comment(
22 |                 f"{assignees} this is just a friendly ping to make sure you "
23 |                 "haven't forgotten about this issue 😜"
24 |             )
25 | 
26 | 
27 | if __name__ == "__main__":
28 |     main()
29 | 


--------------------------------------------------------------------------------
/test_fixtures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/__init__.py


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/common.jsonnet:
--------------------------------------------------------------------------------
 1 | {
 2 |     "dataset_reader": {
 3 |         "type": "text_classification_json",
 4 |         "tokenizer": {
 5 |             "type": "spacy"
 6 |         },
 7 |         "token_indexers": {
 8 |             "tokens": {
 9 |                 "type": "single_id",
10 |                 "namespace": "tokens",
11 |                 "lowercase_tokens": true
12 |             }
13 |         },
14 |         "max_sequence_length": 400
15 |     },
16 |     "train_data_path": "test_fixtures/data/text_classification_json/imdb_corpus.jsonl",
17 |     "validation_data_path": "test_fixtures/data/text_classification_json/imdb_corpus.jsonl",
18 |     "data_loader": {
19 |         "batch_sampler": {
20 |             "type": "bucket",
21 |             "batch_size": 5
22 |         },
23 |     },
24 |     "trainer": {
25 |         "optimizer": {
26 |             "type": "adam",
27 |             "lr": 0.001
28 |         },
29 |         "validation_metric": "+accuracy",
30 |         "num_epochs": 3,
31 |         "grad_norm": 10.0,
32 |         "patience": 5,
33 |         "cuda_device": -1
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/embedding_with_trainable_is_false/model.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/basic_classifier/embedding_with_trainable_is_false/model.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/experiment_from_archive.jsonnet:
--------------------------------------------------------------------------------
 1 | local COMMON = import 'common.jsonnet';
 2 | 
 3 | {
 4 |     "dataset_reader": COMMON['dataset_reader'],
 5 |     "datasets_for_vocab_creation": ["train"],
 6 |     "train_data_path": "test_fixtures/data/text_classification_json/ag_news_corpus_fake_sentiment_labels.jsonl",
 7 |     "validation_data_path": "test_fixtures/data/text_classification_json/ag_news_corpus_fake_sentiment_labels.jsonl",
 8 |     "model": {
 9 |         "type": "from_archive",
10 |         "archive_file": "test_fixtures/basic_classifier/serialization/model.tar.gz",
11 |     },
12 |     "data_loader": COMMON['data_loader'],
13 |     "trainer": COMMON['trainer'],
14 | }
15 | 


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/experiment_seq2seq.jsonnet:
--------------------------------------------------------------------------------
 1 | local COMMON = import 'common.jsonnet';  // shared sections (dataset_reader, data paths, data_loader, trainer) reused below
 2 | 
 3 | {
 4 |     "dataset_reader": COMMON['dataset_reader'],
 5 |     "datasets_for_vocab_creation": ["train"],
 6 |     "train_data_path": COMMON['train_data_path'],
 7 |     "validation_data_path": COMMON['validation_data_path'],
 8 |     "model": {
 9 |         "type": "basic_classifier",
10 |         "text_field_embedder": {
11 |             "token_embedders": {
12 |                 "tokens": {
13 |                     "type": "embedding",
14 |                     "embedding_dim": 10,
15 |                     "trainable": true
16 |                 }
17 |             }
18 |         },
19 |         "seq2seq_encoder": {
20 |             "type": "lstm",
21 |             "num_layers": 1,
22 |             "bidirectional": false,
23 |             "input_size": 10,  // equals the token embedder's embedding_dim (10)
24 |             "hidden_size": 16
25 |         },
26 |         "seq2vec_encoder": {
27 |             "type": "bag_of_embeddings",
28 |             "embedding_dim": 16,  // equals the LSTM hidden_size (16)
29 |             "averaged": true
30 |         },
31 |         "feedforward": {
32 |             "input_dim": 16,  // equals the seq2vec_encoder embedding_dim (16)
33 |             "num_layers": 1,
34 |             "hidden_dims": 20,
35 |             "activations": "relu",
36 |             "dropout": 0.1
37 |       }
38 |     },
39 |     "data_loader": COMMON['data_loader'],
40 |     "trainer": COMMON['trainer']
41 | }
42 | 


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/experiment_seq2vec.jsonnet:
--------------------------------------------------------------------------------
 1 | local COMMON = import 'common.jsonnet';  // shared sections (dataset_reader, data paths, data_loader, trainer) reused below
 2 | 
 3 | {
 4 |     "dataset_reader": COMMON['dataset_reader'],
 5 |     "datasets_for_vocab_creation": ["train"],
 6 |     "train_data_path": COMMON['train_data_path'], 
 7 |     "validation_data_path": COMMON['train_data_path'],  // NOTE(review): validation reuses the training path; experiment_seq2seq.jsonnet uses COMMON['validation_data_path'] here — confirm this is intentional
 8 |     "model": {
 9 |         "type": "basic_classifier",
10 |         "text_field_embedder": {
11 |             "token_embedders": {
12 |                 "tokens": {
13 |                     "type": "embedding",
14 |                     "embedding_dim": 10,
15 |                     "trainable": true
16 |                 }
17 |             }
18 |         },
19 |         "seq2vec_encoder": {
20 |            "type": "cnn",
21 |            "num_filters": 8,
22 |            "embedding_dim": 10,  // equals the token embedder's embedding_dim (10)
23 |            "output_dim": 16
24 |         }
25 |     },
26 |     "data_loader": COMMON['data_loader'],
27 |     "trainer": COMMON['trainer'],
28 | }
29 | 


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/from_archive_serialization/model.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/basic_classifier/from_archive_serialization/model.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/parameters_inspection.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "_classification_layer": {
 3 |         "bias": "tunable",
 4 |         "weight": "tunable"
 5 |     },
 6 |     "_feedforward": {
 7 |         "_linear_layers": {
 8 |             "0": {
 9 |                 "bias": "tunable",
10 |                 "weight": "tunable"
11 |             }
12 |         }
13 |     },
14 |     "_seq2seq_encoder": {
15 |         "_module": {
16 |             "bias_hh_l0": "tunable",
17 |             "bias_ih_l0": "tunable",
18 |             "weight_hh_l0": "tunable",
19 |             "weight_ih_l0": "tunable"
20 |         }
21 |     },
22 |     "_text_field_embedder": {
23 |         "token_embedder_tokens": {
24 |             "weight": "tunable"
25 |         }
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/serialization/best.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/basic_classifier/serialization/best.th


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/serialization/model.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/basic_classifier/serialization/model.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/serialization/vocabulary/labels.txt:
--------------------------------------------------------------------------------
1 | neg
2 | pos
3 | 


--------------------------------------------------------------------------------
/test_fixtures/basic_classifier/serialization/vocabulary/non_padded_namespaces.txt:
--------------------------------------------------------------------------------
1 | *labels
2 | *tags
3 | 


--------------------------------------------------------------------------------
/test_fixtures/common/.gitignore:
--------------------------------------------------------------------------------
1 | /quote-tar-gz-extracted/
2 | 


--------------------------------------------------------------------------------
/test_fixtures/common/external_symlink.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/common/external_symlink.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/common/quote.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/common/quote.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/data/babi.txt:
--------------------------------------------------------------------------------
 1 | 1 Gertrude is a cat.
 2 | 2 Cats are afraid of sheep.
 3 | 3 Jessica is a sheep.
 4 | 4 Mice are afraid of wolves.
 5 | 5 Emily is a wolf.
 6 | 6 Winona is a mouse.
 7 | 7 Wolves are afraid of sheep.
 8 | 8 Sheep are afraid of wolves.
 9 | 9 What is Gertrude afraid of?	sheep	1 2
10 | 10 What is Winona afraid of?	wolf	4 6
11 | 11 What is Emily afraid of?	sheep	5 7
12 | 12 What is Jessica afraid of?	wolf	3 8
13 | 1 Mice are afraid of wolves.
14 | 2 Gertrude is a mouse.
15 | 3 Sheep are afraid of mice.
16 | 4 Winona is a cat.
17 | 5 Wolves are afraid of mice.
18 | 6 Emily is a sheep.
19 | 7 Jessica is a wolf.
20 | 8 Cats are afraid of mice.
21 | 9 What is Emily afraid of?	mouse	3 6
22 | 10 What is Winona afraid of?	mouse	4 8
23 | 11 What is Gertrude afraid of?	wolf	1 2
24 | 12 What is Jessica afraid of?	mouse	5 7
25 | 


--------------------------------------------------------------------------------
/test_fixtures/data/brown_corpus.txt:
--------------------------------------------------------------------------------
 1 | cats/N are/V animals/N ./N
 2 | 
 3 | 
 4 | dogs/N are/V animals/N ./N
 5 | 
 6 | 
 7 | snakes/N are/V animals/N ./N
 8 | 
 9 | 
10 | birds/N are/V animals/N ./N
11 | 


--------------------------------------------------------------------------------
/test_fixtures/data/conll2003.txt:
--------------------------------------------------------------------------------
 1 | -DOCSTART- -X- -X- O
 2 | 
 3 | U.N.	NNP	I-NP	I-ORG
 4 | official	NN	I-NP	O
 5 | Ekeus	NNP	I-NP	I-PER
 6 | heads	VBZ	I-VP	O
 7 | for	IN	I-PP	O
 8 | Baghdad	NNP	I-NP	I-LOC
 9 | .	.	O	O
10 | 
11 | -DOCSTART- -X- -X- O
12 | 
13 | AI2	NNP	I-NP	I-ORG
14 | engineer	NN	I-NP	O
15 | Joel	NNP	I-NP	I-PER
16 | lives	VBZ	I-VP	O
17 | in	IN	I-PP	O
18 | Seattle	NNP	I-NP	I-LOC
19 | .	.	O	O
20 | 


--------------------------------------------------------------------------------
/test_fixtures/data/images/COCO_train2014_000000458752.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/data/images/COCO_train2014_000000458752.jpg


--------------------------------------------------------------------------------
/test_fixtures/data/sequence_tagging.tsv:
--------------------------------------------------------------------------------
1 | cats###N	are###V	animals###N	.###N
2 | dogs###N	are###V	animals###N	.###N
3 | snakes###N	are###V	animals###N	.###N
4 | birds###N	are###V	animals###N	.###N
5 | horses###N	are###V	animals###N	.###N
6 | 


--------------------------------------------------------------------------------
/test_fixtures/data/shards/sequence_tagging_00.tsv:
--------------------------------------------------------------------------------
1 | cats###N	are###V	animals###N	.###N
2 | dogs###N	are###V	animals###N	.###N
3 | snakes###N	are###V	animals###N	.###N
4 | birds###N	are###V	animals###N	.###N
5 | 


--------------------------------------------------------------------------------
/test_fixtures/data/shards/sequence_tagging_01.tsv:
--------------------------------------------------------------------------------
1 | ferns###N	are###V	plants###N	.###N
2 | trees###N	are###V	plants###N	.###N
3 | flowers###N	are###V	plants###N	.###N
4 | vegetables###N	are###V	plants###N	.###N
5 | 


--------------------------------------------------------------------------------
/test_fixtures/data/shards/sequence_tagging_02.tsv:
--------------------------------------------------------------------------------
1 | cars###N	are###V	vehicles###N	.###N
2 | buses###N	are###V	vehicles###N	.###N
3 | planes###N	are###V	vehicles###N	.###N
4 | rockets###N	are###V	vehicles###N	.###N
5 | 


--------------------------------------------------------------------------------
/test_fixtures/data/text_classification_json/ag_news_corpus.jsonl:
--------------------------------------------------------------------------------
1 | {"label":2,"text":"Memphis Rout Still Stings for No. 14 Louisville; Coach Petrino Vows to Have Team Better Prepared. NASHVILLE, Tenn. Nov 3, 2004 - Louisville #39;s 30-point loss at home to Memphis last season is still a painful memory for the Cardinals.","headline":"Memphis Rout Still Stings for Louisville"}
2 | {"label":2,"text":"AP - Eli Manning has replaced Kurt Warner as the New York Giants' starting quarterback.","headline":"Manning Replaces Warner As Giants QB (AP)"}
3 | {"label":4,"text":"A conference dedicated to online journalism explores the effect blogs have on news reporting. Some say they draw attention to under-reported stories. Others struggle to establish the credibility enjoyed by professionals.","headline":"Do Blogs Change the News?"}
4 | 


--------------------------------------------------------------------------------
/test_fixtures/data/text_classification_json/ag_news_corpus_fake_sentiment_labels.jsonl:
--------------------------------------------------------------------------------
1 | {"label":"pos","text":"Memphis Rout Still Stings for No. 14 Louisville; Coach Petrino Vows to Have Team Better Prepared. NASHVILLE, Tenn. Nov 3, 2004 - Louisville #39;s 30-point loss at home to Memphis last season is still a painful memory for the Cardinals.","headline":"Memphis Rout Still Stings for Louisville"}
2 | {"label":"neg","text":"AP - Eli Manning has replaced Kurt Warner as the New York Giants' starting quarterback.","headline":"Manning Replaces Warner As Giants QB (AP)"}
3 | {"label":"pos","text":"A conference dedicated to online journalism explores the effect blogs have on news reporting. Some say they draw attention to under-reported stories. Others struggle to establish the credibility enjoyed by professionals.","headline":"Do Blogs Change the News?"}
4 | 


--------------------------------------------------------------------------------
/test_fixtures/data/text_classification_json/integer_labels.jsonl:
--------------------------------------------------------------------------------
1 | {"label": 0, "text": "This text has label 0"}
2 | {"label": 1, "text": "This text has label 1"}


--------------------------------------------------------------------------------
/test_fixtures/data/vocab.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/data/vocab.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/data/vocab.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/data/vocab.zip


--------------------------------------------------------------------------------
/test_fixtures/elmo/config/characters_token_embedder.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "dataset_reader": {
 3 |       "type": "conll2003",
 4 |       "tag_label": "ner",
 5 |       "token_indexers": {
 6 |         "tokens": {
 7 |           "type": "single_id",
 8 |           "lowercase_tokens": true
 9 |         },
10 |         "elmo": {
11 |           "type": "elmo_characters"
12 |         }
13 |       }
14 |     },
15 |     "train_data_path": "test_fixtures/data/conll2003.txt",
16 |     "validation_data_path": "test_fixtures/data/conll2003.txt",
17 |     "model": {
18 |       "type": "simple_tagger",
19 |       "text_field_embedder": {
20 |           "token_embedders": {
21 |               "tokens": {
22 |                 "type": "embedding",
23 |                 "embedding_dim": 50
24 |               },
25 |               "elmo": {
26 |                 "type": "elmo_token_embedder",
27 |                 "options_file": "test_fixtures/elmo/options.json",
28 |                 "weight_file": "test_fixtures/elmo/lm_weights.hdf5"
29 |               }
30 |           }
31 |       },
32 |       "encoder": {
33 |               "type": "gru",
34 |               "input_size": 82,
35 |               "hidden_size": 25,
36 |               "num_layers": 2,
37 |               "dropout": 0.5,
38 |               "bidirectional": true
39 |       },
40 |       "regularizer": {
41 |         "regexes": [
42 |           ["transitions$", {"type": "l2", "alpha": 0.01}]
43 |         ]
44 |       }
45 |     },
46 |     "data_loader": {"batch_size": 32},
47 |     "trainer": {
48 |       "optimizer": "adam",
49 |       "num_epochs": 5,
50 |       "cuda_device": -1
51 |     }
52 |   }
53 | 


--------------------------------------------------------------------------------
/test_fixtures/elmo/elmo_token_embeddings.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/elmo/elmo_token_embeddings.hdf5


--------------------------------------------------------------------------------
/test_fixtures/elmo/lm_embeddings_0.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/elmo/lm_embeddings_0.hdf5


--------------------------------------------------------------------------------
/test_fixtures/elmo/lm_embeddings_1.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/elmo/lm_embeddings_1.hdf5


--------------------------------------------------------------------------------
/test_fixtures/elmo/lm_embeddings_2.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/elmo/lm_embeddings_2.hdf5


--------------------------------------------------------------------------------
/test_fixtures/elmo/lm_weights.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/elmo/lm_weights.hdf5


--------------------------------------------------------------------------------
/test_fixtures/elmo/options.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "lstm": {
 3 |     "cell_clip": 3,
 4 |     "use_skip_connections": true,
 5 |     "n_layers": 2,
 6 |     "proj_clip": 3,
 7 |     "projection_dim": 16,
 8 |     "dim": 64
 9 |   },
10 |   "char_cnn": {
11 |     "embedding": {
12 |       "dim": 4
13 |     },
14 |     "filters": [
15 |       [1, 4],
16 |       [2, 8],
17 |       [3, 16],
18 |       [4, 32],
19 |       [5, 64]
20 |     ],
21 |     "n_highway": 2,
22 |     "n_characters": 262,
23 |     "max_characters_per_token": 50,
24 |     "activation": "relu"
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/test_fixtures/embeddings/fake_embeddings.5d.txt:
--------------------------------------------------------------------------------
 1 | If 0.798 0.817 0.213 0.501 0.712
 2 | you 0.723 0.626 0.850 0.024 0.715
 3 | think 0.143 0.189 0.555 0.361 0.472
 4 | are 0.095 0.023 0.760 0.773 0.501
 5 | too 0.424 0.834 0.341 0.550 0.250
 6 | small 0.072 0.154 0.410 0.436 0.417
 7 | to 0.510 0.358 0.086 0.459 0.024
 8 | make 0.878 0.651 0.044 0.264 0.872
 9 | a 0.267 0.036 0.937 0.782 0.331
10 | difference 0.053 0.162 0.671 0.110 0.259
11 | try 0.929 0.813 0.396 0.053 0.049
12 | sleeping 0.991 0.532 0.972 0.165 0.203
13 | with 0.042 0.408 0.231 0.294 0.237
14 | mosquito 0.017 0.479 0.909 0.488 0.296
15 | àèìòù 1.0 2.0 3.0 4.0 5.0
16 | 


--------------------------------------------------------------------------------
/test_fixtures/embeddings/fake_embeddings.5d.txt.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/embeddings/fake_embeddings.5d.txt.bz2


--------------------------------------------------------------------------------
/test_fixtures/embeddings/fake_embeddings.5d.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/embeddings/fake_embeddings.5d.txt.gz


--------------------------------------------------------------------------------
/test_fixtures/embeddings/fake_embeddings.5d.txt.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/embeddings/fake_embeddings.5d.txt.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/embeddings/fake_embeddings.5d.txt.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/embeddings/fake_embeddings.5d.txt.xz


--------------------------------------------------------------------------------
/test_fixtures/embeddings/fake_embeddings.5d.txt.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/embeddings/fake_embeddings.5d.txt.zip


--------------------------------------------------------------------------------
/test_fixtures/embeddings/glove.6B.100d.sample.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/embeddings/glove.6B.100d.sample.txt.gz


--------------------------------------------------------------------------------
/test_fixtures/embeddings/glove.6B.300d.sample.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/embeddings/glove.6B.300d.sample.txt.gz


--------------------------------------------------------------------------------
/test_fixtures/embeddings/multi-file-archive.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/embeddings/multi-file-archive.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/embeddings/multi-file-archive.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/embeddings/multi-file-archive.zip


--------------------------------------------------------------------------------
/test_fixtures/fairness/definitional_pairs.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     [
 3 |         "woman",
 4 |         "man"
 5 |     ],
 6 |     [
 7 |         "girl",
 8 |         "boy"
 9 |     ],
10 |     [
11 |         "she",
12 |         "he"
13 |     ],
14 |     [
15 |         "mother",
16 |         "father"
17 |     ],
18 |     [
19 |         "daughter",
20 |         "son"
21 |     ],
22 |     [
23 |         "gal",
24 |         "guy"
25 |     ],
26 |     [
27 |         "female",
28 |         "male"
29 |     ],
30 |     [
31 |         "her",
32 |         "his"
33 |     ],
34 |     [
35 |         "herself",
36 |         "himself"
37 |     ],
38 |     [
39 |         "Mary",
40 |         "John"
41 |     ]
42 | ]


--------------------------------------------------------------------------------
/test_fixtures/plugins/.allennlp_plugins:
--------------------------------------------------------------------------------
1 | d
2 | 


--------------------------------------------------------------------------------
/test_fixtures/plugins/d/__init__.py:
--------------------------------------------------------------------------------
1 | from d.d import D
2 | 


--------------------------------------------------------------------------------
/test_fixtures/plugins/d/d.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from allennlp.commands import Subcommand
 4 | 
 5 | 
 6 | def do_nothing(_):  # placeholder handler: takes one argument and ignores it
 7 |     pass  # deliberately empty; the function implicitly returns None
 8 | 
 9 | 
10 | @Subcommand.register("d")  # registers this class with Subcommand under the name "d"
11 | class D(Subcommand):  # fake subcommand that lives in the plugin test fixture package `d`
12 |     def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.ArgumentParser:  # adds this command's parser to `parser` and returns it
13 |         subparser = parser.add_parser(self.name, description="fake", help="fake help")  # self.name is not set here; presumably provided by Subcommand from the registered name — confirm
14 |         subparser.set_defaults(func=do_nothing)  # parsed args for this subcommand carry func=do_nothing, so running it has no effect
15 |         return subparser
16 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/experiment.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "dataset_reader":{"type":"sequence_tagging"},
 3 |   "train_data_path": "test_fixtures/data/sequence_tagging.tsv",
 4 |   "validation_data_path": "test_fixtures/data/sequence_tagging.tsv",
 5 |   "model": {
 6 |     "type": "simple_tagger",
 7 |     "text_field_embedder": {
 8 |       "token_embedders": {
 9 |         "tokens": {
10 |             "type": "embedding",
11 |             "projection_dim": 2,
12 |             "pretrained_file": "test_fixtures/embeddings/glove.6B.100d.sample.txt.gz",
13 |             "embedding_dim": 100,
14 |             "trainable": true
15 |         }
16 |       }
17 |     },
18 |     "encoder": {
19 |       "type": "lstm",
20 |       "input_size": 2,
21 |       "hidden_size": 4,
22 |       "num_layers": 1
23 |     }
24 |   },
25 |   "data_loader": {
26 |       "batch_sampler": {
27 |         "type": "bucket",
28 |         "sorting_keys": ["tokens"],
29 |         "padding_noise": 0.0,
30 |         "batch_size" : 80
31 |     }
32 |   },
33 |   "trainer": {
34 |     "num_epochs": 1,
35 |     "grad_norm": 1.0,
36 |     "patience": 500,
37 |     "cuda_device": -1,
38 |     "optimizer": {
39 |       "type": "adadelta",
40 |       "lr": 0.000001,
41 |       "rho": 0.95
42 |     }
43 |   }
44 | }
45 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/experiment_with_regularization.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "dataset_reader":{"type":"sequence_tagging"},
 3 |     "train_data_path": "test_fixtures/data/sequence_tagging.tsv",
 4 |     "validation_data_path": "test_fixtures/data/sequence_tagging.tsv",
 5 |     "model": {
 6 |       "type": "simple_tagger",
 7 |       "text_field_embedder": {
 8 |         "token_embedders": {
 9 |             "tokens": {
10 |             "type": "embedding",
11 |             "projection_dim": 2,
12 |             "pretrained_file": "test_fixtures/embeddings/glove.6B.100d.sample.txt.gz",
13 |             "embedding_dim": 100,
14 |             "trainable": true
15 |             }
16 |         }
17 |       },
18 |       "encoder": {
19 |         "type": "lstm",
20 |         "input_size": 2,
21 |         "hidden_size": 4,
22 |         "num_layers": 1
23 |       },
24 |       "regularizer": {
25 |         "regexes": [
26 |           ["weight$", {"type": "l2", "alpha": 10}],
27 |           ["bias$", {"type": "l1", "alpha": 5}]
28 |         ]
29 |       }
30 |     },
31 |     "data_loader": {
32 |       "batch_sampler": {
33 |           "type": "bucket",
34 |           "batch_size": 80,
35 |           "padding_noise": 0.0
36 |       }
37 |   },
38 |     "trainer": {
39 |       "num_epochs": 1,
40 |       "grad_norm": 1.0,
41 |       "patience": 500,
42 |       "cuda_device": -1,
43 |       "optimizer": {
44 |         "type": "adadelta",
45 |         "lr": 0.000001,
46 |         "rho": 0.95
47 |       }
48 |     }
49 |   }
50 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/model_test_case.jsonnet:
--------------------------------------------------------------------------------
 1 | {
 2 |   "dataset_reader":{"type":"sequence_tagging"},
 3 |   "train_data_path": "test_fixtures/data/sequence_tagging.tsv",
 4 |   "validation_data_path": "test_fixtures/data/sequence_tagging.tsv",  // same file as train_data_path
 5 |   "model": {
 6 |     "type": "simple_tagger",
 7 |     "text_field_embedder": {
 8 |       "token_embedders": {
 9 |         "tokens": {
10 |             "type": "embedding",
11 |             "projection_dim": 2,
12 |             "pretrained_file": "test_fixtures/embeddings/glove.6B.100d.sample.txt.gz",
13 |             "embedding_dim": 100,  // consistent with the "100d" in the pretrained file name
14 |             "trainable": true
15 |         }
16 |       }
17 |     },
18 |     "encoder": {
19 |       "type": "lstm",
20 |       "input_size": 2,  // equals the embedder's projection_dim (2)
21 |       "hidden_size": 4,
22 |       "num_layers": 1
23 |     }
24 |   },
25 |   "data_loader": {
26 |       "batch_sampler": {
27 |         "type": "bucket",
28 |         "sorting_keys": ["tokens"],
29 |         "padding_noise": 0.0,
30 |         "batch_size" : 80
31 |     }
32 |   },
33 |   "trainer": {
34 |     "num_epochs": 40,  // simple_tagger/experiment.json trains for 1 epoch instead
35 |     "grad_norm": 1.0,
36 |     "patience": 500,
37 |     "cuda_device": -1,
38 |     "optimizer": {
39 |       "type": "adam",  // simple_tagger/experiment.json uses adadelta with lr 0.000001 instead
40 |       "lr": 0.01
41 |     }
42 |   }
43 | }
44 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization/best.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger/serialization/best.th


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization/model.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger/serialization/model.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization/vocabulary/labels.txt:
--------------------------------------------------------------------------------
1 | O
2 | U-ORG
3 | U-PER
4 | U-LOC
5 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization/vocabulary/non_padded_namespaces.txt:
--------------------------------------------------------------------------------
1 | *labels
2 | *tags
3 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization/vocabulary/tokens.txt:
--------------------------------------------------------------------------------
 1 | @@UNKNOWN@@
 2 | .
 3 | u.n.
 4 | official
 5 | ekeus
 6 | heads
 7 | for
 8 | baghdad
 9 | ai2
10 | engineer
11 | joel
12 | lives
13 | in
14 | seattle
15 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/best.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger/serialization_full/best.th


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "dataset_reader": {
 3 |         "type": "sequence_tagging"
 4 |     },
 5 |     "model": {
 6 |         "type": "simple_tagger",
 7 |         "encoder": {
 8 |             "type": "lstm",
 9 |             "hidden_size": 4,
10 |             "input_size": 2,
11 |             "num_layers": 1
12 |         },
13 |         "text_field_embedder": {
14 |             "token_embedders": {
15 |                 "tokens": {
16 |                     "type": "embedding",
17 |                     "embedding_dim": 100,
18 |                     "pretrained_file": "test_fixtures/embeddings/glove.6B.100d.sample.txt.gz",
19 |                     "projection_dim": 2,
20 |                     "trainable": true
21 |                 }
22 |             }
23 |         }
24 |     },
25 |     "train_data_path": "test_fixtures/data/sequence_tagging.tsv",
26 |     "validation_data_path": "test_fixtures/data/sequence_tagging.tsv",
27 |     "trainer": {
28 |         "cuda_device": -1,
29 |         "grad_norm": 1,
30 |         "num_epochs": 1,
31 |         "optimizer": {
32 |             "type": "adadelta",
33 |             "lr": 1e-06,
34 |             "rho": 0.95
35 |         },
36 |         "patience": 500
37 |     },
38 |     "data_loader": {
39 |         "batch_sampler": {
40 |             "type": "bucket",
41 |             "batch_size": 80,
42 |             "padding_noise": 0,
43 |             "sorting_keys": [
44 |                 "tokens"
45 |             ]
46 |         }
47 |     }
48 | }


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/meta.json:
--------------------------------------------------------------------------------
1 | {"version": "2.8.0"}


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/metrics.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "best_epoch": 0,
 3 |   "peak_worker_0_memory_MB": 479.12109375,
 4 |   "training_duration": "0:00:00.015016",
 5 |   "epoch": 0,
 6 |   "training_accuracy": 0.75,
 7 |   "training_accuracy3": 1.0,
 8 |   "training_loss": 0.6266683340072632,
 9 |   "training_worker_0_memory_MB": 479.12109375,
10 |   "validation_accuracy": 0.75,
11 |   "validation_accuracy3": 1.0,
12 |   "validation_loss": 0.6266683340072632,
13 |   "best_validation_accuracy": 0.75,
14 |   "best_validation_accuracy3": 1.0,
15 |   "best_validation_loss": 0.6266683340072632
16 | }


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/metrics_epoch_0.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "best_epoch": 0,
 3 |   "peak_worker_0_memory_MB": 479.12109375,
 4 |   "training_duration": "0:00:00.015016",
 5 |   "epoch": 0,
 6 |   "training_accuracy": 0.75,
 7 |   "training_accuracy3": 1.0,
 8 |   "training_loss": 0.6266683340072632,
 9 |   "training_worker_0_memory_MB": 479.12109375,
10 |   "validation_accuracy": 0.75,
11 |   "validation_accuracy3": 1.0,
12 |   "validation_loss": 0.6266683340072632,
13 |   "best_validation_accuracy": 0.75,
14 |   "best_validation_accuracy3": 1.0,
15 |   "best_validation_loss": 0.6266683340072632
16 | }


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/model.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger/serialization_full/model.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/model_state_e1_b0.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger/serialization_full/model_state_e1_b0.th


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/training_state_e1_b0.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger/serialization_full/training_state_e1_b0.th


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/vocabulary/.lock:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger/serialization_full/vocabulary/.lock


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/vocabulary/labels.txt:
--------------------------------------------------------------------------------
1 | N
2 | V
3 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/vocabulary/non_padded_namespaces.txt:
--------------------------------------------------------------------------------
1 | *tags
2 | *labels
3 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger/serialization_full/vocabulary/tokens.txt:
--------------------------------------------------------------------------------
 1 | @@UNKNOWN@@
 2 | are
 3 | animals
 4 | .
 5 | cats
 6 | dogs
 7 | snakes
 8 | birds
 9 | horses
10 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_elmo/experiment.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "dataset_reader": {
 3 |     "type": "conll2003",
 4 |     "tag_label": "ner",
 5 |     "coding_scheme": "BIOUL",
 6 |     "token_indexers": {
 7 |       "elmo": {
 8 |         "type": "elmo_characters"
 9 |       }
10 |     }
11 |   },
12 |   "train_data_path": "test_fixtures/data/conll2003.txt",
13 |   "validation_data_path": "test_fixtures/data/conll2003.txt",
14 |   "model": {
15 |     "type": "simple_tagger",
16 |     "text_field_embedder": {
17 |       "token_embedders": {
18 |         "elmo": {
19 |           "type": "elmo_token_embedder",
20 |           "options_file": "test_fixtures/elmo/options.json",
21 |           "weight_file": "test_fixtures/elmo/lm_weights.hdf5"
22 |         }
23 |       }
24 |     },
25 |     "encoder": {
26 |       "type": "gru",
27 |       "input_size": 32,
28 |       "hidden_size": 25,
29 |       "num_layers": 2,
30 |       "dropout": 0.5,
31 |       "bidirectional": true
32 |     },
33 |     "regularizer": {
34 |       "regexes": [
35 |         ["transitions$", {"type": "l2", "alpha": 0.01}]
36 |       ]
37 |     }
38 |   },
39 |   "data_loader": {"batch_size": 32},
40 |   "trainer": {
41 |     "optimizer": "adam",
42 |     "num_epochs": 5,
43 |     "cuda_device": -1
44 |   }
45 | }
46 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_elmo/serialization/best.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger_with_elmo/serialization/best.th


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_elmo/serialization/model.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger_with_elmo/serialization/model.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_elmo/serialization/vocabulary/labels.txt:
--------------------------------------------------------------------------------
1 | O
2 | U-ORG
3 | U-PER
4 | U-LOC
5 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_elmo/serialization/vocabulary/non_padded_namespaces.txt:
--------------------------------------------------------------------------------
1 | *labels
2 | *tags
3 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_span_f1/serialization/best.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger_with_span_f1/serialization/best.th


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_span_f1/serialization/model.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/simple_tagger_with_span_f1/serialization/model.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_span_f1/serialization/vocabulary/labels.txt:
--------------------------------------------------------------------------------
1 | O
2 | I-ORG
3 | I-PER
4 | I-LOC
5 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_span_f1/serialization/vocabulary/non_padded_namespaces.txt:
--------------------------------------------------------------------------------
1 | *labels
2 | *tags
3 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_span_f1/serialization/vocabulary/test_tokens.txt:
--------------------------------------------------------------------------------
 1 | @@UNKNOWN@@
 2 | .
 3 | u.n.
 4 | official
 5 | ekeus
 6 | heads
 7 | for
 8 | baghdad
 9 | ai2
10 | engineer
11 | joel
12 | lives
13 | in
14 | seattle
15 | 


--------------------------------------------------------------------------------
/test_fixtures/simple_tagger_with_span_f1/serialization/vocabulary/tokens.txt:
--------------------------------------------------------------------------------
 1 | @@UNKNOWN@@
 2 | .
 3 | u.n.
 4 | official
 5 | ekeus
 6 | heads
 7 | for
 8 | baghdad
 9 | ai2
10 | engineer
11 | joel
12 | lives
13 | in
14 | seattle
15 | 


--------------------------------------------------------------------------------
/test_fixtures/task_suites/fake_suite.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/task_suites/fake_suite.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/utf-8_sample/archives/utf-8.tar.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/utf-8_sample/archives/utf-8.tar.bz2


--------------------------------------------------------------------------------
/test_fixtures/utf-8_sample/archives/utf-8.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/utf-8_sample/archives/utf-8.tar.gz


--------------------------------------------------------------------------------
/test_fixtures/utf-8_sample/archives/utf-8.tar.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/utf-8_sample/archives/utf-8.tar.xz


--------------------------------------------------------------------------------
/test_fixtures/utf-8_sample/archives/utf-8.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/utf-8_sample/archives/utf-8.zip


--------------------------------------------------------------------------------
/test_fixtures/utf-8_sample/utf-8_sample.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/utf-8_sample/utf-8_sample.txt.gz


--------------------------------------------------------------------------------
/test_fixtures/utf-8_sample/utf-8_sample.txt.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/test_fixtures/utf-8_sample/utf-8_sample.txt.zip


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/__init__.py


--------------------------------------------------------------------------------
/tests/commands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/commands/__init__.py


--------------------------------------------------------------------------------
/tests/commands/build_vocab_test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | import pytest
 5 | 
 6 | from allennlp.commands import main
 7 | from allennlp.common.testing import AllenNlpTestCase
 8 | from allennlp.data import Vocabulary
 9 | 
10 | 
11 | class TestBuildVocabCommand(AllenNlpTestCase):
12 |     def test_build_vocab(self):
13 |         output_path = self.TEST_DIR / "vocab.tar.gz"
14 |         sys.argv = [
15 |             "allennlp",
16 |             "build-vocab",
17 |             str(self.FIXTURES_ROOT / "basic_classifier" / "experiment_seq2seq.jsonnet"),
18 |             str(output_path),
19 |         ]
20 |         main()
21 |         assert os.path.exists(output_path)
22 |         vocab = Vocabulary.from_files(output_path)
23 |         vocab.get_token_index("neg", "labels") == 0
24 | 
25 |         # If we try again, this time we should get a RuntimeError because the vocab archive
26 |         # already exists at the output path.
27 |         with pytest.raises(RuntimeError, match="already exists"):
28 |             main()
29 | 
30 |         # But now if add the '--force' argument, it will override the file.
31 |         sys.argv.append("--force")
32 |         main()
33 | 


--------------------------------------------------------------------------------
/tests/commands/no_op_train_test.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict
 2 | 
 3 | import torch
 4 | 
 5 | from allennlp.commands.train import train_model
 6 | from allennlp.common import Params
 7 | from allennlp.common.testing import AllenNlpTestCase
 8 | from allennlp.models import load_archive, Model
 9 | 
10 | SEQUENCE_TAGGING_DATA_PATH = str(AllenNlpTestCase.FIXTURES_ROOT / "data" / "sequence_tagging.tsv")
11 | 
12 | 
13 | @Model.register("constant")  # the name used as the model "type" in the test config
14 | class ConstantModel(Model):  # test-only model: `forward` never looks at its inputs
15 |     def forward(self, *inputs) -> Dict[str, torch.Tensor]:
16 |         return {"class": torch.tensor(98)}  # fixed value that `TestTrain` asserts on
17 | 
18 | 
19 | class TestTrain(AllenNlpTestCase):
20 |     def test_train_model(self):
21 |         params = lambda: Params(
22 |             {
23 |                 "model": {"type": "constant"},
24 |                 "dataset_reader": {"type": "sequence_tagging", "max_instances": 4},
25 |                 "train_data_path": SEQUENCE_TAGGING_DATA_PATH,
26 |                 "validation_data_path": SEQUENCE_TAGGING_DATA_PATH,
27 |                 "data_loader": {"batch_size": 2},
28 |                 "trainer": {"type": "no_op"},
29 |             }
30 |         )
31 | 
32 |         serialization_dir = self.TEST_DIR / "serialization_directory"
33 |         train_model(params(), serialization_dir=serialization_dir)
34 |         archive = load_archive(serialization_dir / "model.tar.gz")
35 |         model = archive.model
36 |         assert model.forward(torch.tensor([1, 2, 3]))["class"] == torch.tensor(98)
37 |         assert model.vocab.get_vocab_size() == 9
38 | 


--------------------------------------------------------------------------------
/tests/commands/test_install_test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from allennlp.common.testing import AllenNlpTestCase
 4 | from allennlp.commands.test_install import _get_module_root
 5 | 
 6 | 
 7 | class TestTestInstall(AllenNlpTestCase):
 8 |     def test_get_module_root(self):
 9 |         """
10 |         When a user runs `allennlp test-install`, we have no idea where
11 |         they're running it from, so we do an `os.chdir` to the _module_
12 |         root in order to get all the paths in the fixtures to resolve properly.
13 | 
14 |         The logic within `allennlp test-install` is pretty hard to test in
15 |         its entirety, so this test is verifies that the `os.chdir` component
16 |         works properly by checking that we correctly find the path to
17 |         `os.chdir` to.
18 |         """
19 |         project_root = _get_module_root()
20 |         assert os.path.exists(os.path.join(project_root, "__main__.py"))
21 | 


--------------------------------------------------------------------------------
/tests/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/common/__init__.py


--------------------------------------------------------------------------------
/tests/common/plugins_test.py:
--------------------------------------------------------------------------------
 1 | from allennlp.commands import Subcommand
 2 | from allennlp.common.plugins import (
 3 |     discover_plugins,
 4 |     import_plugins,
 5 | )
 6 | from allennlp.common.testing import AllenNlpTestCase
 7 | from allennlp.common.util import pushd
 8 | 
 9 | 
10 | class TestPlugins(AllenNlpTestCase):
11 |     def setup_method(self):
12 |         super().setup_method()
13 |         self.plugins_root = self.FIXTURES_ROOT / "plugins"
14 | 
15 |     def test_no_plugins(self):
16 |         available_plugins = set(discover_plugins())
17 |         assert available_plugins == set()
18 | 
19 |     def test_file_plugin(self):
20 |         available_plugins = set(discover_plugins())
21 |         assert available_plugins == set()
22 | 
23 |         with pushd(self.plugins_root):
24 |             available_plugins = set(discover_plugins())
25 |             assert available_plugins == {"d"}
26 | 
27 |             import_plugins()
28 |             subcommands_available = Subcommand.list_available()
29 |             assert "d" in subcommands_available
30 | 


--------------------------------------------------------------------------------
/tests/common/task_card_test.py:
--------------------------------------------------------------------------------
 1 | from allennlp.common.testing import AllenNlpTestCase
 2 | from allennlp.common.task_card import TaskCard
 3 | 
 4 | 
 5 | class TestTaskCard(AllenNlpTestCase):
 6 |     def test_init(self):
 7 |         task_card = TaskCard(
 8 |             id="fake_name",
 9 |             name="Fake Name",
10 |             description="Task's description",
11 |             expected_inputs="Passage (text string), Question (text string)",
12 |             expected_outputs="Answer span (start token position and end token position).",
13 |             examples=[
14 |                 {
15 |                     "premise": "A handmade djembe was on display at the Smithsonian.",
16 |                     "hypothesis": "Visitors could see the djembe.",
17 |                 }
18 |             ],
19 |         )
20 | 
21 |         assert task_card.id == "fake_name"
22 |         assert task_card.name == "Fake Name"
23 |         assert task_card.expected_inputs == "Passage (text string), Question (text string)"
24 | 


--------------------------------------------------------------------------------
/tests/common/testing.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from allennlp.common.testing import AllenNlpTestCase, multi_device
 4 | 
 5 | actual_devices = set()  # filled by `TestTesting.test_multi_device`, read by the test after it
 6 | 
 7 | 
 8 | class TestTesting(AllenNlpTestCase):
 9 |     @multi_device
10 |     def test_multi_device(self, device: str):
11 |         actual_devices.add(device)
12 | 
13 |     def test_devices_accounted_for(self):
14 |         expected_devices = {"cpu", "cuda"} if torch.cuda.is_available() else {"cpu"}
15 |         assert expected_devices == actual_devices
16 | 


--------------------------------------------------------------------------------
/tests/confidence_checks/task_checklists/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/confidence_checks/task_checklists/__init__.py


--------------------------------------------------------------------------------
/tests/confidence_checks/task_checklists/sentiment_analysis_suite_test.py:
--------------------------------------------------------------------------------
 1 | from allennlp.confidence_checks.task_checklists.sentiment_analysis_suite import (
 2 |     SentimentAnalysisSuite,
 3 | )
 4 | from allennlp.common.testing import AllenNlpTestCase, requires_gpu
 5 | from allennlp.models.archival import load_archive
 6 | from allennlp.predictors import Predictor
 7 | 
 8 | 
 9 | class TestSentimentAnalysisSuite(AllenNlpTestCase):
10 |     def setup_method(self):
11 |         super().setup_method()
12 |         archive = load_archive(
13 |             self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
14 |         )
15 |         self.predictor = Predictor.from_archive(archive)
16 | 
17 |     # Mark this as GPU so it runs on a self-hosted runner, which will be a lot faster.
18 |     @requires_gpu
19 |     def test_run(self):
20 |         data = [
21 |             "This is really good",
22 |             "This was terrible",
23 |             "This was not good",
24 |             "John Smith acted very well.",
25 |             "Seattle was very gloomy.",
26 |             "I have visited the place for 3 years; great food!",
27 |         ]
28 |         suite = SentimentAnalysisSuite(add_default_tests=True, data=data)
29 |         suite.run(self.predictor, max_examples=1)
30 | 


--------------------------------------------------------------------------------
/tests/confidence_checks/task_checklists/utils_test.py:
--------------------------------------------------------------------------------
 1 | from allennlp.confidence_checks.task_checklists import utils
 2 | from allennlp.common.testing import AllenNlpTestCase
 3 | 
 4 | 
 5 | class TestUtils(AllenNlpTestCase):
 6 |     def test_punctuations(self):
 7 |         perturbed = utils.toggle_punctuation("This has a period.")
 8 | 
 9 |         assert perturbed[0] == "This has a period"
10 | 
11 |         perturbed = utils.toggle_punctuation("This does not have a period")
12 |         assert perturbed[0] == "This does not have a period."
13 | 


--------------------------------------------------------------------------------
/tests/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/data/__init__.py


--------------------------------------------------------------------------------
/tests/data/data_loaders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/data/data_loaders/__init__.py


--------------------------------------------------------------------------------
/tests/data/data_loaders/multitask_scheduler_test.py:
--------------------------------------------------------------------------------
 1 | from allennlp.data.data_loaders.multitask_scheduler import (
 2 |     RoundRobinScheduler,
 3 |     HomogeneousRoundRobinScheduler,
 4 | )
 5 | 
 6 | 
 7 | class RoundRobinSchedulerTest:
 8 |     def test_order_instances(self):
 9 |         scheduler = RoundRobinScheduler(batch_size=4)
10 |         epoch_instances = {
11 |             "a": [1] * 5,
12 |             "b": [2] * 3,
13 |         }
14 |         batches = scheduler.batch_instances(epoch_instances)
15 |         assert list(batches) == [[1, 2, 1, 2], [1, 2, 1, 1]]
16 | 
17 | 
18 | class HomogeneousRoundRobinSchedulerTest:
19 |     def test_order_instances(self):
20 |         scheduler = HomogeneousRoundRobinScheduler({"a": 2, "b": 3})
21 |         epoch_instances = {
22 |             "a": [1] * 9,
23 |             "b": [2] * 9,
24 |         }
25 |         flattened = scheduler.batch_instances(epoch_instances)
26 |         assert list(flattened) == [
27 |             [1, 1],
28 |             [2, 2, 2],
29 |             [1, 1],
30 |             [2, 2, 2],
31 |             [1, 1],
32 |             [2, 2, 2],
33 |             [1, 1],
34 |             [1],
35 |         ]
36 | 


--------------------------------------------------------------------------------
/tests/data/dataset_readers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/data/dataset_readers/__init__.py


--------------------------------------------------------------------------------
/tests/data/dataset_readers/babi_reader_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from allennlp.common import Params
 4 | from allennlp.data.dataset_readers import BabiReader
 5 | from allennlp.common.testing import AllenNlpTestCase
 6 | 
 7 | 
 8 | class TestBAbIReader:
 9 |     @pytest.mark.parametrize("keep_sentences", [False, True])
10 |     def test_read_from_file(self, keep_sentences):
11 |         reader = BabiReader(keep_sentences=keep_sentences)
12 |         instances = list(reader.read(AllenNlpTestCase.FIXTURES_ROOT / "data" / "babi.txt"))
13 |         assert len(instances) == 8
14 | 
15 |         if keep_sentences:
16 |             assert [t.text for t in instances[0].fields["context"][3].tokens[3:]] == [
17 |                 "of",
18 |                 "wolves",
19 |                 ".",
20 |             ]
21 |             assert [t.sequence_index for t in instances[0].fields["supports"]] == [0, 1]
22 |         else:
23 |             assert [t.text for t in instances[0].fields["context"].tokens[7:9]] == ["afraid", "of"]
24 | 
25 |     def test_can_build_from_params(self):
26 |         reader = BabiReader.from_params(Params({"keep_sentences": True}))
27 | 
28 |         assert reader._keep_sentences
29 |         assert reader._token_indexers["tokens"].__class__.__name__ == "SingleIdTokenIndexer"
30 | 


--------------------------------------------------------------------------------
/tests/data/fields/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/data/fields/__init__.py


--------------------------------------------------------------------------------
/tests/data/fields/flag_field_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from allennlp.common.testing.test_case import AllenNlpTestCase
 4 | from allennlp.data.fields import FlagField
 5 | 
 6 | 
 7 | class TestFlagField(AllenNlpTestCase):
 8 |     def test_get_padding_lengths_returns_nothing(self):
 9 |         flag_field = FlagField(True)
10 |         assert flag_field.get_padding_lengths() == {}
11 | 
12 |     def test_as_tensor_just_returns_value(self):
13 |         for value in [True, 3.234, "this is a string"]:
14 |             assert FlagField(value).as_tensor({}) == value
15 | 
16 |     def test_printing_doesnt_crash(self):
17 |         flag = FlagField(True)
18 |         print(flag)
19 | 
20 |     def test_human_readable_repr(self):
21 |         flag = FlagField(True)
22 |         assert flag.human_readable_repr() is True
23 | 
24 |     def test_batch_tensors_returns_single_value(self):
25 |         value = True
26 |         fields = [FlagField(value) for _ in range(5)]
27 |         values = [field.as_tensor({}) for field in fields]
28 |         batched_value = fields[0].batch_tensors(values)
29 |         assert batched_value == value
30 | 
31 |     def test_batch_tensors_crashes_with_non_uniform_values(self):
32 |         field = FlagField(True)
33 |         with pytest.raises(ValueError):
34 |             field.batch_tensors([True, False, True])
35 | 
36 |         with pytest.raises(ValueError):
37 |             field.batch_tensors([1, 2, 3, 4])
38 | 
39 |         with pytest.raises(ValueError):
40 |             field.batch_tensors(["different", "string", "flags"])
41 | 


--------------------------------------------------------------------------------
/tests/data/fields/metadata_field_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from allennlp.common.testing.test_case import AllenNlpTestCase
 4 | from allennlp.data.fields import MetadataField
 5 | 
 6 | 
 7 | class TestMetadataField(AllenNlpTestCase):
 8 |     def test_mapping_works_with_dict(self):
 9 |         field = MetadataField({"a": 1, "b": [0]})
10 | 
11 |         assert "a" in field
12 |         assert field["a"] == 1
13 |         assert len(field) == 2
14 | 
15 |         keys = {k for k in field}
16 |         assert keys == {"a", "b"}
17 | 
18 |         values = [v for v in field.values()]
19 |         assert len(values) == 2
20 |         assert 1 in values
21 |         assert [0] in values
22 | 
23 |     def test_mapping_raises_with_non_dict(self):
24 |         field = MetadataField(0)
25 | 
26 |         with pytest.raises(TypeError):
27 |             _ = field[0]
28 | 
29 |         with pytest.raises(TypeError):
30 |             _ = len(field)
31 | 
32 |         with pytest.raises(TypeError):
33 |             _ = [x for x in field]
34 | 
35 |     def test_human_readable_repr(self):
36 |         field = MetadataField({"a": 1, "b": [0]})
37 |         assert field.human_readable_repr() == {"a": 1, "b": [0]}
38 | 


--------------------------------------------------------------------------------
/tests/data/samplers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/data/samplers/__init__.py


--------------------------------------------------------------------------------
/tests/data/token_indexers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/data/token_indexers/__init__.py


--------------------------------------------------------------------------------
/tests/data/token_indexers/spacy_indexer_test.py:
--------------------------------------------------------------------------------
 1 | from allennlp.common.testing import AllenNlpTestCase
 2 | from allennlp.data.token_indexers.spacy_indexer import SpacyTokenIndexer
 3 | from allennlp.data.fields.text_field import TextField
 4 | from allennlp.common.util import get_spacy_model
 5 | from allennlp.data.vocabulary import Vocabulary
 6 | 
 7 | 
 8 | class TestSpacyTokenIndexer(AllenNlpTestCase):
 9 |     def test_as_array_produces_token_array(self):
10 |         indexer = SpacyTokenIndexer()
11 |         nlp = get_spacy_model("en_core_web_sm", parse=False, ner=False)
12 |         tokens = [t for t in nlp("This is a sentence.")]
13 |         field = TextField(tokens, token_indexers={"spacy": indexer})
14 | 
15 |         vocab = Vocabulary()
16 |         field.index(vocab)
17 | 
18 |         # Indexer functionality
19 |         array_dict = indexer.tokens_to_indices(tokens, vocab)
20 |         assert len(array_dict["tokens"]) == 5
21 |         assert len(array_dict["tokens"][0]) == 96
22 | 
23 |         # Check it also works with field
24 |         lengths = field.get_padding_lengths()
25 |         array_dict = field.as_tensor(lengths)
26 | 
27 |         assert list(array_dict["spacy"]["tokens"].shape) == [5, 96]
28 | 


--------------------------------------------------------------------------------
/tests/data/tokenizers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/data/tokenizers/__init__.py


--------------------------------------------------------------------------------
/tests/evaluation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/evaluation/__init__.py


--------------------------------------------------------------------------------
/tests/evaluation/serializers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/evaluation/serializers/__init__.py


--------------------------------------------------------------------------------
/tests/fairness/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/fairness/__init__.py


--------------------------------------------------------------------------------
/tests/interpret/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/interpret/__init__.py


--------------------------------------------------------------------------------
/tests/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/models/__init__.py


--------------------------------------------------------------------------------
/tests/models/basic_classifier_test.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | 
 3 | from allennlp.common.testing import ModelTestCase
 4 | 
 5 | 
 6 | class TestBasicClassifier(ModelTestCase):
 7 |     def setup_method(self):
 8 |         super().setup_method()
 9 |         self.set_up_model(
10 |             self.FIXTURES_ROOT / "basic_classifier" / "experiment_seq2vec.jsonnet",
11 |             self.FIXTURES_ROOT / "data" / "text_classification_json" / "imdb_corpus.jsonl",
12 |         )
13 | 
14 |     def test_forward_pass_runs_correctly(self):
15 |         training_tensors = self.dataset.as_tensor_dict()
16 |         output_dict = self.model(**training_tensors)
17 |         output_dict = self.model.make_output_human_readable(output_dict)
18 |         assert "label" in output_dict.keys()
19 |         probs = output_dict["probs"][0].data.numpy()
20 |         numpy.testing.assert_almost_equal(numpy.sum(probs, -1), numpy.array([1]))
21 | 
22 |     def test_seq2vec_clf_can_train_save_and_load(self):
23 |         self.set_up_model(
24 |             self.FIXTURES_ROOT / "basic_classifier" / "experiment_seq2vec.jsonnet",
25 |             self.FIXTURES_ROOT / "data" / "text_classification_json" / "imdb_corpus.jsonl",
26 |         )
27 |         self.ensure_model_can_train_save_and_load(self.param_file)
28 | 
29 |     def test_seq2seq_clf_can_train_save_and_load(self):
30 |         self.set_up_model(
31 |             self.FIXTURES_ROOT / "basic_classifier" / "experiment_seq2seq.jsonnet",
32 |             self.FIXTURES_ROOT / "data" / "text_classification_json" / "imdb_corpus.jsonl",
33 |         )
34 |         self.ensure_model_can_train_save_and_load(self.param_file)
35 | 


--------------------------------------------------------------------------------
/tests/models/model_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pytest
 3 | 
 4 | from allennlp.common.testing.test_case import AllenNlpTestCase
 5 | from allennlp.models import load_archive, Model
 6 | from allennlp.nn.regularizers import RegularizerApplicator
 7 | 
 8 | 
 9 | class TestModel(AllenNlpTestCase):
10 |     def test_extend_embedder_vocab(self):
11 |         model_archive = str(
12 |             self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
13 |         )
14 |         trained_model = load_archive(model_archive).model
15 | 
16 |         original_weight = trained_model._text_field_embedder.token_embedder_tokens.weight
17 |         assert tuple(original_weight.shape) == (213, 10)
18 | 
19 |         counter = {"tokens": {"unawarded": 1}}
20 |         trained_model.vocab._extend(counter)
21 |         trained_model.extend_embedder_vocab()
22 | 
23 |         extended_weight = trained_model._text_field_embedder.token_embedder_tokens.weight
24 |         assert tuple(extended_weight.shape) == (214, 10)
25 | 
26 |         assert torch.all(original_weight == extended_weight[:213, :])
27 | 
28 |     def test_get_regularization_penalty(self):
29 |         class FakeModel(Model):
30 |             def forward(self, **kwargs):
31 |                 return {}
32 | 
33 |         class FakeRegularizerApplicator(RegularizerApplicator):
34 |             def __call__(self, module):
35 |                 return 2.0
36 | 
37 |         with pytest.raises(RuntimeError):
38 |             regularizer = FakeRegularizerApplicator()
39 |             model = FakeModel(None, regularizer)
40 |             model.get_regularization_penalty()
41 | 


--------------------------------------------------------------------------------
/tests/modules/attention/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/modules/attention/__init__.py


--------------------------------------------------------------------------------
/tests/modules/attention/additive_attention_test.py:
--------------------------------------------------------------------------------
 1 | from numpy.testing import assert_almost_equal
 2 | import torch
 3 | from torch.nn.parameter import Parameter
 4 | 
 5 | from allennlp.common import Params
 6 | from allennlp.modules.attention import AdditiveAttention
 7 | from allennlp.common.testing import AllenNlpTestCase
 8 | 
 9 | 
10 | class TestAdditiveAttention(AllenNlpTestCase):
11 |     def test_forward_does_an_additive_product(self):
12 |         params = Params({"vector_dim": 2, "matrix_dim": 3, "normalize": False})
13 |         additive = AdditiveAttention.from_params(params)
14 |         additive._w_matrix = Parameter(torch.Tensor([[-0.2, 0.3], [-0.5, 0.5]]))
15 |         additive._u_matrix = Parameter(torch.Tensor([[0.0, 1.0], [1.0, 1.0], [1.0, -1.0]]))
16 |         additive._v_vector = Parameter(torch.Tensor([[1.0], [-1.0]]))
17 |         vectors = torch.FloatTensor([[0.7, -0.8], [0.4, 0.9]])
18 |         matrices = torch.FloatTensor(
19 |             [
20 |                 [[1.0, -1.0, 3.0], [0.5, -0.3, 0.0], [0.2, -1.0, 1.0], [0.7, 0.8, -1.0]],
21 |                 [[-2.0, 3.0, -3.0], [0.6, 0.2, 2.0], [0.5, -0.4, -1.0], [0.2, 0.2, 0.0]],
22 |             ]
23 |         )
24 |         result = additive(vectors, matrices).detach().numpy()
25 |         assert result.shape == (2, 4)
26 |         assert_almost_equal(
27 |             result,
28 |             [
29 |                 [1.975072, -0.04997836, 1.2176098, -0.9205586],
30 |                 [-1.4851665, 1.489604, -1.890285, -1.0672251],
31 |             ],
32 |         )
33 | 


--------------------------------------------------------------------------------
/tests/modules/attention/attention_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import torch
 3 | 
 4 | from allennlp.modules import Attention
 5 | from allennlp.modules.attention import BilinearAttention, AdditiveAttention, LinearAttention
 6 | 
 7 | 
 8 | @pytest.mark.parametrize("attention_type", Attention.list_available())
 9 | def test_all_attention_works_the_same(attention_type: str):
10 |     module_cls = Attention.by_name(attention_type)
11 | 
12 |     vector = torch.FloatTensor([[-7, -8, -9]])
13 |     matrix = torch.FloatTensor([[[1, 2, 3], [4, 5, 6]]])
14 | 
15 |     if module_cls in {BilinearAttention, AdditiveAttention, LinearAttention}:
16 |         module = module_cls(vector.size(-1), matrix.size(-1))
17 |     else:
18 |         module = module_cls()
19 | 
20 |     output = module(vector, matrix)
21 |     assert tuple(output.size()) == (1, 2)
22 | 


--------------------------------------------------------------------------------
/tests/modules/attention/bilinear_attention_test.py:
--------------------------------------------------------------------------------
 1 | from numpy.testing import assert_almost_equal
 2 | import torch
 3 | from torch.nn.parameter import Parameter
 4 | 
 5 | from allennlp.common import Params
 6 | from allennlp.modules.attention import BilinearAttention
 7 | from allennlp.common.testing import AllenNlpTestCase
 8 | 
 9 | 
10 | class TestBilinearAttention(AllenNlpTestCase):
11 |     def test_forward_does_a_bilinear_product(self):
12 |         params = Params({"vector_dim": 2, "matrix_dim": 2, "normalize": False})
13 |         bilinear = BilinearAttention.from_params(params)
14 |         bilinear._weight_matrix = Parameter(torch.FloatTensor([[-0.3, 0.5], [2.0, -1.0]]))
15 |         bilinear._bias = Parameter(torch.FloatTensor([0.1]))
16 |         a_vectors = torch.FloatTensor([[1, 1]])
17 |         b_vectors = torch.FloatTensor([[[1, 0], [0, 1]]])
18 |         result = bilinear(a_vectors, b_vectors).detach().numpy()
19 |         assert result.shape == (1, 2)
20 |         assert_almost_equal(result, [[1.8, -0.4]])
21 | 


--------------------------------------------------------------------------------
/tests/modules/attention/cosine_attention_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from numpy.testing import assert_almost_equal
 3 | import numpy
 4 | 
 5 | from allennlp.common import Params
 6 | from allennlp.common.testing.test_case import AllenNlpTestCase
 7 | from allennlp.modules.attention.attention import Attention
 8 | from allennlp.modules.attention.cosine_attention import CosineAttention
 9 | 
10 | 
11 | class TestCosineAttention(AllenNlpTestCase):
12 |     def test_can_init_cosine(self):
13 |         legacy_attention = Attention.from_params(Params({"type": "cosine"}))
14 |         isinstance(legacy_attention, CosineAttention)
15 | 
16 |     def test_cosine_similarity(self):
17 |         linear = CosineAttention(normalize=False)
18 |         output = linear(
19 |             torch.FloatTensor([[0, 0, 0], [1, 1, 1]]),
20 |             torch.FloatTensor([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]),
21 |         )
22 | 
23 |         assert_almost_equal(output.numpy(), numpy.array([[0.0, 0.0], [0.9948, 0.9973]]), decimal=2)
24 | 


--------------------------------------------------------------------------------
/tests/modules/attention/dot_product_attention_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from numpy.testing import assert_almost_equal
 3 | import numpy
 4 | 
 5 | from allennlp.common import Params
 6 | from allennlp.common.testing.test_case import AllenNlpTestCase
 7 | from allennlp.modules.attention.attention import Attention
 8 | from allennlp.modules.attention.dot_product_attention import DotProductAttention
 9 | 
10 | 
11 | class TestDotProductAttention(AllenNlpTestCase):
12 |     def test_can_init_dot(self):
13 |         legacy_attention = Attention.from_params(Params({"type": "dot_product"}))
14 |         isinstance(legacy_attention, DotProductAttention)
15 | 
16 |     def test_dot_product_similarity(self):
17 |         attn = DotProductAttention(normalize=False)
18 |         output = attn(
19 |             torch.FloatTensor([[0, 0, 0], [1, 1, 1]]),
20 |             torch.FloatTensor([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]),
21 |         )
22 | 
23 |         assert_almost_equal(output.numpy(), numpy.array([[0.0, 0.0], [24.0, 33.0]]), decimal=2)
24 | 


--------------------------------------------------------------------------------
/tests/modules/attention/scaled_dot_product_attention_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from numpy.testing import assert_almost_equal
 3 | import numpy
 4 | 
 5 | from allennlp.common import Params
 6 | from allennlp.common.testing.test_case import AllenNlpTestCase
 7 | from allennlp.modules.attention.attention import Attention
 8 | from allennlp.modules.attention.scaled_dot_product_attention import ScaledDotProductAttention
 9 | 
10 | 
11 | class TestScaledDotProductAttention(AllenNlpTestCase):
12 |     def test_can_init_scaled_dot(self):
13 |         legacy_attention = Attention.from_params(
14 |             Params({"type": "scaled_dot_product", "scaling_factor": 9})
15 |         )
16 |         isinstance(legacy_attention, ScaledDotProductAttention)
17 | 
18 |     def test_scaled_dot_product_similarity(self):
19 |         attn = ScaledDotProductAttention(9, normalize=False)
20 |         vector = torch.FloatTensor([[0, 0, 0], [1, 1, 1]])
21 |         matrix = torch.FloatTensor([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
22 |         output = attn(vector, matrix)
23 | 
24 |         assert_almost_equal(output.numpy(), numpy.array([[0.0, 0.0], [8.0, 11.0]]), decimal=2)
25 | 


--------------------------------------------------------------------------------
/tests/modules/gated_sum_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import torch
 3 | 
 4 | import numpy
 5 | 
 6 | from allennlp.common.testing import AllenNlpTestCase
 7 | from allennlp.modules import GatedSum
 8 | 
 9 | 
10 | class TestGatedSum(AllenNlpTestCase):
11 |     def test_gated_sum_can_run_forward(self):
12 |         a = torch.FloatTensor([1, 2, 3, 4, 5])
13 |         b = -a + 0.1
14 |         weight_value = 2
15 |         gate_value = torch.sigmoid(torch.FloatTensor([1]))
16 |         expected = gate_value * a + (1 - gate_value) * b
17 | 
18 |         with torch.no_grad():  # because we want to change the weight
19 |             gated_sum = GatedSum(a.size(-1))
20 |             gated_sum._gate.weight *= 0
21 |             gated_sum._gate.weight += weight_value
22 |             gated_sum._gate.bias *= 0
23 | 
24 |             out = gated_sum(a, b)
25 |             numpy.testing.assert_almost_equal(expected.data.numpy(), out.data.numpy(), decimal=5)
26 | 
27 |         with pytest.raises(ValueError):
28 |             GatedSum(a.size(-1))(a, b.unsqueeze(0))
29 | 
30 |         with pytest.raises(ValueError):
31 |             GatedSum(100)(a, b)
32 | 
33 |     def test_input_output_dim(self):
34 |         dim = 77
35 |         gated_sum = GatedSum(dim)
36 |         numpy.testing.assert_equal(gated_sum.get_input_dim(), dim)
37 |         numpy.testing.assert_equal(gated_sum.get_output_dim(), dim)
38 | 


--------------------------------------------------------------------------------
/tests/modules/highway_test.py:
--------------------------------------------------------------------------------
 1 | from numpy.testing import assert_almost_equal
 2 | import torch
 3 | 
 4 | from allennlp.modules import Highway
 5 | from allennlp.common.testing import AllenNlpTestCase
 6 | 
 7 | 
 8 | class TestHighway(AllenNlpTestCase):
 9 |     def test_forward_works_on_simple_input(self):
10 |         highway = Highway(2, 2)
11 | 
12 |         highway._layers[0].weight.data.fill_(1)
13 |         highway._layers[0].bias.data.fill_(0)
14 |         highway._layers[1].weight.data.fill_(2)
15 |         highway._layers[1].bias.data.fill_(-2)
16 |         input_tensor = torch.FloatTensor([[-2, 1], [3, -2]])
17 |         result = highway(input_tensor).data.numpy()
18 |         assert result.shape == (2, 2)
19 |         # This was checked by hand.
20 |         assert_almost_equal(result, [[-0.0394, 0.0197], [1.7527, -0.5550]], decimal=4)
21 | 
22 |     def test_forward_works_on_nd_input(self):
23 |         highway = Highway(2, 2)
24 |         input_tensor = torch.ones(2, 2, 2)
25 |         output = highway(input_tensor)
26 |         assert output.size() == (2, 2, 2)
27 | 


--------------------------------------------------------------------------------
/tests/modules/masked_layer_norm_test.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from allennlp.common.testing import AllenNlpTestCase
 5 | from allennlp.modules.masked_layer_norm import MaskedLayerNorm
 6 | from allennlp.nn import util
 7 | 
 8 | 
 9 | class TestMaskedLayerNorm(AllenNlpTestCase):
10 |     def test_masked_layer_norm(self):
11 |         x_n = np.random.rand(2, 3, 7)
12 |         mask_n = np.array([[1, 1, 0], [1, 1, 1]])
13 | 
14 |         x = torch.from_numpy(x_n).float()
15 |         mask = torch.from_numpy(mask_n).bool()
16 | 
17 |         layer_norm = MaskedLayerNorm(7, gamma0=0.2)
18 |         normed_x = layer_norm(x, mask)
19 | 
20 |         N = 7 * 5
21 |         mean = (x_n * np.expand_dims(mask_n, axis=-1)).sum() / N
22 |         std = np.sqrt(
23 |             (((x_n - mean) * np.expand_dims(mask_n, axis=-1)) ** 2).sum() / N
24 |             + util.tiny_value_of_dtype(torch.float)
25 |         )
26 |         expected = 0.2 * (x_n - mean) / (std + util.tiny_value_of_dtype(torch.float))
27 | 
28 |         assert np.allclose(normed_x.data.numpy(), expected)
29 | 


--------------------------------------------------------------------------------
/tests/modules/matrix_attention/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/modules/matrix_attention/__init__.py


--------------------------------------------------------------------------------
/tests/modules/matrix_attention/cosine_matrix_attention_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from numpy.testing import assert_almost_equal
 3 | import numpy
 4 | 
 5 | from allennlp.common import Params
 6 | from allennlp.common.testing.test_case import AllenNlpTestCase
 7 | from allennlp.modules.matrix_attention import CosineMatrixAttention
 8 | from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
 9 | 
10 | 
11 | class TestCosineMatrixAttention(AllenNlpTestCase):
12 |     def test_can_init_cosine(self):
13 |         legacy_attention = MatrixAttention.from_params(Params({"type": "cosine"}))
14 |         isinstance(legacy_attention, CosineMatrixAttention)
15 | 
16 |     def test_cosine_similarity(self):
17 |         # example use case: a batch of size 2.
18 |         # With a time element component (e.g. sentences of length 2) each word is a vector of length 3.
19 |         # It is comparing this with another input of the same type
20 |         output = CosineMatrixAttention()(
21 |             torch.FloatTensor([[[0, 0, 0], [4, 5, 6]], [[-7, -8, -9], [10, 11, 12]]]),
22 |             torch.FloatTensor([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]),
23 |         )
24 | 
25 |         # For the first batch there is
26 |         #       no correlation between the first words of the input matrix
27 |         #       but perfect correlation for the second word
28 |         # For the second batch there is
29 |         #     negative correlation for the first words
30 |         #     correlation for the second word
31 |         assert_almost_equal(
32 |             output.numpy(), numpy.array([[[0, 0], [0.97, 1]], [[-1, -0.99], [0.99, 1]]]), decimal=2
33 |         )
34 | 


--------------------------------------------------------------------------------
/tests/modules/matrix_attention/dot_product_matrix_attention_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from numpy.testing import assert_almost_equal
 3 | import numpy
 4 | 
 5 | from allennlp.common import Params
 6 | from allennlp.common.testing.test_case import AllenNlpTestCase
 7 | from allennlp.modules.matrix_attention import DotProductMatrixAttention
 8 | from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
 9 | 
10 | 
11 | class TestDotProductMatrixAttention(AllenNlpTestCase):
12 |     def test_can_init_dot(self):
13 |         legacy_attention = MatrixAttention.from_params(Params({"type": "dot_product"}))
14 |         isinstance(legacy_attention, DotProductMatrixAttention)
15 | 
16 |     def test_dot_product_similarity(self):
17 |         # example use case: a batch of size 2,
18 |         # with a time element component (e.g. sentences of length 2) each word is a vector of length 3.
19 |         # it is comparing this with another input of the same type
20 |         output = DotProductMatrixAttention()(
21 |             torch.FloatTensor([[[0, 0, 0], [4, 5, 6]], [[-7, -8, -9], [10, 11, 12]]]),
22 |             torch.FloatTensor([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]),
23 |         )
24 | 
25 |         # for the first batch there is
26 |         #       no correlation between the first words of the input matrix
27 |         #       but perfect correlation for the second word
28 |         # for the second batch there is
29 |         #       negative correlation for the first words
30 |         #       a correlation for the second word
31 |         assert_almost_equal(
32 |             output.numpy(), numpy.array([[[0, 0], [32, 77]], [[-194, -266], [266, 365]]]), decimal=2
33 |         )
34 | 


--------------------------------------------------------------------------------
/tests/modules/matrix_attention/matrix_attention_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import torch
 3 | 
 4 | from allennlp.modules import MatrixAttention
 5 | from allennlp.modules.matrix_attention import BilinearMatrixAttention, LinearMatrixAttention
 6 | 
 7 | 
 8 | @pytest.mark.parametrize("attention_type", MatrixAttention.list_available())
 9 | def test_all_attention_works_the_same(attention_type: str):
10 |     module_cls = MatrixAttention.by_name(attention_type)
11 | 
12 |     matrix1 = torch.FloatTensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
13 |     matrix2 = torch.FloatTensor([[[1, 2, 3], [4, 5, 6]]])
14 | 
15 |     if module_cls in {BilinearMatrixAttention, LinearMatrixAttention}:
16 |         module = module_cls(matrix1.size(-1), matrix2.size(-1))
17 |     else:
18 |         module = module_cls()
19 | 
20 |     output = module(matrix1, matrix2)
21 |     assert tuple(output.size()) == (1, 3, 2)
22 | 


--------------------------------------------------------------------------------
/tests/modules/seq2seq_encoder_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from allennlp.common import Params
 4 | from allennlp.common.checks import ConfigurationError
 5 | from allennlp.modules import Seq2SeqEncoder
 6 | from allennlp.common.testing import AllenNlpTestCase
 7 | 
 8 | 
 9 | class TestSeq2SeqEncoder(AllenNlpTestCase):
10 |     def test_from_params_builders_encoder_correctly(self):
11 |         # We're just making sure parameters get passed through correctly here, and that the basic
12 |         # API works.
13 |         params = Params(
14 |             {
15 |                 "type": "lstm",
16 |                 "bidirectional": True,
17 |                 "num_layers": 3,
18 |                 "input_size": 5,
19 |                 "hidden_size": 7,
20 |                 "stateful": True,
21 |             }
22 |         )
23 |         encoder = Seq2SeqEncoder.from_params(params)
24 | 
25 |         assert encoder.__class__.__name__ == "LstmSeq2SeqEncoder"
26 |         assert encoder._module.__class__.__name__ == "LSTM"
27 |         assert encoder._module.num_layers == 3
28 |         assert encoder._module.input_size == 5
29 |         assert encoder._module.hidden_size == 7
30 |         assert encoder._module.bidirectional is True
31 |         assert encoder._module.batch_first is True
32 |         assert encoder.stateful is True
33 | 
34 |     def test_from_params_requires_batch_first(self):
35 |         params = Params({"type": "lstm", "batch_first": False})
36 |         with pytest.raises(ConfigurationError):
37 |             Seq2SeqEncoder.from_params(params)
38 | 


--------------------------------------------------------------------------------
/tests/modules/seq2seq_encoders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/modules/seq2seq_encoders/__init__.py


--------------------------------------------------------------------------------
/tests/modules/seq2seq_encoders/feedforward_encoder_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy
 3 | 
 4 | from allennlp.common.testing import AllenNlpTestCase
 5 | from allennlp.modules import FeedForward
 6 | from allennlp.modules.seq2seq_encoders.feedforward_encoder import FeedForwardEncoder
 7 | from allennlp.nn import Activation
 8 | 
 9 | 
10 | class TestFeedforwardEncoder(AllenNlpTestCase):
11 |     def test_get_dimension_is_correct(self):
12 |         feedforward = FeedForward(
13 |             input_dim=10, num_layers=1, hidden_dims=10, activations=Activation.by_name("linear")()
14 |         )
15 |         encoder = FeedForwardEncoder(feedforward)
16 |         assert encoder.get_input_dim() == feedforward.get_input_dim()
17 |         assert encoder.get_output_dim() == feedforward.get_output_dim()
18 | 
19 |     def test_feedforward_encoder_exactly_match_feedforward_each_item(self):
20 |         feedforward = FeedForward(
21 |             input_dim=10, num_layers=1, hidden_dims=10, activations=Activation.by_name("linear")()
22 |         )
23 |         encoder = FeedForwardEncoder(feedforward)
24 |         tensor = torch.randn([2, 3, 10])
25 |         output = encoder(tensor)
26 |         target = feedforward(tensor)
27 |         numpy.testing.assert_array_almost_equal(
28 |             target.detach().cpu().numpy(), output.detach().cpu().numpy()
29 |         )
30 | 
31 |         # mask should work
32 |         mask = torch.tensor([[True, True, True], [True, False, False]])
33 |         output = encoder(tensor, mask)
34 |         target = feedforward(tensor) * mask.unsqueeze(dim=-1).float()
35 |         numpy.testing.assert_array_almost_equal(
36 |             target.detach().cpu().numpy(), output.detach().cpu().numpy()
37 |         )
38 | 


--------------------------------------------------------------------------------
/tests/modules/seq2seq_encoders/pass_through_encoder_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy
 3 | 
 4 | from allennlp.common.testing import AllenNlpTestCase
 5 | from allennlp.modules.seq2seq_encoders import PassThroughEncoder
 6 | 
 7 | 
 8 | class TestPassThroughEncoder(AllenNlpTestCase):
 9 |     def test_get_dimension_is_correct(self):
10 |         encoder = PassThroughEncoder(input_dim=9)
11 |         assert encoder.get_input_dim() == 9
12 |         assert encoder.get_output_dim() == 9
13 | 
14 |     def test_pass_through_encoder_passes_through(self):
15 |         encoder = PassThroughEncoder(input_dim=9)
16 |         tensor = torch.randn([2, 3, 9])
17 |         output = encoder(tensor)
18 |         numpy.testing.assert_array_almost_equal(
19 |             tensor.detach().cpu().numpy(), output.detach().cpu().numpy()
20 |         )
21 | 
22 |     def test_pass_through_encoder_with_mask(self):
23 |         encoder = PassThroughEncoder(input_dim=9)
24 |         tensor = torch.randn([2, 3, 9])
25 |         mask = torch.tensor([[True, True, True], [True, False, False]])
26 |         output = encoder(tensor, mask)
27 | 
28 |         target = tensor * mask.unsqueeze(dim=-1).float()
29 |         numpy.testing.assert_array_almost_equal(
30 |             output.detach().cpu().numpy(), target.detach().cpu().numpy()
31 |         )
32 | 


--------------------------------------------------------------------------------
/tests/modules/seq2vec_encoder_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from allennlp.common import Params
 4 | from allennlp.common.checks import ConfigurationError
 5 | from allennlp.modules import Seq2VecEncoder
 6 | from allennlp.common.testing import AllenNlpTestCase
 7 | 
 8 | 
 9 | class TestSeq2VecEncoder(AllenNlpTestCase):
10 |     def test_from_params_builders_encoder_correctly(self):
11 |         # We're just making sure parameters get passed through correctly here, and that the basic
12 |         # API works.
13 |         params = Params(
14 |             {
15 |                 "type": "lstm",
16 |                 "bidirectional": True,
17 |                 "num_layers": 3,
18 |                 "input_size": 5,
19 |                 "hidden_size": 7,
20 |             }
21 |         )
22 |         encoder = Seq2VecEncoder.from_params(params)
23 | 
24 |         assert encoder.__class__.__name__ == "LstmSeq2VecEncoder"
25 |         assert encoder._module.__class__.__name__ == "LSTM"
26 |         assert encoder._module.num_layers == 3
27 |         assert encoder._module.input_size == 5
28 |         assert encoder._module.hidden_size == 7
29 |         assert encoder._module.bidirectional is True
30 |         assert encoder._module.batch_first is True
31 | 
32 |     def test_from_params_requires_batch_first(self):
33 |         params = Params({"type": "lstm", "batch_first": False})
34 |         with pytest.raises(ConfigurationError):
35 |             Seq2VecEncoder.from_params(params)
36 | 


--------------------------------------------------------------------------------
/tests/modules/seq2vec_encoders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/modules/seq2vec_encoders/__init__.py


--------------------------------------------------------------------------------
/tests/modules/seq2vec_encoders/bert_pooler_test.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import torch
 3 | 
 4 | from allennlp.common.testing import AllenNlpTestCase
 5 | from allennlp.modules.seq2vec_encoders import BertPooler
 6 | 
 7 | 
 8 | class TestBertPooler(AllenNlpTestCase):
 9 |     def test_encoder(self):
10 |         encoder = BertPooler("bert-base-uncased")
11 |         assert encoder.get_input_dim() == encoder.get_output_dim()
12 |         embedding = torch.rand(8, 24, encoder.get_input_dim())
13 | 
14 |         pooled1 = encoder(embedding)
15 |         assert pooled1.size() == (8, encoder.get_input_dim())
16 | 
17 |         embedding[:, 1:, :] = 0
18 |         pooled2 = encoder(embedding)
19 |         numpy.testing.assert_array_almost_equal(pooled1.detach().numpy(), pooled2.detach().numpy())
20 | 


--------------------------------------------------------------------------------
/tests/modules/seq2vec_encoders/cls_pooler_test.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import torch
 3 | 
 4 | from allennlp.common.testing import AllenNlpTestCase
 5 | from allennlp.modules.seq2vec_encoders.cls_pooler import ClsPooler
 6 | 
 7 | 
 8 | class TestClsPooler(AllenNlpTestCase):
 9 |     def test_encoder(self):
10 |         embedding = torch.rand(5, 50, 7)
11 |         encoder = ClsPooler(embedding_dim=7)
12 |         pooled = encoder(embedding, mask=None)
13 | 
14 |         assert list(pooled.size()) == [5, 7]
15 |         numpy.testing.assert_array_almost_equal(embedding[:, 0], pooled)
16 | 
17 |     def test_cls_at_end(self):
18 |         embedding = torch.arange(20).reshape(5, 4).unsqueeze(-1).expand(5, 4, 7)
19 |         mask = torch.tensor(
20 |             [
21 |                 [True, True, True, True],
22 |                 [True, True, True, False],
23 |                 [True, True, True, True],
24 |                 [True, False, False, False],
25 |                 [True, True, False, False],
26 |             ]
27 |         )
28 |         expected = torch.LongTensor([3, 6, 11, 12, 17]).unsqueeze(-1).expand(5, 7)
29 | 
30 |         encoder = ClsPooler(embedding_dim=7, cls_is_last_token=True)
31 |         pooled = encoder(embedding, mask=mask)
32 | 
33 |         assert list(pooled.size()) == [5, 7]
34 |         numpy.testing.assert_array_almost_equal(expected, pooled)
35 | 


--------------------------------------------------------------------------------
/tests/modules/seq2vec_encoders/cnn_highway_encoder_test.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from allennlp.common.testing import AllenNlpTestCase
 5 | from allennlp.modules.seq2vec_encoders.cnn_highway_encoder import CnnHighwayEncoder
 6 | from allennlp.modules.time_distributed import TimeDistributed
 7 | 
 8 | 
 9 | class TestCnnHighwayEncoder(AllenNlpTestCase):
10 |     def run_encoder_against_random_embeddings(self, do_layer_norm):
11 |         encoder = CnnHighwayEncoder(
12 |             activation="relu",
13 |             embedding_dim=4,
14 |             filters=[[1, 4], [2, 8], [3, 16], [4, 32], [5, 64]],
15 |             num_highway=2,
16 |             projection_dim=16,
17 |             projection_location="after_cnn",
18 |             do_layer_norm=do_layer_norm,
19 |         )
20 |         encoder = TimeDistributed(encoder)
21 | 
22 |         embedding = torch.from_numpy(np.random.randn(5, 6, 50, 4)).float()
23 |         mask = torch.ones(5, 6, 50).bool()
24 |         token_embedding = encoder(embedding, mask)
25 | 
26 |         assert list(token_embedding.size()) == [5, 6, 16]
27 | 
28 |     def test_cnn_highway_encoder(self):
29 |         self.run_encoder_against_random_embeddings(do_layer_norm=False)
30 | 
31 |     def test_cnn_highway_encoder_with_layer_norm(self):
32 |         self.run_encoder_against_random_embeddings(do_layer_norm=True)
33 | 


--------------------------------------------------------------------------------
/tests/modules/span_extractors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/modules/span_extractors/__init__.py


--------------------------------------------------------------------------------
/tests/modules/stacked_alternating_lstm_test.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import torch
 3 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
 4 | 
 5 | from allennlp.modules.stacked_alternating_lstm import StackedAlternatingLstm
 6 | from allennlp.common.testing import AllenNlpTestCase
 7 | 
 8 | 
 9 | class TestStackedAlternatingLstm(AllenNlpTestCase):
10 |     def test_stacked_alternating_lstm_completes_forward_pass(self):
11 |         input_tensor = torch.rand(4, 5, 3)
12 |         input_tensor[1, 4:, :] = 0.0
13 |         input_tensor[2, 2:, :] = 0.0
14 |         input_tensor[3, 1:, :] = 0.0
15 |         input_tensor = pack_padded_sequence(input_tensor, [5, 4, 2, 1], batch_first=True)
16 |         lstm = StackedAlternatingLstm(3, 7, 3)
17 |         output, _ = lstm(input_tensor)
18 |         output_sequence, _ = pad_packed_sequence(output, batch_first=True)
19 |         numpy.testing.assert_array_equal(output_sequence.data[1, 4:, :].numpy(), 0.0)
20 |         numpy.testing.assert_array_equal(output_sequence.data[2, 2:, :].numpy(), 0.0)
21 |         numpy.testing.assert_array_equal(output_sequence.data[3, 1:, :].numpy(), 0.0)
22 | 
23 |     def test_lstms_are_interleaved(self):
24 |         lstm = StackedAlternatingLstm(3, 7, 8)
25 |         for i, layer in enumerate(lstm.lstm_layers):
26 |             if i % 2 == 0:
27 |                 assert layer.go_forward
28 |             else:
29 |                 assert not layer.go_forward
30 | 


--------------------------------------------------------------------------------
/tests/modules/stacked_elmo_lstm_test.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import torch
 3 | 
 4 | from allennlp.modules.elmo_lstm import ElmoLstm
 5 | from allennlp.common.testing import AllenNlpTestCase
 6 | 
 7 | 
 8 | class TestElmoLstmCell(AllenNlpTestCase):
 9 |     def test_elmo_lstm(self):
10 |         input_tensor = torch.rand(4, 5, 3)
11 |         input_tensor[1, 4:, :] = 0.0
12 |         input_tensor[2, 2:, :] = 0.0
13 |         input_tensor[3, 1:, :] = 0.0
14 |         mask = torch.ones([4, 5]).bool()
15 |         mask[1, 4:] = False
16 |         mask[2, 2:] = False
17 |         mask[3, 1:] = False
18 | 
19 |         lstm = ElmoLstm(
20 |             num_layers=2,
21 |             input_size=3,
22 |             hidden_size=5,
23 |             cell_size=7,
24 |             memory_cell_clip_value=2,
25 |             state_projection_clip_value=1,
26 |         )
27 |         output_sequence = lstm(input_tensor, mask)
28 | 
29 |         # Check all the layer outputs are masked properly.
30 |         numpy.testing.assert_array_equal(output_sequence.data[:, 1, 4:, :].numpy(), 0.0)
31 |         numpy.testing.assert_array_equal(output_sequence.data[:, 2, 2:, :].numpy(), 0.0)
32 |         numpy.testing.assert_array_equal(output_sequence.data[:, 3, 1:, :].numpy(), 0.0)
33 | 
34 |         # LSTM state should be (num_layers, batch_size, hidden_size)
35 |         assert list(lstm._states[0].size()) == [2, 4, 10]
36 |         # LSTM memory cell should be (num_layers, batch_size, cell_size)
37 |         assert list((lstm._states[1].size())) == [2, 4, 14]
38 | 


--------------------------------------------------------------------------------
/tests/modules/text_field_embedders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/modules/text_field_embedders/__init__.py


--------------------------------------------------------------------------------
/tests/modules/token_embedders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/modules/token_embedders/__init__.py


--------------------------------------------------------------------------------
/tests/modules/token_embedders/pass_through_embedder_test.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import torch
 3 | from allennlp.modules.token_embedders import PassThroughTokenEmbedder
 4 | from allennlp.common.testing import AllenNlpTestCase
 5 | 
 6 | 
 7 | class TestBagOfWordCountsTokenEmbedder(AllenNlpTestCase):
 8 |     def test_pass_through_embedder(self):
 9 |         embedder = PassThroughTokenEmbedder(3)
10 |         tensor = torch.randn([4, 3])
11 |         numpy.testing.assert_equal(tensor.numpy(), embedder(tensor).numpy())
12 |         assert embedder.get_output_dim() == 3
13 | 


--------------------------------------------------------------------------------
/tests/modules/transformer/activation_layer_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pytest
 3 | 
 4 | from allennlp.common import Params
 5 | from allennlp.modules.transformer import ActivationLayer
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def params_dict():
10 |     return {
11 |         "hidden_size": 5,
12 |         "intermediate_size": 3,
13 |         "activation": "relu",
14 |     }
15 | 
16 | 
17 | @pytest.fixture
18 | def params(params_dict):  # fixture: the `params_dict` fixture wrapped in `Params`
19 |     return Params(params_dict)
20 | 
21 | 
22 | @pytest.fixture
23 | def activation_layer(params):  # fixture: layer built from `params.duplicate()`
24 |     return ActivationLayer.from_params(params.duplicate())
25 | 
26 | 
27 | def test_can_construct_from_params(activation_layer, params_dict):
28 |     activation_layer = activation_layer
29 |     assert activation_layer.dense.in_features == params_dict["hidden_size"]
30 |     assert activation_layer.dense.out_features == params_dict["intermediate_size"]
31 | 
32 | 
33 | def test_forward_runs(activation_layer):
34 |     activation_layer.forward(torch.randn(7, 5))
35 | 


--------------------------------------------------------------------------------
/tests/modules/transformer/output_layer_test.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | import torch
 3 | 
 4 | from allennlp.common import Params
 5 | from allennlp.modules.transformer import OutputLayer
 6 | from allennlp.common.testing import AllenNlpTestCase
 7 | 
 8 | 
 9 | class TestOutputLayer(AllenNlpTestCase):
10 |     def setup_method(self):
11 |         super().setup_method()
12 | 
13 |         self.params_dict = {
14 |             "input_size": 3,
15 |             "hidden_size": 5,
16 |             "dropout": 0.1,
17 |         }
18 | 
19 |         params = Params(copy.deepcopy(self.params_dict))
20 | 
21 |         self.output_layer = OutputLayer.from_params(params)
22 | 
23 |     def test_can_construct_from_params(self):
24 | 
25 |         output_layer = self.output_layer
26 | 
27 |         assert output_layer.dense.in_features == self.params_dict["input_size"]
28 |         assert output_layer.dense.out_features == self.params_dict["hidden_size"]
29 | 
30 |         assert output_layer.layer_norm.normalized_shape[0] == self.params_dict["hidden_size"]
31 | 
32 |         assert output_layer.dropout.p == self.params_dict["dropout"]
33 | 
34 |     def test_forward_runs(self):
35 | 
36 |         self.output_layer.forward(torch.randn(3, 3), torch.randn(3, 5))
37 | 


--------------------------------------------------------------------------------
/tests/modules/transformer/transformer_pooler_test.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | import torch
 3 | 
 4 | from allennlp.common import Params
 5 | from allennlp.modules.transformer import TransformerPooler
 6 | from allennlp.common.testing import AllenNlpTestCase
 7 | 
 8 | 
 9 | class TestTransformerPooler(AllenNlpTestCase):
10 |     def setup_method(self):
11 |         super().setup_method()
12 | 
13 |         self.params_dict = {
14 |             "hidden_size": 5,
15 |             "intermediate_size": 3,
16 |         }
17 | 
18 |         params = Params(copy.deepcopy(self.params_dict))
19 | 
20 |         self.pooler = TransformerPooler.from_params(params)
21 | 
22 |     def test_can_construct_from_params(self):
23 | 
24 |         assert self.pooler.dense.in_features == self.params_dict["hidden_size"]
25 |         assert self.pooler.dense.out_features == self.params_dict["intermediate_size"]
26 | 
27 |     def test_forward_runs(self):
28 | 
29 |         out = self.pooler.forward(torch.randn(2, 7, 5))
30 |         assert out.size() == (2, 3)
31 | 


--------------------------------------------------------------------------------
/tests/modules/vision/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/modules/vision/__init__.py


--------------------------------------------------------------------------------
/tests/modules/vision/grid_embedder_test.py:
--------------------------------------------------------------------------------
 1 | from allennlp.common.testing import AllenNlpTestCase, requires_gpu
 2 | from allennlp.data.image_loader import TorchImageLoader
 3 | from allennlp.modules.vision.grid_embedder import ResnetBackbone
 4 | 
 5 | 
 6 | class TestResnetBackbone(AllenNlpTestCase):
 7 |     @requires_gpu
 8 |     def test_forward_runs(self):
 9 |         loader = TorchImageLoader(device="cuda:0")
10 |         backbone = ResnetBackbone().to("cuda:0")
11 | 
12 |         image_pixels, image_size = loader(
13 |             [self.FIXTURES_ROOT / "data" / "images" / "COCO_train2014_000000458752.jpg"]
14 |         )
15 |         result = backbone(image_pixels, image_size)
16 |         assert tuple(result.keys()) == backbone.get_feature_names()
17 | 


--------------------------------------------------------------------------------
/tests/nn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/nn/__init__.py


--------------------------------------------------------------------------------
/tests/nn/checkpoint/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/nn/checkpoint/__init__.py


--------------------------------------------------------------------------------
/tests/nn/parallel/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/nn/parallel/__init__.py


--------------------------------------------------------------------------------
/tests/predictors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/predictors/__init__.py


--------------------------------------------------------------------------------
/tests/predictors/sentence_tagger_test.py:
--------------------------------------------------------------------------------
 1 | from allennlp.common.testing import AllenNlpTestCase
 2 | from allennlp.models.archival import load_archive
 3 | from allennlp.predictors import Predictor
 4 | 
 5 | 
 6 | class TestSentenceTaggerPredictor(AllenNlpTestCase):
 7 |     def test_predictions_to_labeled_instances(self):
 8 |         inputs = {"sentence": "Eric Wallace was an intern at AI2"}
 9 | 
10 |         archive = load_archive(
11 |             self.FIXTURES_ROOT / "simple_tagger" / "serialization" / "model.tar.gz"
12 |         )
13 |         predictor = Predictor.from_archive(archive, "sentence_tagger")
14 | 
15 |         instance = predictor._json_to_instance(inputs)
16 |         predictor._dataset_reader.apply_token_indexers(instance)
17 |         outputs = predictor._model.forward_on_instance(instance)
18 |         new_instances = predictor.predictions_to_labeled_instances(instance, outputs)
19 |         assert len(new_instances) > 1
20 |         for new_instance in new_instances:
21 |             assert "tags" in new_instance
22 |             assert len(new_instance["tags"]) == 7  # 7 words in input
23 | 


--------------------------------------------------------------------------------
/tests/training/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/training/__init__.py


--------------------------------------------------------------------------------
/tests/training/learning_rate_schedulers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/training/learning_rate_schedulers/__init__.py


--------------------------------------------------------------------------------
/tests/training/metrics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/training/metrics/__init__.py


--------------------------------------------------------------------------------
/tests/training/metrics/average_test.py:
--------------------------------------------------------------------------------
 1 | from allennlp.common.testing import (
 2 |     AllenNlpTestCase,
 3 |     multi_device,
 4 |     run_distributed_test,
 5 |     global_distributed_metric,
 6 | )
 7 | from allennlp.training.metrics import Average
 8 | 
 9 | 
10 | class AverageTest(AllenNlpTestCase):
11 |     def setup_method(self):
12 |         super().setup_method()
13 |         self.metric = Average()
14 | 
15 |     @multi_device
16 |     def test_distributed_average(self, device: str):
17 |         device_ids = [-1, -1] if device == "cpu" else [0, 1]
18 |         metric_kwargs = {
19 |             "value": [1.0, 2.0],
20 |         }
21 |         run_distributed_test(
22 |             device_ids,
23 |             global_distributed_metric,
24 |             self.metric,
25 |             metric_kwargs,
26 |             1.5,
27 |             exact=True,
28 |         )
29 | 


--------------------------------------------------------------------------------
/tests/training/momentum_schedulers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/training/momentum_schedulers/__init__.py


--------------------------------------------------------------------------------
/tests/training/no_op_trainer_test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Dict
 3 | 
 4 | import torch
 5 | 
 6 | from allennlp.common.testing import AllenNlpTestCase
 7 | from allennlp.data import Vocabulary
 8 | from allennlp.data.dataset_readers import SequenceTaggingDatasetReader
 9 | from allennlp.models.model import Model
10 | from allennlp.training import NoOpTrainer
11 | 
12 | 
13 | class ConstantModel(Model):  # stub model: `forward` never looks at its inputs
14 |     def forward(self, *inputs) -> Dict[str, torch.Tensor]:
15 |         return {"class": torch.tensor(98)}  # always the same scalar tensor
16 | 
17 | 
18 | class TestNoOpTrainer(AllenNlpTestCase):
19 |     def setup_method(self):
20 |         super().setup_method()
21 |         self.instances = SequenceTaggingDatasetReader().read(
22 |             self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"
23 |         )
24 |         vocab = Vocabulary.from_instances(self.instances)
25 |         self.vocab = vocab
26 |         self.model = ConstantModel(vocab)
27 | 
28 |     def test_trainer_serializes(self):
29 |         serialization_dir = self.TEST_DIR / "serialization_dir"
30 |         trainer = NoOpTrainer(serialization_dir=serialization_dir, model=self.model)
31 |         metrics = trainer.train()
32 |         assert metrics == {}
33 |         assert os.path.exists(serialization_dir / "best.th")
34 |         assert os.path.exists(serialization_dir / "vocabulary")
35 | 


--------------------------------------------------------------------------------
/tests/tutorials/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/tutorials/__init__.py


--------------------------------------------------------------------------------
/tests/tutorials/tagger/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/allennlp/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/tests/tutorials/tagger/__init__.py


--------------------------------------------------------------------------------
/tests/tutorials/tagger/basic_allennlp_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from allennlp.common.testing import AllenNlpTestCase
 4 | 
 5 | 
 6 | @pytest.mark.skip("makes test-install fail (and also takes 30 seconds)")
 7 | class TestBasicAllenNlp(AllenNlpTestCase):
 8 |     @classmethod
 9 |     def test_run_as_script(cls):
10 |         """Import the tagger tutorial module; the test passes if nothing raises."""
11 |         # The import itself is the test, hence the `noqa` on the otherwise unused import.
12 |         import tutorials.tagger.basic_allennlp  # noqa
13 | 


--------------------------------------------------------------------------------
/tests/version_test.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | import pytest
 4 | 
 5 | from allennlp.version import VERSION
 6 | 
 7 | 
 8 | # Regex to check that the current version set in `allennlp.version` adheres to
 9 | # PEP 440, as well as some of our own internal conventions, such as the `.dev`
10 | # suffix being used only for nightly builds.
11 | # 0.0.0rc0.post0.dev20200424
12 | VALID_VERSION_RE = re.compile(
13 |     r"^"
14 |     r"(0|[1-9]\d*)"  # major
15 |     r"\.(0|[1-9]\d*)"  # minor
16 |     r"\.(0|[1-9]\d*)"  # patch
17 |     r"(rc(0|[1-9]\d*))?"  # patch suffix
18 |     r"(\.post(0|[1-9]\d*))?"  # [.postN]
19 |     r"(\.dev2020[0-9]{4})?"  # [.devDATE]
20 |     r"$"
21 | )
22 | 
23 | 
24 | def is_valid(version: str) -> bool:
25 |     return VALID_VERSION_RE.match(version) is not None
26 | 
27 | 
28 | @pytest.mark.parametrize(
29 |     "version, valid",
30 |     [
31 |         # Valid versions:
32 |         ("1.0.0", True),
33 |         ("1.0.0rc3", True),
34 |         ("1.0.0.post0", True),
35 |         ("1.0.0.post1", True),
36 |         ("1.0.0rc3.post0", True),
37 |         ("1.0.0rc3.post0.dev20200424", True),
38 |         # Invalid versions:
39 |         ("1.0.0.rc3", False),
40 |         ("1.0.0rc01", False),
41 |         ("1.0.0rc3.dev2020424", False),
42 |     ],
43 | )
44 | def test_is_valid_helper(version: str, valid: bool):
45 |     assert is_valid(version) is valid
46 | 
47 | 
48 | def test_version():
49 |     """
50 |     Ensures current version is consistent with our conventions.
51 |     """
52 |     assert is_valid(VERSION)
53 | 


--------------------------------------------------------------------------------