├── .gitignore ├── .pylintrc ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── build_tools ├── circle │ ├── build_doc.sh │ └── install_doc_requirements.sh └── travis │ ├── after_success.sh │ ├── install.sh │ └── test_script.sh ├── circle.yml ├── codecov.yml ├── deep_qa ├── README.md ├── __init__.py ├── common │ ├── __init__.py │ ├── checks.py │ ├── models.py │ ├── params.py │ ├── tee_logger.py │ └── util.py ├── data │ ├── README.md │ ├── __init__.py │ ├── data_generator.py │ ├── data_indexer.py │ ├── dataset_readers │ │ ├── __init__.py │ │ └── squad_sentence_selection_reader.py │ ├── datasets │ │ ├── __init__.py │ │ ├── dataset.py │ │ ├── entailment │ │ │ ├── __init__.py │ │ │ └── snli_dataset.py │ │ └── language_modeling │ │ │ ├── __init__.py │ │ │ └── language_modeling_dataset.py │ ├── embeddings.py │ ├── instances │ │ ├── README.md │ │ ├── __init__.py │ │ ├── entailment │ │ │ ├── __init__.py │ │ │ ├── sentence_pair_instance.py │ │ │ └── snli_instance.py │ │ ├── instance.py │ │ ├── language_modeling │ │ │ ├── __init__.py │ │ │ └── sentence_instance.py │ │ ├── reading_comprehension │ │ │ ├── __init__.py │ │ │ ├── character_span_instance.py │ │ │ ├── mc_question_passage_instance.py │ │ │ └── question_passage_instance.py │ │ ├── sequence_tagging │ │ │ ├── __init__.py │ │ │ ├── pretokenized_tagging_instance.py │ │ │ └── tagging_instance.py │ │ └── text_classification │ │ │ ├── __init__.py │ │ │ └── text_classification_instance.py │ └── tokenizers │ │ ├── __init__.py │ │ ├── character_tokenizer.py │ │ ├── tokenizer.py │ │ ├── word_and_character_tokenizer.py │ │ ├── word_filter.py │ │ ├── word_processor.py │ │ ├── word_splitter.py │ │ ├── word_stemmer.py │ │ └── word_tokenizer.py ├── layers │ ├── README.md │ ├── __init__.py │ ├── additive.py │ ├── attention │ │ ├── README.md │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── gated_attention.py │ │ ├── masked_softmax.py │ │ ├── matrix_attention.py │ │ ├── max_similarity_softmax.py │ │ └── weighted_sum.py │ ├── backend │ │ ├── README.md │ │ ├── __init__.py │ │ ├── add_mask.py │ │ ├── batch_dot.py │ │ ├── collapse_to_batch.py │ │ ├── envelope.py │ │ ├── expand_from_batch.py │ │ ├── max.py │ │ ├── multiply.py │ │ ├── permute.py │ │ ├── repeat.py │ │ ├── repeat_like.py │ │ ├── replace_masked_values.py │ │ └── squeeze.py │ ├── bigru_index_selector.py │ ├── complex_concat.py │ ├── encoders │ │ ├── __init__.py │ │ ├── attentive_gru.py │ │ ├── bag_of_words.py │ │ ├── convolutional_encoder.py │ │ ├── positional_encoder.py │ │ └── shareable_gru.py │ ├── entailment_models │ │ ├── __init__.py │ │ ├── decomposable_attention.py │ │ ├── multiple_choice_tuple_entailment.py │ │ └── word_alignment.py │ ├── highway.py │ ├── l1_normalize.py │ ├── masked_layer.py │ ├── noisy_or.py │ ├── option_attention_sum.py │ ├── overlap.py │ ├── subtract_minimum.py │ ├── vector_matrix_merge.py │ ├── vector_matrix_split.py │ └── wrappers │ │ ├── __init__.py │ │ ├── add_encoder_mask.py │ │ ├── encoder_wrapper.py │ │ ├── output_mask.py │ │ └── time_distributed.py ├── models │ ├── README.md │ ├── __init__.py │ ├── entailment │ │ ├── README.md │ │ ├── __init__.py │ │ └── decomposable_attention.py │ ├── reading_comprehension │ │ ├── __init__.py │ │ ├── attention_sum_reader.py │ │ ├── bidirectional_attention.py │ │ └── gated_attention_reader.py │ ├── sequence_tagging │ │ ├── README.md │ │ ├── __init__.py │ │ └── simple_tagger.py │ └── text_classification │ │ ├── README.md │ │ ├── __init__.py │ │ └── classification_model.py ├── run.py ├── tensors │ ├── README.md │ ├── 
__init__.py │ ├── backend.py │ ├── masked_operations.py │ └── similarity_functions │ │ ├── README.md │ │ ├── __init__.py │ │ ├── bilinear.py │ │ ├── cosine_similarity.py │ │ ├── dot_product.py │ │ ├── linear.py │ │ └── similarity_function.py ├── testing │ ├── __init__.py │ └── test_case.py └── training │ ├── README.md │ ├── __init__.py │ ├── losses.py │ ├── models.py │ ├── multi_gpu.py │ ├── optimizers.py │ ├── step.py │ ├── text_trainer.py │ ├── train_utils.py │ └── trainer.py ├── doc ├── Makefile ├── _static │ └── custom.css ├── _templates │ └── layout.html ├── common │ ├── about_common.rst │ ├── checks.rst │ └── params.rst ├── conf.py ├── data │ ├── about_data.rst │ ├── data_generator.rst │ ├── datasets.rst │ ├── entailment.rst │ ├── general_data_utils.rst │ ├── instances.rst │ ├── reading_comprehension.rst │ ├── sequence_tagging.rst │ ├── text_classification.rst │ └── tokenizers.rst ├── img │ └── module_breakdown.png ├── index.rst ├── layers │ ├── about_layers.rst │ ├── attention.rst │ ├── backend.rst │ ├── core_layers.rst │ ├── encoders.rst │ ├── entailment_models.rst │ └── wrappers.rst ├── models │ ├── about_models.rst │ ├── entailment.rst │ ├── reading_comprehension.rst │ └── text_classification.rst ├── run.rst ├── tensors │ ├── about_tensors.rst │ ├── core_tensors.rst │ └── similarity_functions.rst └── training │ ├── about_trainers.rst │ ├── misc.rst │ ├── multi_gpu.rst │ ├── text_trainer.rst │ └── trainer.rst ├── example_experiments ├── entailment │ └── snli_decomposable_attention.json ├── reading_comprehension │ ├── asreader_who_did_what.json │ ├── bidaf_squad.json │ └── gareader_who_did_what.json └── sequence_tagging │ └── simple_tagger.json ├── pytest.ini ├── requirements.txt ├── scripts ├── clean_newsqa.py ├── clean_raw_omnibus.py ├── install_requirements.sh ├── pylint.sh ├── run_ensemble.py ├── run_model.py └── set_processor.sh ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── common ├── __init__.py ├── pythonhashseed_test.py └── test_util.py ├── data ├── __init__.py ├── data_generator_test.py ├── data_indexer_test.py ├── dataset_readers │ ├── __init__.py │ └── squad_sentence_selection_reader_test.py ├── datasets │ ├── __init__.py │ ├── dataset_test.py │ ├── language_modeling_dataset_test.py │ └── snli_dataset_test.py ├── embeddings_test.py ├── instances │ ├── __init__.py │ ├── entailment │ │ ├── __init__.py │ │ ├── sentence_pair_instance_test.py │ │ └── snli_instance_test.py │ ├── language_modeling │ │ ├── __init__.py │ │ └── sentence_instance_test.py │ ├── reading_comprehension │ │ ├── __init__.py │ │ ├── character_span_instance_test.py │ │ └── mc_question_passage_instance_test.py │ ├── sequence_tagging │ │ ├── __init__.py │ │ ├── pretokenized_tagging_instance_test.py │ │ └── test_tagging_instance.py │ ├── text_classification │ │ ├── __init__.py │ │ └── text_classification_instance_test.py │ └── text_instance_test.py └── tokenizers │ ├── tokenizer_test.py │ ├── word_processor_test.py │ └── word_splitter_test.py ├── example_experiments_test.py ├── layers ├── __init__.py ├── attention │ ├── __init__.py │ ├── attention_test.py │ ├── gated_attention_test.py │ ├── masked_softmax_test.py │ ├── matrix_attention_test.py │ └── weighted_sum_test.py ├── attentive_gru_test.py ├── backend │ ├── __init__.py │ ├── batch_dot_test.py │ ├── collapse_and_expand_test.py │ ├── envelope_test.py │ ├── max_test.py │ ├── multiply_test.py │ ├── permute_test.py │ ├── repeat_like_test.py │ ├── repeat_test.py │ └── replace_masked_values_test.py ├── bigru_index_selector_test.py ├── 
complex_concat_test.py ├── decomposable_attention_test.py ├── encoders │ ├── __init__.py │ └── bow_encoder_test.py ├── noisy_or_test.py ├── overlap_test.py ├── positional_encoder_test.py ├── test_l1_normalize.py ├── test_option_attention_sum.py ├── test_subtract_minimum.py ├── tuple_alignment_test.py ├── vector_matrix_merge_test.py ├── vector_matrix_split_test.py └── wrappers │ ├── __init__.py │ ├── add_encoder_mask_test.py │ ├── encoder_wrapper_test.py │ └── time_distributed_test.py ├── models ├── __init__.py ├── entailment │ ├── __init__.py │ └── decomposable_attention_test.py ├── reading_comprehension │ ├── __init__.py │ ├── attention_sum_reader_test.py │ ├── bidirectional_attention_test.py │ └── gated_attention_reader_test.py └── sequence_tagging │ ├── __init__.py │ └── simple_tagger_test.py ├── run_test.py ├── tensors ├── __init__.py ├── backend_test.py ├── masked_operations_test.py └── similarity_functions │ ├── __init__.py │ ├── bilinear_test.py │ ├── cosine_similarity_test.py │ ├── dot_product_test.py │ └── linear_test.py └── training ├── __init__.py ├── losses_test.py ├── multi_gpu_test.py ├── text_trainer_test.py └── train_utils_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | /data/ 3 | /models/ 4 | /experiments/ 5 | .cache/ 6 | .coverage 7 | _build 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # use new container-based travis workers 2 | sudo: false 3 | dist: trusty 4 | 5 | language: python 6 | 7 | cache: 8 | directories: 9 | - /home/travis/nltk_data 10 | - /home/travis/download 11 | - /home/travis/miniconda3 12 | - /home/travis/.cache/pip 13 | 14 | env: 15 | matrix: 16 | # run tests, submit coverage info 17 | - COVERAGE="true" 18 | # pylint checks skip the tests 19 | - RUN_PYLINT="true" SKIP_TESTS="true" 20 | 21 | notifications: 22 | email: false 23 | 24 | install: source build_tools/travis/install.sh 25 | script: bash build_tools/travis/test_script.sh 26 | after_success: source build_tools/travis/after_success.sh 27 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include requirements.txt 3 | include scripts/* 4 | include example_experiments/*.json 5 | recursive-exclude tests * 6 | -------------------------------------------------------------------------------- /build_tools/circle/build_doc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | set -e 4 | 5 | MAKE_TARGET=html-strict 6 | 7 | source activate testenv 8 | 9 | # The pipefail is requested to propagate exit code 10 | set -o pipefail && cd doc && make $MAKE_TARGET 2>&1 | tee ~/log.txt 11 | 12 | echo "Finished building docs." 
13 | echo "Artifacts in $CIRCLE_ARTIFACTS" 14 | -------------------------------------------------------------------------------- /build_tools/circle/install_doc_requirements.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | set -e 4 | 5 | # Installing required system packages to support the rendering of math 6 | # notation in the HTML documentation 7 | rm -rf ~/.pyenv && rm -rf ~/virtualenvs 8 | sudo -E apt-get -yq remove texlive-binaries --purge 9 | sudo -E apt-get -yq update 10 | sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes \ 11 | install dvipng texlive-latex-base texlive-latex-extra \ 12 | texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended 13 | 14 | # deactivate circleci virtualenv and setup a miniconda env instead 15 | if [[ `type -t deactivate` ]]; then 16 | deactivate 17 | fi 18 | 19 | # Install dependencies with miniconda 20 | pushd . 21 | cd 22 | mkdir -p download 23 | cd download 24 | echo "Cached in $HOME/download :" 25 | ls -l 26 | if [[ ! -f miniconda.sh ]] 27 | then 28 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh \ 29 | -O miniconda.sh 30 | fi 31 | chmod +x miniconda.sh && ./miniconda.sh -b -p $MINICONDA_PATH 32 | cd .. 33 | export PATH="$MINICONDA_PATH/bin:$PATH" 34 | conda update --yes --quiet conda 35 | popd 36 | 37 | # Configure the conda environment and put it in the path using the 38 | # provided versions. 39 | conda create -n $CONDA_ENV_NAME --yes --quiet python=3.5.2 40 | source activate testenv 41 | 42 | # Install pip dependencies. 43 | pip install --quiet -r requirements.txt 44 | -------------------------------------------------------------------------------- /build_tools/travis/after_success.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called by the "after_success" step defined in 3 | # .travis.yml. See http://docs.travis-ci.com/ for more details. 4 | 5 | set -e 6 | 7 | if [[ "$COVERAGE" == "true" ]]; then 8 | # Ignore codecov failures as the codecov server is not 9 | # very reliable but we don't want travis to report a failure 10 | # in the github UI just because the coverage report failed to 11 | # be published. 12 | codecov || echo "codecov upload failed" 13 | fi 14 | -------------------------------------------------------------------------------- /build_tools/travis/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo 'List files from cached directories' 5 | echo 'pip:' 6 | ls $HOME/.cache/pip 7 | 8 | # Deactivate the travis-provided virtual environment and setup a 9 | # conda-based environment instead 10 | deactivate 11 | 12 | # Add the miniconda bin directory to $PATH 13 | export PATH=/home/travis/miniconda3/bin:$PATH 14 | echo $PATH 15 | 16 | # Use the miniconda installer for setup of conda itself 17 | pushd . 18 | cd 19 | mkdir -p download 20 | cd download 21 | if [[ ! -f /home/travis/miniconda3/bin/activate ]] 22 | then 23 | if [[ ! -f miniconda.sh ]] 24 | then 25 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 26 | -O miniconda.sh 27 | fi 28 | chmod +x miniconda.sh && ./miniconda.sh -b -f 29 | conda update --yes conda 30 | # If we are running pylint, use Python 3.5.2 due to 31 | # bug in pylint. 
https://github.com/PyCQA/pylint/issues/1295 32 | conda create -n testenv352 --yes python=3.5.2 33 | conda create -n testenv --yes python=3.5 34 | fi 35 | cd .. 36 | popd 37 | 38 | # Activate the python environment we created. 39 | if [[ "$RUN_PYLINT" == "true" ]]; then 40 | source activate testenv352 41 | else 42 | source activate testenv 43 | fi 44 | 45 | # Install requirements via pip in our conda environment 46 | pip install -U -r requirements.txt 47 | 48 | # List the packages to get their versions for debugging 49 | pip list 50 | 51 | # Install punkt tokenizer 52 | python -m nltk.downloader punkt 53 | 54 | # Install spacy data 55 | python -m spacy.en.download all 56 | -------------------------------------------------------------------------------- /build_tools/travis/test_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called by the "script" step defined in 3 | # .travis.yml. See http://docs.travis-ci.com/ for more details. 4 | # The behavior of the script is controlled by environment variables defined 5 | # in the .travis.yml in the top level folder of the project. 6 | 7 | # License: 3-clause BSD 8 | 9 | set -e 10 | 11 | python --version 12 | 13 | export PYTHONHASHSEED=2157 14 | 15 | run_tests() { 16 | KERAS_BACKEND=tensorflow py.test -v --cov=deep_qa --durations=20 17 | } 18 | 19 | if [[ "$RUN_PYLINT" == "true" ]]; then 20 | source scripts/pylint.sh 21 | fi 22 | 23 | if [[ "$SKIP_TESTS" != "true" ]]; then 24 | run_tests 25 | fi 26 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | machine: 2 | environment: 3 | MINICONDA_PATH: $HOME/miniconda 4 | CONDA_ENV_NAME: testenv 5 | PATH: $MINICONDA_PATH/bin:$PATH 6 | 7 | dependencies: 8 | cache_directories: 9 | - "~/download" 10 | # Check whether the doc build is required, install build dependencies and 11 | # run sphinx to build the doc. 12 | override: 13 | - bash build_tools/circle/install_doc_requirements.sh 14 | - bash build_tools/circle/build_doc.sh 15 | 16 | test: 17 | override: 18 | - | 19 | export PATH="$MINICONDA_PATH/bin:$PATH" 20 | source activate $CONDA_ENV_NAME 21 | # we could run doctests here in the future 22 | # make doctests 23 | general: 24 | # Expose the built docs via the CircleCI artifacts API 25 | artifacts: 26 | - "doc/_build/html" 27 | - "~/log.txt" -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | precision: 0 3 | round: down 4 | status: 5 | patch: 6 | default: 7 | target: 90 8 | project: 9 | default: 10 | threshold: 1% 11 | changes: false 12 | comment: false 13 | ignore: 14 | - "deep_qa/contrib" 15 | -------------------------------------------------------------------------------- /deep_qa/README.md: -------------------------------------------------------------------------------- 1 | # DeepQA 2 | 3 | DeepQA is organised into the following main sections: 4 | 5 | - `common`: Code for parameter parsing, logging and runtime checks. 6 | - `data`: Indexing, padding, tokenisation, stemming, embedding and general dataset manipulation 7 | happens here. 8 | - `layers`: The bulk of the library. Use these Layers to compose new models. 
Some of these Layers 9 | are very similar to what you might find in Keras, but altered slightly to support arbitrary 10 | dimensions or correct masking. 11 | - `models`: Frameworks for different types of task. These generally all extend the TextTrainer 12 | class which provides training capabilities to a DeepQaModel. We have models for Sequence 13 | Tagging, Entailment, Multiple Choice QA, Reading Comprehension and more. Take a look at the READMEs 14 | under `models` for more details - each task typically has a README describing the task definition. 15 | - `tensors`: Convenience functions for writing the internals of Layers. Will almost exclusively be 16 | used inside Layer implementations. 17 | - `training`: This module does the heavy lifting for training and optimisation. We also wrap the 18 | Keras Model class to give it some useful debugging functionality. 19 | 20 | The `data` and `models` sections are, in turn, structured according to what task they are intended 21 | for (e.g., text classification, reading comprehension, sequence tagging, etc.). This should make 22 | it easy to see if something you are trying to do is already implemented in DeepQA or not. 23 | 24 | -------------------------------------------------------------------------------- /deep_qa/__init__.py: -------------------------------------------------------------------------------- 1 | from .run import run_model, evaluate_model, load_model, score_dataset, score_dataset_with_ensemble 2 | from .run import compute_accuracy, run_model_from_file 3 | -------------------------------------------------------------------------------- /deep_qa/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/common/__init__.py -------------------------------------------------------------------------------- /deep_qa/common/checks.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | REQUIRED_PYTHONHASHSEED = '2157' 5 | 6 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name 7 | 8 | class ConfigurationError(Exception): 9 | def __init__(self, message): 10 | super(ConfigurationError, self).__init__() 11 | self.message = message 12 | 13 | def __str__(self): 14 | return repr(self.message) 15 | 16 | 17 | def ensure_pythonhashseed_set(): 18 | message = """You must set PYTHONHASHSEED to %s so we get repeatable results and tests pass. 19 | You can do this with the command `export PYTHONHASHSEED=%s`. 20 | See https://docs.python.org/3/using/cmdline.html#envvar-PYTHONHASHSEED for more info. 21 | """ 22 | assert os.environ.get('PYTHONHASHSEED', None) == REQUIRED_PYTHONHASHSEED, \ 23 | message % (REQUIRED_PYTHONHASHSEED, REQUIRED_PYTHONHASHSEED) 24 | 25 | 26 | def log_keras_version_info(): 27 | import keras 28 | logger.info("Keras version: " + keras.__version__) 29 | from keras import backend as K 30 | try: 31 | backend = K.backend() 32 | except AttributeError: 33 | backend = K._BACKEND # pylint: disable=protected-access 34 | if backend == 'theano': 35 | import theano 36 | logger.info("Theano version: " + theano.__version__) 37 | logger.warning("Using Keras' theano backend is not supported! 
Expect to crash...") 38 | elif backend == 'tensorflow': 39 | import tensorflow 40 | logger.info("Tensorflow version: " + tensorflow.__version__) # pylint: disable=no-member 41 | -------------------------------------------------------------------------------- /deep_qa/common/models.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from keras.models import Model 4 | 5 | from ..training.models import DeepQaModel 6 | 7 | 8 | def get_submodel(model: Model, 9 | input_layer_names: List[str], 10 | output_layer_names: List[str], 11 | train_model: bool=False, 12 | name=None): 13 | """ 14 | Returns a new model constructed from ``model``. This model will be a subset of the given 15 | ``Model``, with the inputs specified by ``input_layer_names`` and the outputs specified by 16 | ``output_layer_names``. For example, if the input model is :class:`BiDAF 17 | .models.reading_comprehens.bidirectional_attention.BidirectionalAttentionFlow`, you can use 18 | this to get a model that outputs the passage embedding, just before the span prediction 19 | layers, by calling 20 | ``get_submodel(bidaf.model, ['question_input', 'passage_input'], ['final_merged_passage'])``. 21 | """ 22 | layer_input_dict = {} 23 | layer_output_dict = {} 24 | for layer in model.layers: 25 | layer_input_dict[layer.name] = layer.get_input_at(0) 26 | layer_output_dict[layer.name] = layer.get_output_at(0) 27 | input_layers = [layer_input_dict[name] for name in input_layer_names] 28 | output_layers = [layer_output_dict[name] for name in output_layer_names] 29 | submodel = DeepQaModel(inputs=input_layers, outputs=output_layers, name=name) 30 | if not train_model: 31 | submodel.trainable = False 32 | return submodel 33 | -------------------------------------------------------------------------------- /deep_qa/common/tee_logger.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | 4 | 5 | class TeeLogger: 6 | """ 7 | This class is an attempt to maintain logs of both stdout and stderr for when models are run. 8 | To use this class, at the beginning of your script insert these lines:: 9 | 10 | sys.stdout = TeeLogger("stdout.log", sys.stdout) 11 | sys.stderr = TeeLogger("stdout.log", sys.stderr) 12 | """ 13 | def __init__(self, filename: str, terminal: io.TextIOWrapper): 14 | self.terminal = terminal 15 | parent_directory = os.path.dirname(filename) 16 | os.makedirs(parent_directory, exist_ok=True) 17 | self.log = open(filename, 'a') 18 | 19 | def write(self, message): 20 | self.terminal.write(message) 21 | # We'll special case a particular thing that keras does, to make the log file more 22 | # readable. Keras uses ^H characters to get the training line to update for each batch 23 | # without adding more lines to the terminal output. Displaying those in a file won't work 24 | # correctly, so we'll just make sure that each batch shows up on its own line. 
25 | if '\x08' in message: 26 | message = message.replace('\x08', '') 27 | if len(message) == 0 or message[-1] != '\n': 28 | message += '\n' 29 | self.log.write(message) 30 | 31 | def flush(self): 32 | self.terminal.flush() 33 | self.log.flush() 34 | -------------------------------------------------------------------------------- /deep_qa/common/util.py: -------------------------------------------------------------------------------- 1 | from itertools import zip_longest 2 | from typing import Any, Dict, List 3 | import random 4 | 5 | 6 | def group_by_count(iterable: List[Any], count: int, default_value: Any) -> List[List[Any]]: 7 | """ 8 | Takes a list and groups it into sublists of size ``count``, using ``default_value`` to pad the 9 | list at the end if the list is not divisible by ``count``. 10 | 11 | For example: 12 | >>> group_by_count([1, 2, 3, 4, 5, 6, 7], 3, 0) 13 | [[1, 2, 3], [4, 5, 6], [7, 0, 0]] 14 | 15 | This is a short method, but it's complicated and hard to remember as a one-liner, so we just 16 | make a function out of it. 17 | """ 18 | return [list(l) for l in zip_longest(*[iter(iterable)] * count, fillvalue=default_value)] 19 | 20 | 21 | def add_noise_to_dict_values(dictionary: Dict[Any, float], noise_param: float) -> Dict[Any, float]: 22 | """ 23 | Returns a new dictionary with noise added to every key in ``dictionary``. The noise is 24 | uniformly distributed within ``noise_param`` percent of the value for every value in the 25 | dictionary. 26 | """ 27 | new_dict = {} 28 | for key, value in dictionary.items(): 29 | noise_value = value * noise_param 30 | noise = random.uniform(-noise_value, noise_value) 31 | new_dict[key] = value + noise 32 | return new_dict 33 | 34 | 35 | def clean_layer_name(input_name: str, 36 | strip_right_of_last_backslash: bool=True, 37 | strip_numerics_after_underscores: bool=True): 38 | """ 39 | There exist cases when layer names need to be concatenated in order to create new, unique 40 | layer names. However, the indices added to layer names designating the ith output of calling 41 | the layer cannot occur within a layer name apart from at the end, so this utility function 42 | removes these. 43 | 44 | Parameters 45 | ---------- 46 | 47 | input_name: str, required 48 | A Keras layer name. 49 | strip_right_of_last_backslash: bool, optional, (default = True) 50 | Should we strip anything past the last backslash in the name? 51 | This can be useful for controlling scopes. 52 | strip_numerics_after_underscores: bool, optional, (default = True) 53 | If there are numerical values after an underscore at the end of the layer name, 54 | this flag specifies whether or not to remove them. 55 | """ 56 | # Always strip anything after :, as these will be numerical 57 | # counts of the number of times the layer has been called, 58 | # which cannot be included in a layer name. 59 | if ':' in input_name: 60 | input_name = input_name.split(':')[0] 61 | if '/' in input_name and strip_right_of_last_backslash: 62 | input_name = input_name.rsplit('/', 1)[0] 63 | if input_name.split('_')[-1].isdigit() and strip_numerics_after_underscores: 64 | input_name = '_'.join(input_name.split('_')[:-1]) 65 | 66 | return input_name 67 | -------------------------------------------------------------------------------- /deep_qa/data/README.md: -------------------------------------------------------------------------------- 1 | # Data 2 | 3 | This module contains code for processing data. 
There's a `DataIndexer`, whose job it is to convert 4 | from strings to word (or character) indices suitable for use with an embedding matrix. There's 5 | code to load pre-trained embeddings from a file, to tokenize sentences, and, most importantly, to 6 | convert training and testing examples into numpy arrays that can be used with Keras. 7 | 8 | The most important thing to understand about the data processing code is the `Dataset` object. A 9 | `Dataset` is a collection of `Instances`, which are the individual examples used for training and 10 | testing. `Dataset` has two subclasses: `TextDataset`, which contains `Instances` with raw strings 11 | and can be read directly from a file, and `IndexedDataset`, which contains `Instances` whose raw 12 | strings have been converted to word (or character) indices. The `IndexedDataset` has methods for 13 | padding sequences to a consistent length, so that models can be compiled, and for converting the 14 | `Instances` to numpy arrays. The file formats read by `TextDataset`, and the format of the numpy 15 | arrays produced by `IndexedDataset`, are determined by the underlying `Instance` type used by the 16 | `Dataset`. See the `instances` module for more detail on this. 17 | -------------------------------------------------------------------------------- /deep_qa/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets.dataset import Dataset, IndexedDataset, TextDataset 2 | 3 | from .data_generator import DataGenerator 4 | from .data_indexer import DataIndexer 5 | from .tokenizers import tokenizers 6 | -------------------------------------------------------------------------------- /deep_qa/data/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/data/dataset_readers/__init__.py -------------------------------------------------------------------------------- /deep_qa/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from .entailment.snli_dataset import SnliDataset 4 | from .language_modeling.language_modeling_dataset import LanguageModelingDataset 5 | from .dataset import Dataset, TextDataset, IndexedDataset 6 | 7 | 8 | concrete_datasets = OrderedDict() # pylint: disable=invalid-name 9 | concrete_datasets["text"] = TextDataset 10 | concrete_datasets["language_modeling"] = LanguageModelingDataset 11 | concrete_datasets["snli"] = SnliDataset 12 | -------------------------------------------------------------------------------- /deep_qa/data/datasets/entailment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/data/datasets/entailment/__init__.py -------------------------------------------------------------------------------- /deep_qa/data/datasets/entailment/snli_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import json 3 | 4 | from overrides import overrides 5 | 6 | from ..dataset import TextDataset, log_label_counts 7 | from ...instances import TextInstance 8 | from ....common.params import Params 9 | 10 | 11 | class SnliDataset(TextDataset): 12 | 13 | def __init__(self, instances: List[TextInstance], params: 
Params=None): 14 | super(SnliDataset, self).__init__(instances, params) 15 | 16 | @staticmethod 17 | @overrides 18 | def read_from_file(filename: str, instance_class, params: Params=None): 19 | 20 | instances = [] 21 | for line in open(filename, 'r'): 22 | example = json.loads(line) 23 | 24 | # TODO(mark) why does this not match snli? Fix. 25 | label = example["gold_label"] 26 | if label == "entailment": 27 | label = "entails" 28 | elif label == "contradiction": 29 | label = "contradicts" 30 | 31 | text = example["sentence1"] 32 | hypothesis = example["sentence2"] 33 | instances.append(instance_class(text, hypothesis, label)) 34 | log_label_counts(instances) 35 | return SnliDataset(instances, params) 36 | -------------------------------------------------------------------------------- /deep_qa/data/datasets/language_modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/data/datasets/language_modeling/__init__.py -------------------------------------------------------------------------------- /deep_qa/data/datasets/language_modeling/language_modeling_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from overrides import overrides 4 | 5 | from ..dataset import TextDataset, log_label_counts 6 | from ...instances import TextInstance 7 | from ...instances.language_modeling import SentenceInstance 8 | from ....common.params import Params 9 | 10 | 11 | class LanguageModelingDataset(TextDataset): 12 | 13 | def __init__(self, instances: List[TextInstance], params: Params=None): 14 | # TODO(Mark): We are splitting on spaces below, so this won't end up being 15 | # the exact sequence length. This could be solved by passing the tokeniser 16 | # to the dataset. 17 | self.sequence_length = params.pop("sequence_length") 18 | super(LanguageModelingDataset, self).__init__(instances) 19 | 20 | @staticmethod 21 | @overrides 22 | def read_from_file(filename: str, instance_class, params: Params=None): 23 | 24 | sequence_length = params.get("sequence_length", 20) 25 | with open(filename, "r") as text_file: 26 | text = text_file.readlines() 27 | text = " ".join([x.replace("\n", " ").strip() for x in text]).split(" ") 28 | 29 | instances = [] 30 | for index in range(0, len(text) - sequence_length, sequence_length): 31 | word_sequence = " ".join(text[index: index + sequence_length]) 32 | instances.append(SentenceInstance(word_sequence)) 33 | 34 | log_label_counts(instances) 35 | return LanguageModelingDataset(instances, params) 36 | -------------------------------------------------------------------------------- /deep_qa/data/instances/README.md: -------------------------------------------------------------------------------- 1 | # Instances 2 | 3 | An `Instance` is a single training or testing example for a Keras model. The base classes for 4 | working with `Instances` are found in `instance.py`. There are two subclasses: (1) `TextInstance`, 5 | which is a raw instance that contains actual strings, and can be used to determine a vocabulary 6 | for a model, or read directly from a file; and (2) `IndexedInstance`, which has had its raw 7 | strings converted to word (or character) indices, and can be padded to a consistent length and 8 | converted to numpy arrays for use with Keras. 9 | 10 | There are a lot of different concrete `Instance` objects you can use. 
Some examples: 11 | 12 | - A `TrueFalseInstance`, that contains a single sentence with a true/false label. The numpy array 13 | for this instance is just a single word index sequence. 14 | - A `MultipleTrueFalseInstance`, which contains several `TrueFalseInstances`, only one of which is 15 | true. The numpy array here has shape `(num_options, sentence_length)`, and the label is a 16 | one-hot vector of length `num_options`. 17 | - A `BackgroundInstance`, which wraps another `Instance` type with a set of background sentences, 18 | adding an additional input of size `(knowledge_length, sentence_length)`. 19 | - A `LogicalFormInstance`, which is a `TrueFalseInstance` where the "sentence" is actually a 20 | tree-structured logical form (hmm, maybe we should call this a `TreeInstance` instead... 21 | TODO(matt).). In addition to the numpy array containing the word index sequence, there's another 22 | array containing shift / reduce operations so that you can construct a tree-structured network 23 | using a sequence, like in the [SPINN 24 | paper](https://www.semanticscholar.org/paper/A-Fast-Unified-Model-for-Parsing-and-Sentence-Bowman-Gauthier/23c141141f4f63c061d3cce14c71893959af5721) 25 | by Sam Bowman and others (see the [TreeCompositionLSTM 26 | encoder](https://github.com/allenai/deep_qa/blob/master/src/main/python/deep_qa/layers/encoders/tree_composition_lstm.py) 27 | for a way to actually use this in a model). 28 | 29 | A lot of the magic of how the DeepQA library works happens here, in the concrete `Instance` 30 | classes in this module. Most of the code can be totally agnostic to how exactly the input is 31 | structured, because the conversion to numpy arrays happens here, not in the `Trainer` or `Solver` 32 | classes, with only the specific `_build_model()` methods needing to know about the format of their 33 | input and output. 
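For a rough sense of how these pieces fit together, here is a sketch of the typical lifecycle of an `Instance` (the file name below is made up, and the exact method names, such as `to_indexed_instance`, `get_padding_lengths`, `pad`, and `as_training_data`, should be checked against `instance.py` rather than taken as authoritative):

```python
from deep_qa.data import DataIndexer, TextDataset
from deep_qa.data.instances.text_classification import TextClassificationInstance

# Read raw TextInstances from a file; the line format is defined by the Instance class.
dataset = TextDataset.read_from_file("train.tsv", TextClassificationInstance)

# Build a word -> index vocabulary from the raw strings in the dataset.
data_indexer = DataIndexer()
data_indexer.fit_word_dictionary(dataset)

# Convert one raw instance to word indices, pad it, and get numpy arrays for Keras.
indexed = dataset.instances[0].to_indexed_instance(data_indexer)
indexed.pad(indexed.get_padding_lengths())
inputs, label = indexed.as_training_data()
```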
34 | -------------------------------------------------------------------------------- /deep_qa/data/instances/__init__.py: -------------------------------------------------------------------------------- 1 | from .instance import Instance, TextInstance, IndexedInstance 2 | -------------------------------------------------------------------------------- /deep_qa/data/instances/entailment/__init__.py: -------------------------------------------------------------------------------- 1 | from .sentence_pair_instance import SentencePairInstance, IndexedSentencePairInstance 2 | from .snli_instance import SnliInstance 3 | -------------------------------------------------------------------------------- /deep_qa/data/instances/language_modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .sentence_instance import SentenceInstance, IndexedSentenceInstance 2 | -------------------------------------------------------------------------------- /deep_qa/data/instances/reading_comprehension/__init__.py: -------------------------------------------------------------------------------- 1 | from .character_span_instance import CharacterSpanInstance, IndexedCharacterSpanInstance 2 | from .mc_question_passage_instance import McQuestionPassageInstance, IndexedMcQuestionPassageInstance 3 | from .question_passage_instance import QuestionPassageInstance, IndexedQuestionPassageInstance 4 | -------------------------------------------------------------------------------- /deep_qa/data/instances/sequence_tagging/__init__.py: -------------------------------------------------------------------------------- 1 | from .pretokenized_tagging_instance import PreTokenizedTaggingInstance 2 | from .tagging_instance import TaggingInstance, IndexedTaggingInstance 3 | 4 | concrete_instances = { # pylint: disable=invalid-name 5 | 'PreTokenizedTaggingInstance': PreTokenizedTaggingInstance, 6 | } 7 | -------------------------------------------------------------------------------- /deep_qa/data/instances/sequence_tagging/pretokenized_tagging_instance.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy 4 | from overrides import overrides 5 | 6 | from .tagging_instance import TaggingInstance 7 | from ...data_indexer import DataIndexer 8 | 9 | class PreTokenizedTaggingInstance(TaggingInstance): 10 | """ 11 | This is a ``TaggingInstance`` where the text has been pre-tokenized. Thus the ``text`` member 12 | variable here is actually a ``List[str]``, instead of a ``str``. 13 | 14 | When using this ``Instance``, you `must` use the ``NoOpWordSplitter`` as well, or things will 15 | break. You probably also do not want any kind of filtering (though stemming is ok), because 16 | only the words will get filtered, not the labels. 17 | """ 18 | def __init__(self, text: List[str], label: List[str], index: int=None): 19 | super(PreTokenizedTaggingInstance, self).__init__(text, label, index) 20 | 21 | @classmethod 22 | @overrides 23 | def read_from_line(cls, line: str): 24 | """ 25 | Reads a ``PreTokenizedTaggingInstance`` from a line. The format has one of two options: 26 | 27 | 1. [example index][token1]###[tag1][tab][token2]###[tag2][tab]... 28 | 2. [token1]###[tag1][tab][token2]###[tag2][tab]... 
29 | """ 30 | fields = line.split("\t") 31 | 32 | if fields[0].isdigit(): 33 | index = int(fields[0]) 34 | fields = fields[1:] 35 | else: 36 | index = None 37 | tokens = [] 38 | tags = [] 39 | for field in fields: 40 | token, tag = field.rsplit("###", 1) 41 | tokens.append(token) 42 | tags.append(tag) 43 | return cls(tokens, tags, index) 44 | 45 | @overrides 46 | def tags_in_label(self): 47 | return [tag for tag in self.label] 48 | 49 | @overrides 50 | def _index_label(self, label: List[str], data_indexer: DataIndexer) -> List[int]: 51 | tag_indices = [data_indexer.get_word_index(tag, namespace='tags') for tag in label] 52 | indexed_label = [] 53 | for tag_index in tag_indices: 54 | # We subtract 2 here to account for the unknown and padding tokens that the DataIndexer 55 | # uses. 56 | tag_one_hot = numpy.zeros(data_indexer.get_vocab_size(namespace='tags') - 2) 57 | tag_one_hot[tag_index - 2] = 1 58 | indexed_label.append(tag_one_hot) 59 | return indexed_label 60 | -------------------------------------------------------------------------------- /deep_qa/data/instances/text_classification/__init__.py: -------------------------------------------------------------------------------- 1 | from .text_classification_instance import TextClassificationInstance, IndexedTextClassificationInstance 2 | -------------------------------------------------------------------------------- /deep_qa/data/tokenizers/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from .character_tokenizer import CharacterTokenizer 4 | from .word_and_character_tokenizer import WordAndCharacterTokenizer 5 | from .word_tokenizer import WordTokenizer 6 | 7 | # The first item added here will be used as the default in some cases. 8 | tokenizers = OrderedDict() # pylint: disable=invalid-name 9 | tokenizers['words'] = WordTokenizer 10 | tokenizers['characters'] = CharacterTokenizer 11 | tokenizers['words and characters'] = WordAndCharacterTokenizer 12 | -------------------------------------------------------------------------------- /deep_qa/data/tokenizers/word_processor.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from .word_splitter import word_splitters 4 | from .word_stemmer import word_stemmers 5 | from .word_filter import word_filters 6 | from ...common.params import Params 7 | 8 | 9 | class WordProcessor: 10 | """ 11 | A WordProcessor handles the splitting of strings into words (with the use of a WordSplitter) as well as any 12 | desired post-processing (e.g., stemming, filtering, etc.) 13 | 14 | Parameters 15 | ---------- 16 | word_splitter: str, default="simple" 17 | The string name of the ``WordSplitter`` of choice (see the options at the bottom of 18 | ``word_splitter.py``). 19 | 20 | word_filter: str, default="pass_through" 21 | The name of the ``WordFilter`` to use (see the options at the bottom of 22 | ``word_filter.py``). 23 | 24 | word_stemmer: str, default="pass_through" 25 | The name of the ``WordStemmer`` to use (see the options at the bottom of 26 | ``word_stemmer.py``). 
27 | """ 28 | def __init__(self, params: Params): 29 | word_splitter_choice = params.pop_choice('word_splitter', list(word_splitters.keys()), 30 | default_to_first_choice=True) 31 | self.word_splitter = word_splitters[word_splitter_choice]() 32 | word_filter_choice = params.pop_choice('word_filter', list(word_filters.keys()), 33 | default_to_first_choice=True) 34 | self.word_filter = word_filters[word_filter_choice]() 35 | word_stemmer_choice = params.pop_choice('word_stemmer', list(word_stemmers.keys()), 36 | default_to_first_choice=True) 37 | self.word_stemmer = word_stemmers[word_stemmer_choice]() 38 | params.assert_empty("WordProcessor") 39 | 40 | def get_tokens(self, sentence: str) -> List[str]: 41 | """ 42 | Does whatever processing is required to convert a string of text into a sequence of tokens. 43 | 44 | At a minimum, this uses a ``WordSplitter`` to split words into text. It may also do 45 | stemming or stopword removal, depending on the parameters given to the constructor. 46 | """ 47 | words = self.word_splitter.split_words(sentence) 48 | filtered_words = self.word_filter.filter_words(words) 49 | stemmed_words = [self.word_stemmer.stem_word(word) for word in filtered_words] 50 | return stemmed_words 51 | -------------------------------------------------------------------------------- /deep_qa/data/tokenizers/word_stemmer.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from nltk.stem import PorterStemmer as NltkPorterStemmer 4 | from overrides import overrides 5 | 6 | 7 | class WordStemmer: 8 | """ 9 | A ``WordStemmer`` lemmatizes words. This means that we map words to their root form, so that, 10 | e.g., "have", "has", and "had" all have the same internal representation. 11 | 12 | You should think carefully about whether and how much stemming you want in your model. Kind of 13 | the whole point of using word embeddings is so that you don't have to do this, but in a highly 14 | inflected language, or in a low-data setting, you might need it anyway. The default 15 | ``WordStemmer`` does nothing, just returning the work token as-is. 16 | """ 17 | def stem_word(self, word: str) -> str: 18 | """Converts a word to its lemma""" 19 | raise NotImplementedError 20 | 21 | 22 | class PassThroughWordStemmer(WordStemmer): 23 | """ 24 | Does not stem words; it's a no-op. This is the default word stemmer. 25 | """ 26 | @overrides 27 | def stem_word(self, word: str) -> str: 28 | return word 29 | 30 | 31 | class PorterStemmer(WordStemmer): 32 | """ 33 | Uses NLTK's PorterStemmer to stem words. 
34 | """ 35 | def __init__(self): 36 | self.stemmer = NltkPorterStemmer() 37 | 38 | @overrides 39 | def stem_word(self, word: str) -> str: 40 | return self.stemmer.stem(word) 41 | 42 | 43 | word_stemmers = OrderedDict() # pylint: disable=invalid-name 44 | word_stemmers['pass_through'] = PassThroughWordStemmer 45 | word_stemmers['porter'] = PorterStemmer 46 | -------------------------------------------------------------------------------- /deep_qa/data/tokenizers/word_tokenizer.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Dict, List, Tuple 2 | 3 | from overrides import overrides 4 | from keras.layers import Layer 5 | 6 | from .tokenizer import Tokenizer 7 | from .word_processor import WordProcessor 8 | from ..data_indexer import DataIndexer 9 | from ...common.params import Params 10 | 11 | 12 | class WordTokenizer(Tokenizer): 13 | """ 14 | A ``WordTokenizer`` splits strings into word tokens. 15 | 16 | There are several ways that you can split a string into words, so we rely on a 17 | ``WordProcessor`` to do that work for us. Note that we're using the word "tokenizer" here for 18 | something different than is typical in NLP - we're referring here to how strings are 19 | represented as numpy arrays, not the linguistic notion of splitting sentences into tokens. 20 | Those things are handled in the ``WordProcessor``, which is a common dependency in several 21 | ``Tokenizers``. 22 | 23 | Parameters 24 | ---------- 25 | processor: Dict[str, Any], default={} 26 | Contains parameters for processing text strings into word tokens, including, e.g., 27 | splitting, stemming, and filtering words. See ``WordProcessor`` for a complete description 28 | of available parameters. 29 | """ 30 | def __init__(self, params: Params): 31 | self.word_processor = WordProcessor(params.pop('processor', {})) 32 | super(WordTokenizer, self).__init__(params) 33 | 34 | @overrides 35 | def tokenize(self, text: str) -> List[str]: 36 | return self.word_processor.get_tokens(text) 37 | 38 | @overrides 39 | def get_words_for_indexer(self, text: str) -> Dict[str, List[str]]: 40 | return {'words': self.tokenize(text)} 41 | 42 | @overrides 43 | def index_text(self, text: str, data_indexer: DataIndexer) -> List: 44 | return [data_indexer.get_word_index(word, namespace='words') for word in self.tokenize(text)] 45 | 46 | @overrides 47 | def embed_input(self, 48 | input_layer: Layer, 49 | embed_function: Callable[[Layer, str, str], Layer], 50 | text_trainer, 51 | embedding_suffix: str=""): 52 | # pylint: disable=protected-access 53 | return embed_function(input_layer, 54 | embedding_name='words' + embedding_suffix, 55 | vocab_name='words') 56 | 57 | @overrides 58 | def get_sentence_shape(self, sentence_length: int, word_length: int) -> Tuple[int]: 59 | return (sentence_length,) 60 | 61 | @overrides 62 | def get_padding_lengths(self, sentence_length: int, word_length: int) -> Dict[str, int]: 63 | return {'num_sentence_words': sentence_length} 64 | -------------------------------------------------------------------------------- /deep_qa/layers/README.md: -------------------------------------------------------------------------------- 1 | # Layers 2 | 3 | Custom layers that we have implemented belong here. These include things like knowledge encoders 4 | (which encode the memory component of a memory network), knowledge selectors (which perform an 5 | attention over the memory), and entailment models. 
There's also an `encoders` submodule, 6 | containing sentence encoders that convert an embedded word (or character) sequence into a vector. 7 | -------------------------------------------------------------------------------- /deep_qa/layers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | # Individual layers. 3 | from .additive import Additive 4 | from .bigru_index_selector import BiGRUIndexSelector 5 | from .complex_concat import ComplexConcat 6 | from .highway import Highway 7 | from .l1_normalize import L1Normalize 8 | from .masked_layer import MaskedLayer 9 | from .noisy_or import BetweenZeroAndOne, NoisyOr 10 | from .option_attention_sum import OptionAttentionSum 11 | from .overlap import Overlap 12 | from .vector_matrix_merge import VectorMatrixMerge 13 | from .vector_matrix_split import VectorMatrixSplit 14 | -------------------------------------------------------------------------------- /deep_qa/layers/additive.py: -------------------------------------------------------------------------------- 1 | from overrides import overrides 2 | 3 | from .masked_layer import MaskedLayer 4 | 5 | class Additive(MaskedLayer): 6 | """ 7 | This ``Layer`` `adds` a parameter value to each cell in the input tensor, similar to a bias 8 | vector in a ``Dense`` layer, but this `only` adds, one value per cell. The value to add is 9 | learned. 10 | 11 | Parameters 12 | ---------- 13 | initializer: str, optional (default='glorot_uniform') 14 | Keras initializer for the additive weight. 15 | """ 16 | def __init__(self, initializer='glorot_uniform', **kwargs): 17 | super(Additive, self).__init__(**kwargs) 18 | 19 | self.initializer = initializer 20 | self._additive_weight = None 21 | 22 | @overrides 23 | def build(self, input_shape): 24 | super(Additive, self).build(input_shape) 25 | self._additive_weight = self.add_weight(shape=input_shape[1:], 26 | name='%s_additive' % self.name, 27 | initializer=self.initializer) 28 | 29 | @overrides 30 | def call(self, inputs, mask=None): 31 | return inputs + self._additive_weight 32 | 33 | @overrides 34 | def get_config(self): 35 | base_config = super(Additive, self).get_config() 36 | config = { 37 | 'initializer': self.initializer, 38 | } 39 | config.update(base_config) 40 | return config 41 | -------------------------------------------------------------------------------- /deep_qa/layers/attention/README.md: -------------------------------------------------------------------------------- 1 | Layers in this module compute some kind of "attention" over a vector or matrix. "Attention" 2 | typically means a normalized probability distribution, and is typically computed using a softmax 3 | after some similarity computation, so we're also grouping layers that do some kind of specialized 4 | softmax in this module. 
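As a small illustration of the common pattern (the shapes here are hypothetical, and the exact input conventions should be checked against `attention.py` and `weighted_sum.py`), `Attention` produces a normalized distribution over the rows of a matrix, and `WeightedSum` uses that distribution to collapse the matrix back to a single vector:

```python
from keras.layers import Input
from deep_qa.layers.attention import Attention, WeightedSum

query = Input(shape=(50,), name="query")           # (batch_size, embedding_dim)
matrix = Input(shape=(10, 50), name="background")  # (batch_size, num_rows, embedding_dim)

# A probability distribution over the rows of `matrix`, one weight per row.
weights = Attention()([query, matrix])             # (batch_size, num_rows)
# The attended summary of `matrix`, weighted by that distribution.
attended = WeightedSum()([matrix, weights])        # (batch_size, embedding_dim)
```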
5 | -------------------------------------------------------------------------------- /deep_qa/layers/attention/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .attention import Attention 3 | from .gated_attention import GatedAttention 4 | from .masked_softmax import MaskedSoftmax 5 | from .matrix_attention import MatrixAttention 6 | from .max_similarity_softmax import MaxSimilaritySoftmax 7 | from .weighted_sum import WeightedSum 8 | -------------------------------------------------------------------------------- /deep_qa/layers/attention/masked_softmax.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from ..masked_layer import MaskedLayer 5 | from ...tensors.backend import last_dim_flatten 6 | from ...tensors.masked_operations import masked_softmax 7 | 8 | class MaskedSoftmax(MaskedLayer): 9 | ''' 10 | This Layer performs a masked softmax. This could just be a `Lambda` layer that calls our 11 | `tensors.masked_softmax` function, except that `Lambda` layers do not properly handle masked 12 | input. 13 | 14 | The expected input to this layer is a tensor of shape `(batch_size, num_options)`, with a mask 15 | of the same shape. We also accept an input tensor of shape `(batch_size, num_options, 1)`, 16 | which we will squeeze to be `(batch_size, num_options)` (though the mask must still be 17 | `(batch_size, num_options)`). 18 | 19 | While we give the expected input as having two modes, we also accept higher-order tensors. In 20 | those cases, we'll first perform a `last_dim_flatten` on both the input and the mask, so that 21 | we always do the softmax over a single dimension (the last one). 22 | 23 | We give no output mask, as we expect this to only be used at the end of the model, to get a 24 | final probability distribution over class labels (and it's a softmax, so you'll have zeros in 25 | the tensor itself; do you really still need a mask?). If you need this to propagate the mask 26 | for whatever reason, it would be pretty easy to change it to optionally do so - submit a PR. 27 | ''' 28 | def __init__(self, **kwargs): 29 | super(MaskedSoftmax, self).__init__(**kwargs) 30 | 31 | @overrides 32 | def compute_mask(self, inputs, mask=None): 33 | # pylint: disable=unused-argument 34 | # We do not need a mask beyond this layer. 35 | return None 36 | 37 | @overrides 38 | def compute_output_shape(self, input_shape): 39 | if input_shape[-1] == 1: 40 | return input_shape[:-1] 41 | else: 42 | return input_shape 43 | 44 | @overrides 45 | def call(self, inputs, mask=None): 46 | input_shape = K.int_shape(inputs) 47 | if input_shape[-1] == 1: 48 | inputs = K.squeeze(inputs, axis=-1) 49 | input_shape = input_shape[:-1] 50 | if len(input_shape) > 2: 51 | original_inputs = inputs 52 | inputs = last_dim_flatten(inputs) 53 | if mask is not None: 54 | mask = last_dim_flatten(mask) 55 | # Now we have both inputs and mask with shape (?, num_options), and can do a softmax. 
56 | softmax_result = masked_softmax(inputs, mask) 57 | if len(input_shape) > 2: 58 | original_shape = K.shape(original_inputs) 59 | input_shape = K.concatenate([[-1], original_shape[1:]], 0) 60 | softmax_result = K.reshape(softmax_result, input_shape) 61 | return softmax_result 62 | -------------------------------------------------------------------------------- /deep_qa/layers/attention/max_similarity_softmax.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from ..masked_layer import MaskedLayer 5 | from ...tensors.masked_operations import masked_batch_dot, masked_softmax 6 | 7 | 8 | class MaxSimilaritySoftmax(MaskedLayer): 9 | ''' 10 | This layer takes encoded questions and knowledge in a multiple choice 11 | setting and computes the similarity between each of the question embeddings 12 | and the background knowledge, and returns a softmax over the options. 13 | 14 | Inputs: 15 | 16 | - encoded_questions (batch_size, num_options, encoding_dim) 17 | - encoded_knowledge (batch_size, num_options, knowledge_length, encoding_dim) 18 | 19 | Output: 20 | 21 | - option_probabilities (batch_size, num_options) 22 | 23 | This is a pretty niche layer that does a very specific computation. We only 24 | made it its own class instead of a ``Lambda`` layer so that we could handle 25 | masking correctly, which ``Lambda`` does not. 26 | 27 | ''' 28 | def __init__(self, knowledge_axis, max_knowledge_length, **kwargs): 29 | self.knowledge_axis = knowledge_axis 30 | self.max_knowledge_length = max_knowledge_length 31 | super(MaxSimilaritySoftmax, self).__init__(**kwargs) 32 | 33 | @overrides 34 | def compute_mask(self, inputs, mask=None): 35 | # pylint: disable=unused-argument 36 | return None 37 | 38 | @overrides 39 | def compute_output_shape(self, input_shapes): 40 | # (batch_size, num_options) 41 | return (input_shapes[0][0], input_shapes[0][1]) 42 | 43 | @overrides 44 | def call(self, inputs, mask=None): 45 | questions, knowledge = inputs 46 | question_mask, knowledge_mask = mask 47 | question_knowledge_similarity = masked_batch_dot(questions, knowledge, question_mask, knowledge_mask) 48 | max_knowledge_similarity = K.max(question_knowledge_similarity, axis=-1) # (samples, num_options) 49 | return masked_softmax(max_knowledge_similarity, question_mask) 50 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/README.md: -------------------------------------------------------------------------------- 1 | Layers in this module generally just implement some simple operation from the Keras backend as a 2 | Layer. The reason we have these as Layers is largely so that we can properly handle masking. 
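For example, taking a max over the word dimension of an embedded, masked sequence with a plain `Lambda(K.max)` would silently drop the mask, while the `Max` layer handles it. The sketch below is only illustrative: the shapes are made up, and the `axis` keyword is an assumption to be checked against `max.py`.

```python
from keras.layers import Embedding, Input
from deep_qa.layers.backend import Max

word_indices = Input(shape=(15,), dtype='int32')   # (batch_size, num_words)
# mask_zero=True attaches a mask marking which positions are padding.
embedded = Embedding(input_dim=1000, output_dim=50, mask_zero=True)(word_indices)
# Max respects that mask, so padded positions do not contribute to the max.
maxed = Max(axis=1)(embedded)                       # (batch_size, 50)
```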
3 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from .add_mask import AddMask 2 | from .batch_dot import BatchDot 3 | from .collapse_to_batch import CollapseToBatch 4 | from .envelope import Envelope 5 | from .expand_from_batch import ExpandFromBatch 6 | from .max import Max 7 | from .multiply import Multiply 8 | from .permute import Permute 9 | from .replace_masked_values import ReplaceMaskedValues 10 | from .repeat import Repeat 11 | from .repeat_like import RepeatLike 12 | from .squeeze import Squeeze 13 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/add_mask.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from ..masked_layer import MaskedLayer 5 | 6 | 7 | class AddMask(MaskedLayer): 8 | """ 9 | This ``Layer`` adds a mask to a tensor. It is intended solely for testing, though if you have 10 | a use case for this outside of testing, feel free to use it. The ``call()`` method just 11 | returns the inputs, and the ``compute_mask`` method calls ``K.not_equal(inputs, mask_value)``, 12 | and that's it. This is different from Keras' ``Masking`` layer, which assumes higher-order 13 | input and does a ``K.any()`` call in ``compute_mask``. 14 | 15 | Input: 16 | - tensor: a tensor of arbitrary shape 17 | 18 | Output: 19 | - the same tensor, now with a mask attached of the same shape 20 | 21 | Parameters 22 | ---------- 23 | mask_value: float, optional (default=0.0) 24 | This is the value that we will compare to in ``compute_mask``. 25 | """ 26 | def __init__(self, mask_value: float=0.0, **kwargs): 27 | self.mask_value = mask_value 28 | super(AddMask, self).__init__(**kwargs) 29 | 30 | @overrides 31 | def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument 32 | return K.cast(K.not_equal(inputs, self.mask_value), 'bool') 33 | 34 | @overrides 35 | def compute_output_shape(self, input_shape): 36 | return input_shape 37 | 38 | @overrides 39 | def call(self, inputs, mask=None): 40 | # It turns out that Keras doesn't like it if you just return inputs, so we need to return a 41 | # different tensor object. Just doing a cast apparently doesn't work, either, so we'll 42 | # add 0. 43 | return inputs + 0.0 44 | 45 | @overrides 46 | def get_config(self): 47 | config = {'mask_value': self.mask_value} 48 | base_config = super(AddMask, self).get_config() 49 | config.update(base_config) 50 | return config 51 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/envelope.py: -------------------------------------------------------------------------------- 1 | from overrides import overrides 2 | 3 | from keras import backend as K 4 | from ..masked_layer import MaskedLayer 5 | 6 | 7 | class Envelope(MaskedLayer): 8 | """ 9 | Given a probability distribution over a begin index and an end index of some sequence, this 10 | ``Layer`` computes an envelope over the sequence, a probability that each element lies within 11 | "begin" and "end". 
12 | 13 | Specifically, the computation done here is the following:: 14 | 15 | after_span_begin = K.cumsum(span_begin, axis=-1) 16 | after_span_end = K.cumsum(span_end, axis=-1) 17 | before_span_end = 1 - after_span_end 18 | envelope = after_span_begin * before_span_end 19 | 20 | Inputs: 21 | - span_begin: tensor with shape ``(batch_size, sequence_length)``, representing a 22 | probability distribution over a start index in the sequence 23 | - span_end: tensor with shape ``(batch_size, sequence_length)``, representing a probability 24 | distribution over an end index in the sequence 25 | 26 | Outputs: 27 | - envelope: tensor with shape ``(batch_size, sequence_length)``, representing a probability 28 | for each index of the sequence belonging in the span 29 | 30 | If there is a mask associated with either of the inputs, we ignore it, assuming that you used 31 | the mask correctly when you computed your probability distributions. But we support masking in 32 | this layer, so that you have an output mask if you really need it. We just return the first 33 | mask that is not ``None`` (or ``None``, if both are ``None``). 34 | 35 | """ 36 | def __init__(self, **kwargs): 37 | super(Envelope, self).__init__(**kwargs) 38 | 39 | @overrides 40 | def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument 41 | span_begin_mask, span_end_mask = mask 42 | return span_begin_mask if span_begin_mask is not None else span_end_mask 43 | 44 | @overrides 45 | def compute_output_shape(self, input_shape): 46 | span_begin_shape, _ = input_shape 47 | return span_begin_shape 48 | 49 | @overrides 50 | def call(self, inputs, mask=None): 51 | span_begin, span_end = inputs 52 | after_span_begin = K.cumsum(span_begin, axis=-1) 53 | after_span_end = K.cumsum(span_end, axis=-1) 54 | before_span_end = 1.0 - after_span_end 55 | return after_span_begin * before_span_end 56 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/max.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from ..masked_layer import MaskedLayer 5 | from ...tensors.backend import switch, very_negative_like 6 | 7 | 8 | class Max(MaskedLayer): 9 | """ 10 | This ``Layer`` performs a max over some dimension. Keras has a similar layer called 11 | ``GlobalMaxPooling1D``, but it is not as configurable as this one, and it does not support 12 | masking. 13 | 14 | If the mask is not ``None``, it must be the same shape as the input. 15 | 16 | Input: 17 | - A tensor of arbitrary shape (having at least 3 dimensions). 18 | 19 | Output: 20 | - A tensor with one less dimension, where we have taken a max over one of the dimensions. 
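Parameters
----------
axis: int, optional (default=-1)
    The axis over which we take the max.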
21 | """ 22 | def __init__(self, axis: int=-1, **kwargs): 23 | self.axis = axis 24 | super(Max, self).__init__(**kwargs) 25 | 26 | @overrides 27 | def compute_mask(self, inputs, mask=None): 28 | # pylint: disable=unused-argument 29 | if mask is None: 30 | return None 31 | return K.any(mask, axis=self.axis) 32 | 33 | @overrides 34 | def compute_output_shape(self, input_shape): 35 | axis = self.axis 36 | if axis < 0: 37 | axis += len(input_shape) 38 | return input_shape[:axis] + input_shape[axis+1:] 39 | 40 | @overrides 41 | def call(self, inputs, mask=None): 42 | if mask is not None: 43 | inputs = switch(mask, inputs, very_negative_like(inputs)) 44 | return K.max(inputs, axis=self.axis) 45 | 46 | @overrides 47 | def get_config(self): 48 | config = {'axis': self.axis} 49 | base_config = super(Max, self).get_config() 50 | config.update(base_config) 51 | return config 52 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/multiply.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from ..masked_layer import MaskedLayer 5 | 6 | 7 | class Multiply(MaskedLayer): 8 | """ 9 | This ``Layer`` performs elementwise multiplication between two tensors, supporting masking. We 10 | literally just call ``tensor_1 * tensor_2``; the only reason this is a ``Layer`` is so that we 11 | can support masking (and because it's slightly nicer to read in a model definition than a 12 | lambda layer). 13 | 14 | We also try to be a little bit smart if you're wanting to broadcast the multiplication, by 15 | having the tensors differ in the number of dimensions by one. 16 | 17 | Input: 18 | - tensor_1: a tensor of arbitrary shape, with an optional mask of the same shape 19 | - tensor_2: a tensor with the same shape as ``tensor_1`` (or one less or one more 20 | dimension), with an optional mask of the same shape 21 | 22 | Output: 23 | - ``tensor_1 * tensor_2``. 
24 | """ 25 | def __init__(self, **kwargs): 26 | super(Multiply, self).__init__(**kwargs) 27 | 28 | @overrides 29 | def compute_mask(self, inputs, mask=None): 30 | # pylint: disable=unused-argument 31 | tensor_1, tensor_2 = inputs 32 | tensor_1_mask, tensor_2_mask = mask 33 | if tensor_1_mask is None: 34 | tensor_1_mask = K.ones_like(tensor_1) 35 | if tensor_2_mask is None: 36 | tensor_2_mask = K.ones_like(tensor_2) 37 | tensor_1_mask, tensor_2_mask = self.expand_dims_if_necessary(tensor_1_mask, tensor_2_mask) 38 | return K.cast(tensor_1_mask, 'uint8') * K.cast(tensor_2_mask, 'uint8') 39 | 40 | @overrides 41 | def compute_output_shape(self, input_shape): 42 | return input_shape[0] 43 | 44 | @overrides 45 | def call(self, inputs, mask=None): 46 | tensor_1, tensor_2 = inputs 47 | tensor_1, tensor_2 = self.expand_dims_if_necessary(tensor_1, tensor_2) 48 | return tensor_1 * tensor_2 49 | 50 | @staticmethod 51 | def expand_dims_if_necessary(tensor_1, tensor_2): 52 | tensor_1_ndim = K.ndim(tensor_1) 53 | tensor_2_ndim = K.ndim(tensor_2) 54 | if tensor_1_ndim == tensor_2_ndim: 55 | return tensor_1, tensor_2 56 | elif tensor_1_ndim == tensor_2_ndim - 1: 57 | return K.expand_dims(tensor_1), tensor_2 58 | elif tensor_2_ndim == tensor_1_ndim - 1: 59 | return tensor_1, K.expand_dims(tensor_2) 60 | else: 61 | raise RuntimeError("Can't multiply two tensors with ndims " 62 | "{} and {}".format(tensor_1_ndim, tensor_2_ndim)) 63 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/permute.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | from keras import backend as K 4 | from overrides import overrides 5 | 6 | from ..masked_layer import MaskedLayer 7 | 8 | 9 | class Permute(MaskedLayer): 10 | """ 11 | This ``Layer`` calls ``K.permute_dimensions`` on both the input and the mask. 12 | 13 | If the mask is not ``None``, it must have the same shape as the input. 14 | 15 | Input: 16 | - A tensor of arbitrary shape. 17 | 18 | Output: 19 | - A tensor with permuted dimensions. 20 | """ 21 | def __init__(self, pattern: Tuple[int], **kwargs): 22 | self.pattern = pattern 23 | super(Permute, self).__init__(**kwargs) 24 | 25 | @overrides 26 | def compute_mask(self, inputs, mask=None): 27 | # pylint: disable=unused-argument 28 | if mask is None: 29 | return None 30 | return K.permute_dimensions(mask, self.pattern) 31 | 32 | @overrides 33 | def compute_output_shape(self, input_shape): 34 | return tuple([input_shape[i] for i in self.pattern]) 35 | 36 | @overrides 37 | def call(self, inputs, mask=None): 38 | return K.permute_dimensions(inputs, pattern=self.pattern) 39 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/repeat.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from ..masked_layer import MaskedLayer 5 | 6 | 7 | class Repeat(MaskedLayer): 8 | """ 9 | This ``Layer`` calls ``K.repeat_elements`` on both the input and the mask, after calling 10 | ``K.expand_dims``. 11 | 12 | If the mask is not ``None``, we must be able to call ``K.expand_dims`` using the same axis 13 | parameter as we do for the input. 14 | 15 | Input: 16 | - A tensor of arbitrary shape. 17 | 18 | Output: 19 | - The input tensor repeated along one of the dimensions. 
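For example, with ``axis=1`` and ``repetitions=10``, an input of shape ``(batch_size, length)`` becomes a tensor of shape ``(batch_size, 10, length)``, with each of the 10 slices identical to the original input.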
20 | 21 | Parameters 22 | ---------- 23 | axis: int 24 | We will add a dimension to the input tensor at this axis. 25 | repetitions: int 26 | The new dimension will have this size to it, with each slice being identical to the 27 | original input tensor. 28 | """ 29 | def __init__(self, axis: int, repetitions: int, **kwargs): 30 | self.axis = axis 31 | self.repetitions = repetitions 32 | super(Repeat, self).__init__(**kwargs) 33 | 34 | @overrides 35 | def compute_mask(self, inputs, mask=None): 36 | # pylint: disable=unused-argument 37 | if mask is None: 38 | return None 39 | return self.__repeat_tensor(mask) 40 | 41 | @overrides 42 | def compute_output_shape(self, input_shape): 43 | return input_shape[:self.axis] + (self.repetitions,) + input_shape[self.axis:] 44 | 45 | @overrides 46 | def call(self, inputs, mask=None): 47 | return self.__repeat_tensor(inputs) 48 | 49 | def __repeat_tensor(self, tensor): 50 | return K.repeat_elements(K.expand_dims(tensor, self.axis), self.repetitions, self.axis) 51 | 52 | @overrides 53 | def get_config(self): 54 | base_config = super(Repeat, self).get_config() 55 | config = {'axis': self.axis, 'repetitions': self.repetitions} 56 | config.update(base_config) 57 | return config 58 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/repeat_like.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from ..masked_layer import MaskedLayer 5 | 6 | 7 | class RepeatLike(MaskedLayer): 8 | """ 9 | This ``Layer`` is like :class:`~.repeat.Repeat`, but gets the number of repetitions to use from 10 | a second input tensor. This allows doing a number of repetitions that is unknown at graph 11 | compilation time, and is necessary when the ``repetitions`` argument to ``Repeat`` would be 12 | ``None``. 13 | 14 | If the mask is not ``None``, we must be able to call ``K.expand_dims`` using the same axis 15 | parameter as we do for the input. 16 | 17 | Input: 18 | - A tensor of arbitrary shape, which we will expand and tile. 19 | - A second tensor whose shape along one dimension we will copy 20 | 21 | Output: 22 | - The input tensor repeated along one of the dimensions. 23 | 24 | Parameters 25 | ---------- 26 | axis: int 27 | We will add a dimension to the input tensor at this axis. 28 | copy_from_axis: int 29 | We will copy the dimension from the second tensor at this axis. 
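For example, with ``axis=1`` and ``copy_from_axis=1``, a first input of shape ``(batch_size, encoding_dim)`` and a second input of shape ``(batch_size, num_sentences, encoding_dim)`` produce an output of shape ``(batch_size, num_sentences, encoding_dim)``, with the first input repeated ``num_sentences`` times.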
30 | """ 31 | def __init__(self, axis: int, copy_from_axis: int, **kwargs): 32 | self.axis = axis 33 | self.copy_from_axis = copy_from_axis 34 | super(RepeatLike, self).__init__(**kwargs) 35 | 36 | @overrides 37 | def compute_mask(self, inputs, mask=None): 38 | # pylint: disable=unused-argument 39 | if mask is None or mask[0] is None: 40 | return None 41 | return self.__repeat_tensor(mask[0], inputs[1]) 42 | 43 | @overrides 44 | def compute_output_shape(self, input_shape): 45 | return input_shape[0][:self.axis] + (input_shape[1][self.copy_from_axis],) + input_shape[0][self.axis:] 46 | 47 | @overrides 48 | def call(self, inputs, mask=None): 49 | return self.__repeat_tensor(inputs[0], inputs[1]) 50 | 51 | def __repeat_tensor(self, to_repeat, to_copy): 52 | expanded = K.expand_dims(to_repeat, self.axis) 53 | ones = [1] * K.ndim(expanded) 54 | num_repetitions = K.shape(to_copy)[self.copy_from_axis] 55 | tile_shape = K.concatenate([ones[:self.axis], [num_repetitions], ones[self.axis+1:]], 0) 56 | return K.tile(expanded, tile_shape) 57 | 58 | @overrides 59 | def get_config(self): 60 | base_config = super(RepeatLike, self).get_config() 61 | config = {'axis': self.axis, 'copy_from_axis': self.copy_from_axis} 62 | config.update(base_config) 63 | return config 64 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/replace_masked_values.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from ...tensors.backend import switch 5 | from ..masked_layer import MaskedLayer 6 | 7 | 8 | class ReplaceMaskedValues(MaskedLayer): 9 | """ 10 | This ``Layer`` replaces all masked values in a tensor with some value. You might want to do 11 | this before passing the tensor into a layer that does a max or a min, for example, to replace 12 | all masked values with something very large or very negative. We basically just call 13 | ``switch`` on the mask. 14 | 15 | Input: 16 | - tensor: a tensor of arbitrary shape 17 | 18 | Output: 19 | - the same tensor, with masked values replaced by some input value 20 | 21 | Parameters 22 | ---------- 23 | replace_with: float 24 | We will replace all masked values in the tensor with this value. 25 | """ 26 | def __init__(self, replace_with: float, **kwargs): 27 | self.replace_with = replace_with 28 | super(ReplaceMaskedValues, self).__init__(**kwargs) 29 | 30 | @overrides 31 | def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument 32 | return mask 33 | 34 | @overrides 35 | def compute_output_shape(self, input_shape): 36 | return input_shape 37 | 38 | @overrides 39 | def call(self, inputs, mask=None): 40 | if mask is None: 41 | # It turns out that Keras doesn't like it if you just return inputs, so we need to 42 | # return a different tensor object. Just doing a cast apparently doesn't work, either, 43 | # so we'll add 0. 
44 | return inputs + 0.0 45 | return switch(mask, inputs, K.ones_like(inputs) * self.replace_with) 46 | 47 | @overrides 48 | def get_config(self): 49 | config = {'replace_with': self.replace_with} 50 | base_config = super(ReplaceMaskedValues, self).get_config() 51 | config.update(base_config) 52 | return config 53 | -------------------------------------------------------------------------------- /deep_qa/layers/backend/squeeze.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from ..masked_layer import MaskedLayer 5 | 6 | 7 | class Squeeze(MaskedLayer): 8 | """ 9 | This ``Layer`` removes a 1-D dimension from the tensor at index ``axis``, acting as simply 10 | a layer version of the backend squeeze function. 11 | 12 | If the mask is not ``None``, it must be the same shape as the input. 13 | 14 | Input: 15 | - A tensor of arbitrary shape (having at least 3 dimensions). 16 | 17 | Output: 18 | - A tensor with the same data as ``inputs`` but reduced dimensions. 19 | 20 | Parameters 21 | ---------- 22 | axis: int, optional (default=-1) 23 | The axis that we should squeeze. 24 | """ 25 | def __init__(self, axis: int=-1, **kwargs): 26 | self.axis = axis 27 | super(Squeeze, self).__init__(**kwargs) 28 | 29 | @overrides 30 | def compute_mask(self, inputs, mask=None): 31 | # pylint: disable=unused-argument 32 | if mask is None: 33 | return None 34 | return K.squeeze(mask, axis=self.axis) 35 | 36 | @overrides 37 | def compute_output_shape(self, input_shape): 38 | axis = self.axis 39 | if axis < 0: 40 | axis += len(input_shape) 41 | return input_shape[:axis] + input_shape[axis+1:] 42 | 43 | @overrides 44 | def call(self, inputs, mask=None): 45 | return K.squeeze(inputs, axis=self.axis) 46 | 47 | @overrides 48 | def get_config(self): 49 | base_config = super(Squeeze, self).get_config() 50 | config = {'axis': self.axis} 51 | config.update(base_config) 52 | return config 53 | -------------------------------------------------------------------------------- /deep_qa/layers/encoders/bag_of_words.py: -------------------------------------------------------------------------------- 1 | from overrides import overrides 2 | 3 | from keras import backend as K 4 | from keras.engine import InputSpec 5 | 6 | from ..masked_layer import MaskedLayer 7 | 8 | class BOWEncoder(MaskedLayer): 9 | ''' 10 | Bag of Words Encoder takes a matrix of shape (num_words, word_dim) and returns a vector of size (word_dim), 11 | which is an average of the (unmasked) rows in the input matrix. This could have been done using a Lambda 12 | layer, except that Lambda layer does not support masking (as of Keras 1.0.7). 13 | ''' 14 | def __init__(self, **kwargs): 15 | self.input_spec = [InputSpec(ndim=3)] 16 | 17 | # For consistency of handling sentence encoders, we will often get passed this parameter. 18 | # We don't use it, but Layer will complain if it's there, so we get rid of it here. 19 | kwargs.pop('units', None) 20 | super(BOWEncoder, self).__init__(**kwargs) 21 | 22 | @overrides 23 | def compute_output_shape(self, input_shape): 24 | return (input_shape[0], input_shape[2]) # removing second dimension 25 | 26 | @overrides 27 | def call(self, inputs, mask=None): 28 | # pylint: disable=redefined-variable-type 29 | if mask is None: 30 | return K.mean(inputs, axis=1) 31 | else: 32 | # Compute weights such that masked elements have zero weights and the remaining 33 | # weight is distributed equally among the unmasked elements.
34 | # Mask (samples, num_words) has 0s for masked elements and 1s everywhere else. 35 | # Mask is of type int8. While theano would automatically make weighted_mask below 36 | # of type float32 even if mask remains int8, tensorflow would complain. Let's cast it 37 | # explicitly to remain compatible with tf. 38 | float_mask = K.cast(mask, 'float32') 39 | # Expanding dims of the denominator to make it the same shape as the numerator, epsilon added to avoid 40 | # division by zero. 41 | # (samples, num_words) 42 | weighted_mask = float_mask / (K.sum(float_mask, axis=1, keepdims=True) + K.epsilon()) 43 | if K.ndim(weighted_mask) < K.ndim(inputs): 44 | weighted_mask = K.expand_dims(weighted_mask) 45 | return K.sum(inputs * weighted_mask, axis=1) # (samples, word_dim) 46 | 47 | @overrides 48 | def compute_mask(self, inputs, mask=None): 49 | # We need to override this method because Layer passes the input mask unchanged since this layer 50 | # supports masking. We don't want that. After the input is averaged, we can stop propagating 51 | # the mask. 52 | return None 53 | -------------------------------------------------------------------------------- /deep_qa/layers/encoders/shareable_gru.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from keras.layers import GRU, InputSpec 3 | 4 | 5 | class ShareableGRU(GRU): 6 | def __init__(self, *args, **kwargs): 7 | super(ShareableGRU, self).__init__(*args, **kwargs) 8 | 9 | def call(self, x, mask=None, **kwargs): 10 | input_shape = K.int_shape(x) 11 | res = super(ShareableGRU, self).call(x, mask, **kwargs) 12 | self.input_spec = [InputSpec(shape=(self.input_spec[0].shape[0], 13 | None, 14 | self.input_spec[0].shape[2]))] 15 | if K.ndim(x) == K.ndim(res): 16 | # A recent change in Keras 17 | # (https://github.com/fchollet/keras/commit/a9b6bef0624c67d6df1618ca63d8e8141b0df4d0) 18 | # made it so that K.rnn with a tensorflow backend does not retain shape information for 19 | # the sequence length, even if it's present in the input. We need to fix that here so 20 | # that our models have the right shape information. A simple K.reshape is good enough 21 | # to fix this. 22 | result_shape = K.int_shape(res) 23 | if input_shape[1] is not None and result_shape[1] is None: 24 | shape = (input_shape[0] if input_shape[0] is not None else -1, 25 | input_shape[1], result_shape[2]) 26 | res = K.reshape(res, shape=shape) 27 | return res 28 | -------------------------------------------------------------------------------- /deep_qa/layers/entailment_models/__init__.py: -------------------------------------------------------------------------------- 1 | from .decomposable_attention import DecomposableAttentionEntailment 2 | from .multiple_choice_tuple_entailment import MultipleChoiceTupleEntailment 3 | 4 | entailment_models = { # pylint: disable=invalid-name 5 | 'decomposable_attention': DecomposableAttentionEntailment, 6 | 'multiple_choice_tuple_attention': MultipleChoiceTupleEntailment, 7 | } 8 | -------------------------------------------------------------------------------- /deep_qa/layers/highway.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Highway as KerasHighway 2 | 3 | class Highway(KerasHighway): 4 | """ 5 | Keras' `Highway` layer does not support masking, but it easily could, just by returning the 6 | mask. This `Layer` makes this possible. 
7 | """ 8 | def __init__(self, **kwargs): 9 | super(Highway, self).__init__(**kwargs) 10 | self.supports_masking = True 11 | -------------------------------------------------------------------------------- /deep_qa/layers/l1_normalize.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from .masked_layer import MaskedLayer 5 | from ..tensors.backend import l1_normalize 6 | 7 | 8 | class L1Normalize(MaskedLayer): 9 | """ 10 | This Layer normalizes a tensor by its L1 norm. This could just be a 11 | ``Lambda`` layer that calls our ``tensors.l1_normalize`` function, 12 | except that ``Lambda`` layers do not properly handle masked input. 13 | 14 | The expected input to this layer is a tensor of shape 15 | ``(batch_size, x)``, with an optional mask of the same shape. 16 | We also accept as input a tensor of shape ``(batch_size, x, 1)``, 17 | which will be squeezed to shape ``(batch_size, x)`` 18 | (though the mask must still be of shape ``(batch_size, x)``). 19 | 20 | We give no output mask, as we expect this to only be used at the end of 21 | the model, to get a final probability distribution over class labels. If 22 | you need this to propagate the mask for your model, it would be pretty 23 | easy to change it to optionally do so - submit a PR. 24 | """ 25 | 26 | def __init__(self, **kwargs): 27 | super(L1Normalize, self).__init__(**kwargs) 28 | 29 | @overrides 30 | def compute_mask(self, inputs, mask=None): 31 | # pylint: disable=unused-argument 32 | # We do not need a mask beyond this layer. 33 | return None 34 | 35 | @overrides 36 | def compute_output_shape(self, input_shape): 37 | return (input_shape[0], input_shape[1]) 38 | 39 | @overrides 40 | def call(self, inputs, mask=None): 41 | if K.ndim(inputs) == 3: 42 | inputs = K.squeeze(inputs, axis=2) 43 | if K.ndim(inputs) != 2: 44 | raise ValueError("L1Normalize layer only supports inputs of shape " 45 | "(batch_size, x) or (batch_size, x, 1)") 46 | return l1_normalize(inputs, mask) 47 | -------------------------------------------------------------------------------- /deep_qa/layers/masked_layer.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Layer 2 | 3 | 4 | class MaskedLayer(Layer): 5 | """ 6 | Keras 2.0 allowed for arbitrary differences in arguments to the ``call`` method of ``Layers``. 7 | As part of this, they removed the default ``mask=None`` argument, which means that if you want 8 | to implement ``call`` with a mask, you need to disable a pylint warning. Instead of disabling 9 | it in every single layer in our codebase, which could lead to uncaught errors, we'll have a 10 | single place where we disable it, and have other layers inherit from this class. 
11 | """ 12 | def __init__(self, **kwargs): 13 | self.supports_masking = True 14 | super(MaskedLayer, self).__init__(**kwargs) 15 | 16 | def call(self, inputs, mask=None): # pylint: disable=arguments-differ 17 | raise NotImplementedError 18 | -------------------------------------------------------------------------------- /deep_qa/layers/subtract_minimum.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from deep_qa.layers.masked_layer import MaskedLayer 5 | from deep_qa.tensors.backend import VERY_LARGE_NUMBER 6 | 7 | class SubtractMinimum(MaskedLayer): 8 | ''' 9 | This layer is used to normalize across a tensor axis. Normalization is done by finding the 10 | minimum value across the specified axis, and then subtracting that value from all values 11 | (again, across the specified axis). Note that this also works just fine if you want to find the 12 | minimum across more than one axis. 13 | 14 | Inputs: 15 | - A tensor with arbitrary dimension, and a mask of the same shape (currently doesn't 16 | support masks with other shapes). 17 | 18 | Output: 19 | - The same tensor, with the minimum across one (or more) of the dimensions subtracted. 20 | 21 | Parameters 22 | ---------- 23 | axis: int 24 | The axis (or axes) across which to find the minimum. Can be a single int, a list of ints, 25 | or None. We just call `K.min` with this parameter, so anything that's valid there works 26 | here too. 27 | ''' 28 | def __init__(self, axis: int, **kwargs): 29 | self.axis = axis 30 | super(SubtractMinimum, self).__init__(**kwargs) 31 | 32 | @overrides 33 | def compute_output_shape(self, input_shape): # pylint: disable=no-self-use 34 | return input_shape 35 | 36 | @overrides 37 | def compute_mask(self, inputs, mask=None): 38 | return mask 39 | 40 | @overrides 41 | def call(self, inputs, mask=None): 42 | if mask is not None: 43 | mask_value = False if K.dtype(mask) == 'bool' else 0 44 | # Make sure masked values don't affect the input, by adding a very large number.
45 | mask_flipped_and_scaled = K.cast(K.equal(mask, mask_value), "float32") * VERY_LARGE_NUMBER 46 | minimums = K.min(inputs + mask_flipped_and_scaled, axis=self.axis, keepdims=True) 47 | else: 48 | minimums = K.min(inputs, axis=self.axis, keepdims=True) 49 | normalized = inputs - minimums 50 | return normalized 51 | 52 | @overrides 53 | def get_config(self): 54 | base_config = super(SubtractMinimum, self).get_config() 55 | config = {'axis': self.axis} 56 | config.update(base_config) 57 | return config 58 | -------------------------------------------------------------------------------- /deep_qa/layers/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from .add_encoder_mask import AddEncoderMask 2 | from .encoder_wrapper import EncoderWrapper 3 | from .output_mask import OutputMask 4 | from .time_distributed import TimeDistributed 5 | -------------------------------------------------------------------------------- /deep_qa/layers/wrappers/add_encoder_mask.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from overrides import overrides 3 | 4 | from ..masked_layer import MaskedLayer 5 | 6 | 7 | class AddEncoderMask(MaskedLayer): 8 | """ 9 | This ``Layer`` handles masking for ``TimeDistributed`` encoders, like LSTMs, that condense 10 | sequences of vectors into single vectors (not LSTMs that return sequences; masking is already 11 | handled there correctly). Our :class:`~.encoder_wrapper.EncoderWrapper` class does the correct 12 | masking computation, but it inherits from ``TimeDistributed``, which does not work with unknown 13 | dimensions at run-time. If you want to wrap an encoder using 14 | :class:`~..backend.CollapseToBatch` and :class:`~..backend.ExpandFromBatch`, you need a way to 15 | get the mask back into the right form after running your encoder. This is an issue because 16 | Keras' encoders don't return masks when they output single vectors. 17 | 18 | For example, say you have a list of sentences, like [[5, 2, 1, 0], [2, 3, 1, 1], [0, 0, 0, 0]] 19 | (using word indices instead of embeddings for simplicity), which has been padded to be three 20 | sentences, even though only two of them are actually used. After passing it through an encoder, 21 | you'll have something like [[vector], [vector], [vector]], and you want a mask that looks like 22 | [1, 1, 0]. Keras' LSTMs and such won't give this to you. This layer adds it back. 23 | 24 | Inputs: 25 | - A tensor with shape ``(batch_size, ..., encoding_dim)`` that is the output of some 26 | encoder that you got with 27 | :func:`~deep_qa.training.text_trainer.TextTrainer._get_encoder()` (not a seq2seq encoder 28 | that returns sequences). 29 | The mask for this tensor must be ``None``. 30 | - A tensor with shape ``(batch_size, ..., num_words, embedding_dim)`` that was the `input` 31 | to that encoder. The mask for this tensor must have shape ``(batch_size, ..., 32 | num_words)``. 33 | 34 | Output: 35 | - The first input tensor, with a mask computed from the second input tensor. The 36 | computation is just ``K.any()`` on the last dimension.
37 | """ 38 | @overrides 39 | def compute_output_shape(self, input_shape): 40 | return input_shape[0] 41 | 42 | @overrides 43 | def compute_mask(self, inputs, mask=None): 44 | encoder_mask, embedding_mask = mask 45 | if encoder_mask is not None: 46 | raise RuntimeError("Refusing to add an encoder mask, because the tensor already has one") 47 | return K.any(embedding_mask, axis=-1) 48 | 49 | @overrides 50 | def call(self, inputs, mask=None): # pylint: disable=unused-argument 51 | # It turns out that Keras doesn't like it if you just return inputs, so we need to return a 52 | # different tensor object. Just doing a cast apparently doesn't work, either, so we'll 53 | # add 0. 54 | return inputs[0] + 0.0 55 | -------------------------------------------------------------------------------- /deep_qa/layers/wrappers/encoder_wrapper.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from deep_qa.layers.wrappers.time_distributed import TimeDistributed 3 | 4 | 5 | class EncoderWrapper(TimeDistributed): 6 | ''' 7 | This class TimeDistributes a sentence encoder, applying the encoder to several word sequences. 8 | The only difference between this and the regular TimeDistributed is in how we handle the mask. 9 | Typically, an encoder will handle masked embedded input, and return None as its mask, as it 10 | just returns a vector and no more masking is necessary. However, if the encoder is 11 | TimeDistributed, we might run into a situation where _all_ of the words in a given sequence are 12 | masked (because we padded the number of sentences, for instance). In this case, we just want 13 | to mask the entire sequence. EncoderWrapper returns a mask with the same dimension as the 14 | input sequences, where sequences are masked if _all_ of their words were masked. 15 | 16 | Notes 17 | ----- 18 | For seq2seq encoders, one should use either ``TimeDistributed`` or 19 | ``TimeDistributedWithMask`` since ``EncoderWrapper`` reduces the dimensionality 20 | of the input mask. 21 | ''' 22 | def compute_mask(self, x, input_mask=None): 23 | # pylint: disable=unused-argument 24 | # Input mask (coming from Embedding) will be of shape (batch_size, knowledge_length, num_words). 25 | # Output mask should be of shape (batch_size, knowledge_length) with 0s for background sentences that 26 | # are all padding. 27 | if input_mask is None: 28 | return None 29 | else: 30 | # An output bit is 0 only if the bits corresponding to all input words are 0. 31 | return K.any(input_mask, axis=-1) 32 | -------------------------------------------------------------------------------- /deep_qa/layers/wrappers/output_mask.py: -------------------------------------------------------------------------------- 1 | from overrides import overrides 2 | 3 | from ..masked_layer import MaskedLayer 4 | 5 | 6 | class OutputMask(MaskedLayer): 7 | """ 8 | This Layer is purely for debugging. You can wrap this on a layer's output to get the mask 9 | output by that layer as a model output, for easier visualization of what the model is actually 10 | doing. 11 | 12 | Don't try to use this in an actual model. 
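For example, something like ``mask = OutputMask()(layer_output)`` gives you the mask attached to ``layer_output`` as a tensor that you can add to your model's outputs and inspect directly.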
13 | """ 14 | @overrides 15 | def compute_mask(self, inputs, mask=None): 16 | return None 17 | 18 | @overrides 19 | def call(self, inputs, mask=None): # pylint: disable=unused-argument 20 | return mask 21 | -------------------------------------------------------------------------------- /deep_qa/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | In this module we define a number of concrete models. The models are grouped by task, where each 4 | task has a roughly coherent input/output specification. See the README in each submodule for a 5 | description of the task models in that submodule are designed to solve. 6 | 7 | You should think of these models as more of "model families" than actual models, though, as there 8 | are typically options left unspecified in the models themselves. For example, models in this 9 | module might have a layer that encodes word sequences into vectors; they just call a method on 10 | `TextTrainer` to get an encoder, and the decision for which actual encoder is used (an LSTM, a 11 | CNN, or something else) happens in the parameters passed to `TextTrainer`. If you really want to, 12 | you can hard-code specific decisions for these things, but most models we have here use the 13 | `TextTrainer` API to abstract away these decisions, giving implementations of a class of similar 14 | models, instead of a single model. 15 | 16 | We also define a few general `Pretrainers` in a submodule here. The `Pretrainers` in this 17 | top-level submodule are suitable to pre-train a large class of models (e.g., any model that 18 | encodes sentences), while more task-specific `Pretrainers` are found in that task's submodule. 19 | -------------------------------------------------------------------------------- /deep_qa/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .entailment import concrete_models as entailment_models 2 | from .sequence_tagging import concrete_models as sequence_tagging_models 3 | from .reading_comprehension import concrete_models as reading_comprehension_models 4 | from .text_classification import concrete_models as text_classification_models 5 | 6 | concrete_models = {} # pylint: disable=invalid-name 7 | __concrete_task_models = [ # pylint: disable=invalid-name 8 | entailment_models, 9 | sequence_tagging_models, 10 | reading_comprehension_models, 11 | text_classification_models, 12 | ] 13 | for models_for_task in __concrete_task_models: 14 | for model_name, model_class in models_for_task.items(): 15 | if model_name in concrete_models: 16 | raise RuntimeError("Duplicate model name found: " + model_name) 17 | concrete_models[model_name] = model_class 18 | -------------------------------------------------------------------------------- /deep_qa/models/entailment/README.md: -------------------------------------------------------------------------------- 1 | Entailment models take two sequences of text as input and make a classification decision on the 2 | pair. Typically that decision represents whether one sentence entails the other, but we'll use 3 | this family of models to represent any kind of classification decision over pairs of text. 
4 | 5 | *Inputs:* Two text sequences 6 | 7 | *Output:* Some classification decision (typically "entails/not entails", 8 | "entails/neutral/contradicts", or similar) 9 | -------------------------------------------------------------------------------- /deep_qa/models/entailment/__init__.py: -------------------------------------------------------------------------------- 1 | from .decomposable_attention import DecomposableAttention 2 | 3 | concrete_models = { # pylint: disable=invalid-name 4 | 'DecomposableAttention': DecomposableAttention, 5 | } 6 | -------------------------------------------------------------------------------- /deep_qa/models/reading_comprehension/__init__.py: -------------------------------------------------------------------------------- 1 | from .attention_sum_reader import AttentionSumReader 2 | from .bidirectional_attention import BidirectionalAttentionFlow 3 | from .gated_attention_reader import GatedAttentionReader 4 | 5 | concrete_models = { # pylint: disable=invalid-name 6 | 'AttentionSumReader': AttentionSumReader, 7 | 'BidirectionalAttentionFlow': BidirectionalAttentionFlow, 8 | 'GatedAttentionReader': GatedAttentionReader, 9 | } 10 | -------------------------------------------------------------------------------- /deep_qa/models/sequence_tagging/README.md: -------------------------------------------------------------------------------- 1 | Sequence tagging models take a sequence of text as input and produce as output a label for each 2 | token in the sequence. These models could do named entity recognition with BIO tags, or part of 3 | speech tagging, or other similar tasks. 4 | -------------------------------------------------------------------------------- /deep_qa/models/sequence_tagging/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_tagger import SimpleTagger 2 | 3 | concrete_models = { # pylint: disable=invalid-name 4 | 'SimpleTagger': SimpleTagger, 5 | } 6 | -------------------------------------------------------------------------------- /deep_qa/models/sequence_tagging/simple_tagger.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Dense, Input, TimeDistributed 2 | from overrides import overrides 3 | 4 | from ...common.params import Params 5 | from ...data.instances.sequence_tagging import concrete_instances 6 | from ...training.text_trainer import TextTrainer 7 | from ...training.models import DeepQaModel 8 | 9 | 10 | class SimpleTagger(TextTrainer): 11 | """ 12 | This ``SimpleTagger`` simply encodes a sequence of text with some number of stacked 13 | ``seq2seq_encoders``, then predicts a tag at each index. 14 | 15 | Parameters 16 | ---------- 17 | num_stacked_rnns : int, optional (default: ``1``) 18 | The number of ``seq2seq_encoders`` that we should stack on top of each other before 19 | predicting tags. 20 | instance_type : str 21 | Specifies the particular subclass of ``TaggedSequenceInstance`` to use for loading data, 22 | which in turn defines things like how the input data is formatted and tokenized. 
23 | """ 24 | def __init__(self, params: Params): 25 | self.num_stacked_rnns = params.pop('num_stacked_rnns', 1) 26 | instance_type_choice = params.pop_choice("instance_type", concrete_instances.keys()) 27 | self.instance_type = concrete_instances[instance_type_choice] 28 | super(SimpleTagger, self).__init__(params) 29 | 30 | @overrides 31 | def _instance_type(self): # pylint: disable=no-self-use 32 | return self.instance_type 33 | 34 | @overrides 35 | def _build_model(self): 36 | # shape: (batch_size, text_length) 37 | text_input = Input(shape=self._get_sentence_shape(), dtype='int32', name='text_input') 38 | # shape: (batch_size, text_length, embedding_dim) 39 | text_embedding = self._embed_input(text_input) 40 | for i in range(self.num_stacked_rnns): 41 | encoder = self._get_seq2seq_encoder(name="encoder_{}".format(i), 42 | fallback_behavior="use default params") 43 | # shape still (batch_size, text_length, embedding_dim) 44 | text_embedding = encoder(text_embedding) 45 | # The -2 below is because we are ignoring the padding and unknown tokens that the 46 | # DataIndexer has by default. 47 | predicted_tags = TimeDistributed(Dense(self.data_indexer.get_vocab_size('tags') - 2, 48 | activation='softmax'))(text_embedding) 49 | return DeepQaModel(input=text_input, output=predicted_tags) 50 | 51 | @overrides 52 | def _set_padding_lengths_from_model(self): 53 | self._set_text_lengths_from_model_input(self.model.get_input_shape_at(0)[1:]) 54 | -------------------------------------------------------------------------------- /deep_qa/models/text_classification/README.md: -------------------------------------------------------------------------------- 1 | Text classification models take a sequence of text as input and classify it into one of several 2 | classes. 3 | 4 | *Input:* Text sequence 5 | 6 | *Output:* Class label 7 | -------------------------------------------------------------------------------- /deep_qa/models/text_classification/__init__.py: -------------------------------------------------------------------------------- 1 | from .classification_model import ClassificationModel 2 | 3 | concrete_models = { # pylint: disable=invalid-name 4 | 'ClassificationModel': ClassificationModel, 5 | } 6 | -------------------------------------------------------------------------------- /deep_qa/tensors/README.md: -------------------------------------------------------------------------------- 1 | This module contains convenience functions for working with Keras tensors. Typically these 2 | functions will be called inside some `Layer` class. 3 | -------------------------------------------------------------------------------- /deep_qa/tensors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/tensors/__init__.py -------------------------------------------------------------------------------- /deep_qa/tensors/similarity_functions/README.md: -------------------------------------------------------------------------------- 1 | Similarity functions take a pair of tensors with the same shape, and compute a similarity function 2 | on the vectors in the last dimension. For example, the tensors might both have shape 3 | `(batch_size, sentence_length, embedding_dim)`, and we will compute some function of the two 4 | vectors of length `embedding_dim` for each position `(batch_size, sentence_length)`, returning a 5 | tensor of shape `(batch_size, sentence_length)`. 
6 | 7 | The similarity function could be as simple as a dot product, or it could be a more complex, 8 | parameterized function. The SimilarityFunction class exposes an API for a Layer that wants to 9 | allow for multiple similarity functions, such as for initializing and returning weights. 10 | 11 | If you want to compute a similarity between tensors of different sizes, you need to first tile them 12 | in the appropriate dimensions to make them the same before you can use these functions. The 13 | Attention and MatrixAttention layers do this. 14 | -------------------------------------------------------------------------------- /deep_qa/tensors/similarity_functions/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from .bilinear import Bilinear 4 | from .dot_product import DotProduct 5 | from .linear import Linear 6 | from .cosine_similarity import CosineSimilarity 7 | 8 | # The first item added here will be used as the default in some cases. 9 | similarity_functions = OrderedDict() # pylint: disable=invalid-name 10 | similarity_functions['dot_product'] = DotProduct 11 | similarity_functions['bilinear'] = Bilinear 12 | similarity_functions['linear'] = Linear 13 | similarity_functions['cosine_similarity'] = CosineSimilarity 14 | -------------------------------------------------------------------------------- /deep_qa/tensors/similarity_functions/bilinear.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from keras import backend as K 4 | from overrides import overrides 5 | 6 | from .similarity_function import SimilarityFunction 7 | 8 | 9 | class Bilinear(SimilarityFunction): 10 | """ 11 | This similarity function performs a bilinear transformation of the two input vectors. This 12 | function has a matrix of weights W and a bias b, and the similarity between two vectors x and y 13 | is computed as `x^T W y + b`. 14 | """ 15 | def __init__(self, **kwargs): 16 | super(Bilinear, self).__init__(**kwargs) 17 | self.weight_matrix = None 18 | self.bias = None 19 | 20 | @overrides 21 | def initialize_weights(self, tensor_1_dim: int, tensor_2_dim: int) -> List['K.variable']: 22 | self.weight_matrix = K.variable(self.init((tensor_1_dim, tensor_2_dim)), 23 | name=self.name + "_weights") 24 | self.bias = K.variable(self.init((1,)), name=self.name + "_bias") 25 | return [self.weight_matrix, self.bias] 26 | 27 | @overrides 28 | def compute_similarity(self, tensor_1, tensor_2): 29 | dot_product = K.sum(K.dot(tensor_1, self.weight_matrix) * tensor_2, axis=-1) 30 | return self.activation(dot_product + self.bias) 31 | -------------------------------------------------------------------------------- /deep_qa/tensors/similarity_functions/cosine_similarity.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from keras import backend as K 4 | from overrides import overrides 5 | 6 | from ...common.checks import ConfigurationError 7 | from .similarity_function import SimilarityFunction 8 | 9 | 10 | class CosineSimilarity(SimilarityFunction): 11 | """ 12 | This similarity function simply computes the cosine similarity between each pair of vectors. It has 13 | no parameters. 
14 | """ 15 | def __init__(self, **kwargs): 16 | super(CosineSimilarity, self).__init__(**kwargs) 17 | 18 | @overrides 19 | def initialize_weights(self, tensor_1_dim: int, tensor_2_dim: int) -> List['K.variable']: 20 | if tensor_1_dim != tensor_2_dim: 21 | raise ConfigurationError("Tensor dims must match for cosine product similarity, but " 22 | "were {} and {}".format(tensor_1_dim, tensor_2_dim)) 23 | return [] 24 | 25 | @overrides 26 | def compute_similarity(self, tensor_1, tensor_2): 27 | return K.sum(K.l2_normalize(tensor_1, axis=-1) * K.l2_normalize(tensor_2, axis=-1), 28 | axis=-1) 29 | -------------------------------------------------------------------------------- /deep_qa/tensors/similarity_functions/dot_product.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from keras import backend as K 4 | from overrides import overrides 5 | 6 | from ...common.checks import ConfigurationError 7 | from .similarity_function import SimilarityFunction 8 | 9 | 10 | class DotProduct(SimilarityFunction): 11 | """ 12 | This similarity function simply computes the dot product between each pair of vectors. It has 13 | no parameters. 14 | """ 15 | def __init__(self, **kwargs): 16 | super(DotProduct, self).__init__(**kwargs) 17 | 18 | @overrides 19 | def initialize_weights(self, tensor_1_dim: int, tensor_2_dim: int) -> List['K.variable']: 20 | if tensor_1_dim != tensor_2_dim: 21 | raise ConfigurationError("Tensor dims must match for dot product similarity, but " 22 | "were {} and {}".format(tensor_1_dim, tensor_2_dim)) 23 | return [] 24 | 25 | @overrides 26 | def compute_similarity(self, tensor_1, tensor_2): 27 | return K.sum(tensor_1 * tensor_2, axis=-1) 28 | -------------------------------------------------------------------------------- /deep_qa/tensors/similarity_functions/similarity_function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Similarity functions take a pair of tensors with the same shape, and compute a similarity function 3 | on the vectors in the last dimension. For example, the tensors might both have shape 4 | `(batch_size, sentence_length, embedding_dim)`, and we will compute some function of the two 5 | vectors of length `embedding_dim` for each position `(batch_size, sentence_length)`, returning a 6 | tensor of shape `(batch_size, sentence_length)`. 7 | 8 | The similarity function could be as simple as a dot product, or it could be a more complex, 9 | parameterized function. The SimilarityFunction class exposes an API for a Layer that wants to 10 | allow for multiple similarity functions, such as for initializing and returning weights. 11 | 12 | If you want to compute a similarity between tensors of different sizes, you need to first tile them 13 | in the appropriate dimensions to make them the same before you can use these functions. The 14 | Attention and MatrixAttention layers do this. 
15 | """ 16 | from typing import List 17 | 18 | from keras import activations, initializers 19 | 20 | class SimilarityFunction: 21 | def __init__(self, name: str, initialization: str='glorot_uniform', activation: str='linear'): 22 | self.name = name 23 | self.init = initializers.get(initialization) 24 | self.activation = activations.get(activation) 25 | 26 | def initialize_weights(self, tensor_1_dim: int, tensor_2_dim: int) -> List['K.variable']: 27 | """ 28 | Called in a `Layer.build()` method that uses this SimilarityFunction, here we both 29 | initialize whatever weights are necessary for this similarity function, and return them so 30 | they can be included in `Layer.trainable_weights`. 31 | 32 | 33 | Parameters 34 | ---------- 35 | tensor_1_dim : int 36 | The last dimension (typically ``embedding_dim``) of the first input tensor. We need 37 | this so we can initialize weights appropriately. 38 | tensor_2_dim : int 39 | The last dimension (typically ``embedding_dim``) of the second input tensor. We need 40 | this so we can initialize weights appropriately. 41 | """ 42 | raise NotImplementedError 43 | 44 | def compute_similarity(self, tensor_1, tensor_2): 45 | """ 46 | Takes two tensors of the same shape, such as (batch_size, length_1, length_2, 47 | embedding_dim). Computes a (possibly parameterized) similarity on the final dimension and 48 | returns a tensor with one less dimension, such as (batch_size, length_1, length_2). 49 | """ 50 | raise NotImplementedError 51 | -------------------------------------------------------------------------------- /deep_qa/testing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/testing/__init__.py -------------------------------------------------------------------------------- /deep_qa/training/README.md: -------------------------------------------------------------------------------- 1 | # Trainers 2 | 3 | `Trainers` specify data, a model, and a way to train the model with the data. This module groups 4 | all of the common code related to these things, making only minimal assumptions about what kind of 5 | data you're using or what the structure of your model is. Really, a `Trainer` is just a nicer 6 | interface to a Keras `Model`, we just call it something else to not create too much naming 7 | confusion, and because the `Trainer` class provides a lot of functionality around training the 8 | model that a Keras `Model` doesn't. 9 | 10 | We also deal with the notion of _pre-training_ in this module. A `Pretrainer` is a `Trainer` that 11 | depends on another `Trainer`, building its model using pieces of the enclosed `Trainer`, so that 12 | training the `Pretrainer` updates the weights in the enclosed `Trainer` object. 13 | -------------------------------------------------------------------------------- /deep_qa/training/__init__.py: -------------------------------------------------------------------------------- 1 | from .text_trainer import TextTrainer 2 | from .trainer import Trainer 3 | -------------------------------------------------------------------------------- /deep_qa/training/optimizers.py: -------------------------------------------------------------------------------- 1 | r""" 2 | It turns out that Keras' design is somewhat crazy\*, and there is no list of 3 | optimizers that you can just import from Keras. 
So, this module specifies a 4 | list, and a helper function or two for dealing with optimizer parameters. 5 | Unfortunately, this means that we have a list that must be kept in sync with 6 | Keras. Oh well. 7 | 8 | \* Have you seen their get_from_module() method? See here: 9 | https://github.com/fchollet/keras/blob/6e42b0e4a77fb171295b541a6ae9a3a4a79f9c87/keras/utils/generic_utils.py#L10. 10 | That method means I could pass in 'clip_norm' as an optimizer, and it would try 11 | to use that function as an optimizer. It also means there is no simple list of 12 | implemented optimizers I can grab. 13 | 14 | \* I should also note that Keras is an incredibly useful library that does a lot 15 | of things really well. It just has a few quirks... 16 | """ 17 | import logging 18 | from typing import Union 19 | 20 | # pylint: disable=no-name-in-module 21 | from tensorflow.python.training.gradient_descent import GradientDescentOptimizer 22 | from tensorflow.python.training.rmsprop import RMSPropOptimizer 23 | from tensorflow.python.training.adadelta import AdadeltaOptimizer 24 | from tensorflow.python.training.adagrad import AdagradOptimizer 25 | from tensorflow.python.training.adam import AdamOptimizer 26 | # pylint: enable=no-name-in-module 27 | from ..common.params import Params 28 | 29 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name 30 | 31 | 32 | optimizers = { # pylint: disable=invalid-name 33 | 'sgd': GradientDescentOptimizer, 34 | 'rmsprop': RMSPropOptimizer, 35 | 'adagrad': AdagradOptimizer, 36 | 'adadelta': AdadeltaOptimizer, 37 | 'adam': AdamOptimizer 38 | } 39 | 40 | 41 | def optimizer_from_params(params: Union[Params, str]): 42 | """ 43 | This method converts from a parameter object like we use in our Trainer 44 | code into an optimizer object suitable for use with Keras. The simplest 45 | case for both of these is a string that shows up in `optimizers` above - if 46 | `params` is just one of those strings, we return it, and everyone is happy. 47 | If not, we assume `params` is a Dict[str, Any], with a "type" key, where 48 | the value for "type" must be one of those strings above. We take the rest 49 | of the parameters and pass them to the optimizer's constructor. 
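For example, passing the string ``"adam"`` returns an ``AdamOptimizer`` with default arguments, while passing parameters like ``{"type": "sgd", "learning_rate": 0.01}`` returns a ``GradientDescentOptimizer`` constructed with ``learning_rate=0.01``.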
50 | 51 | """ 52 | if isinstance(params, str): 53 | optimizer = params 54 | params = {} 55 | else: 56 | optimizer = params.pop_choice("type", optimizers.keys()) 57 | return optimizers[optimizer](**params) 58 | -------------------------------------------------------------------------------- /doc/_static/custom.css: -------------------------------------------------------------------------------- 1 | .toggle .header { 2 | display: block; 3 | clear: both; 4 | } 5 | 6 | .toggle .header:after { 7 | content: " ▼"; 8 | } 9 | 10 | .toggle .header.open:after { 11 | content: " ▲"; 12 | } 13 | 14 | .wy-nav-content a.internal code span.pre { 15 | color: blue; 16 | text-decoration: underline; 17 | } 18 | -------------------------------------------------------------------------------- /doc/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {% set css_files = css_files + ["_static/custom.css"] %} 4 | 5 | {% block footer %} 6 | 16 | {% endblock %} 17 | -------------------------------------------------------------------------------- /doc/common/about_common.rst: -------------------------------------------------------------------------------- 1 | Common Utils 2 | ============ 3 | 4 | Here are some general utilities that we've written to help in other parts of the 5 | code base. 6 | -------------------------------------------------------------------------------- /doc/common/checks.rst: -------------------------------------------------------------------------------- 1 | Checks 2 | ====== 3 | 4 | .. automodule:: deep_qa.common.checks 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /doc/common/params.rst: -------------------------------------------------------------------------------- 1 | Parameter Utils 2 | =============== 3 | 4 | .. automodule:: deep_qa.common.params 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /doc/data/about_data.rst: -------------------------------------------------------------------------------- 1 | About Data 2 | ========== 3 | 4 | This module contains code for processing data. There's a DataIndexer, whose job 5 | it is to convert from strings to word (or character) indices suitable for use 6 | with an embedding matrix. There's code to load pre-trained embeddings from a 7 | file, to tokenize sentences, and, most importantly, to convert training and 8 | testing examples into numpy arrays that can be used with Keras. 9 | 10 | The most important thing to understand about the data processing code is the 11 | Dataset object. A Dataset is a collection of Instances, which are the individual 12 | examples used for training and testing. Dataset has two subclasses: TextDataset, 13 | which contains Instances with raw strings and can be read directly from a file, 14 | and IndexedDataset, which contains Instances whose raw strings have been 15 | converted to word (or character) indices. The IndexedDataset has methods for 16 | padding sequences to a consistent length, so that models can be compiled, and 17 | for converting the Instances to numpy arrays. The file formats read by 18 | TextDataset, and the format of the numpy arrays produced by IndexedDataset, are 19 | determined by the underlying Instance type used by the Dataset. See the 20 | instances module for more detail on this. 
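A minimal sketch of the start of that pipeline, using the text classification
``Instance`` type (the file path is a placeholder; the file format shown in the comment
is the one exercised by the test suite for ``TextClassificationInstance``):

.. code-block:: python

    from deep_qa.data.datasets.dataset import TextDataset
    from deep_qa.data.instances.text_classification.text_classification_instance import (
            TextClassificationInstance)

    # Each line becomes one Instance; the expected file format (here, tab-separated
    # index, sentence text, and optional label) is determined by the Instance class.
    dataset = TextDataset.read_from_file("train.tsv", TextClassificationInstance)
    print(len(dataset.instances))

Indexing, padding, and conversion to numpy arrays then happen on the resulting
IndexedDataset (usually driven by a TextTrainer), as described above.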
21 | -------------------------------------------------------------------------------- /doc/data/data_generator.rst: -------------------------------------------------------------------------------- 1 | Data Generators 2 | *************** 3 | 4 | .. automodule:: deep_qa.data.data_generator 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /doc/data/datasets.rst: -------------------------------------------------------------------------------- 1 | Datasets 2 | ======== 3 | 4 | 5 | deep_qa.data.dataset 6 | -------------------- 7 | 8 | .. automodule:: deep_qa.data.datasets.dataset 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | Entailment 14 | ---------- 15 | 16 | .. automodule:: deep_qa.data.datasets.entailment.snli_dataset 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | Language Modeling 22 | ----------------- 23 | 24 | .. automodule:: deep_qa.data.datasets.language_modeling.language_modeling_dataset 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: -------------------------------------------------------------------------------- /doc/data/entailment.rst: -------------------------------------------------------------------------------- 1 | Entailment Instances 2 | ==================== 3 | 4 | These ``Instances`` are designed for an entailment task, where the input is a pair of sentences 5 | (or larger text sequences) and the output is a classification decision. 6 | 7 | SentencePairInstances 8 | --------------------- 9 | 10 | .. automodule:: deep_qa.data.instances.entailment.sentence_pair_instance 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | SnliInstances 16 | ------------- 17 | 18 | .. automodule:: deep_qa.data.instances.entailment.snli_instance 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /doc/data/general_data_utils.rst: -------------------------------------------------------------------------------- 1 | General Data Utils 2 | ================== 3 | 4 | deep_qa.data.data_indexer 5 | ------------------------- 6 | 7 | .. automodule:: deep_qa.data.data_indexer 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | deep_qa.data.embeddings 13 | ----------------------- 14 | 15 | .. automodule:: deep_qa.data.embeddings 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /doc/data/instances.rst: -------------------------------------------------------------------------------- 1 | Base Instances 2 | ============== 3 | 4 | An :class:`~deep_qa.data.instances.instance.Instance` is a single training or testing example for a Keras model. The base classes for 5 | working with ``Instances`` are found in instance.py. There are two subclasses: (1) 6 | :class:`~deep_qa.data.instances.instance.TextInstance`, which is a raw instance that contains 7 | actual strings, and can be used to determine a vocabulary for a model, or read directly from a 8 | file; and (2) :class:`~deep_qa.data.instances.instance.IndexedInstance`, which has had its raw 9 | strings converted to word (or character) indices, and can be padded to a consistent length and 10 | converted to numpy arrays for use with Keras. 
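To make the ``IndexedInstance`` half of that lifecycle concrete, here is a small sketch
with a sequence tagging instance (the word and tag indices are toy values, mirroring the
behaviour exercised in the test suite):

.. code-block:: python

    from deep_qa.data.instances.sequence_tagging.tagging_instance import IndexedTaggingInstance

    instance = IndexedTaggingInstance([1, 2, 3, 4], [4, 5, 6])
    instance.get_padding_lengths()             # {'num_sentence_words': 4}
    instance.pad({'num_sentence_words': 6})    # text indices are now [1, 2, 3, 4, 0, 0]
    text_array, label_array = instance.as_training_data()  # numpy arrays, ready for Keras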
11 | 12 | Concrete ``Instance`` classes are organized in the code by the task they are designed for (e.g., 13 | text classification, reading comprehension, sequence tagging, etc.). 14 | 15 | A lot of the magic of how the DeepQA library works happens here, in the concrete Instance classes 16 | in this module. Most of the code can be totally agnostic to how exactly the input is structured, 17 | because the conversion to numpy arrays happens here, not in the Trainer or TextTrainer classes, 18 | with only the specific ``_build_model()`` methods needing to know about the format of their input 19 | and output (and even some of the details there are transparent to the model class). 20 | 21 | .. automodule:: deep_qa.data.instances.instance 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | -------------------------------------------------------------------------------- /doc/data/reading_comprehension.rst: -------------------------------------------------------------------------------- 1 | Reading Comprehension Instances 2 | =============================== 3 | 4 | These ``Instances`` are designed for the set of tasks known today as "reading comprehension", where 5 | the input is a natural language question, a passage, and (optionally) some number of answer 6 | options, and the output is either a (span begin index, span end index) decision over the passage, 7 | or a classification decision over the answer options (if provided). 8 | 9 | 10 | QuestionPassageInstances 11 | ------------------------ 12 | 13 | .. automodule:: deep_qa.data.instances.reading_comprehension.question_passage_instance 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | McQuestionPassageInstances 19 | -------------------------- 20 | 21 | .. automodule:: deep_qa.data.instances.reading_comprehension.mc_question_passage_instance 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | CharacterSpanInstances 27 | ---------------------- 28 | 29 | .. automodule:: deep_qa.data.instances.reading_comprehension.character_span_instance 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | -------------------------------------------------------------------------------- /doc/data/sequence_tagging.rst: -------------------------------------------------------------------------------- 1 | Sequence Tagging Instances 2 | ========================== 3 | 4 | These ``Instances`` are designed for a sequence tagging task, where the input is a passage of 5 | natural language (e.g., a sentence), and the output is some classification decision for each token 6 | in that passage (e.g., part-of-speech tags, any kind of BIO tagging like NER or chunking, etc.). 7 | 8 | TaggingInstances 9 | ---------------- 10 | 11 | .. automodule:: deep_qa.data.instances.sequence_tagging.tagging_instance 12 | :members: 13 | :undoc-members: 14 | :show-inheritance: 15 | 16 | PretokenizedTaggingInstances 17 | ---------------------------- 18 | 19 | .. automodule:: deep_qa.data.instances.sequence_tagging.pretokenized_tagging_instance 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /doc/data/text_classification.rst: -------------------------------------------------------------------------------- 1 | Text Classification Instances 2 | ***************************** 3 | 4 | These ``Instances`` are designed for any classification task over a single passage of text. 
The 5 | input is the passage (e.g., a sentence, a document, etc.), and the output is a single label (e.g., 6 | positive / negative sentiment, spam / not spam, essay grade, etc.). 7 | 8 | TextClassificationInstances 9 | --------------------------- 10 | 11 | .. automodule:: deep_qa.data.instances.text_classification.text_classification_instance 12 | :members: 13 | :undoc-members: 14 | :show-inheritance: 15 | -------------------------------------------------------------------------------- /doc/data/tokenizers.rst: -------------------------------------------------------------------------------- 1 | Tokenizers 2 | ========== 3 | 4 | 5 | character_tokenizer 6 | ------------------- 7 | 8 | .. automodule:: deep_qa.data.tokenizers.character_tokenizer 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | tokenizer 14 | --------- 15 | 16 | .. automodule:: deep_qa.data.tokenizers.tokenizer 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | word_and_character_tokenizer 22 | ---------------------------- 23 | 24 | .. automodule:: deep_qa.data.tokenizers.word_and_character_tokenizer 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | word_splitter 30 | ------------- 31 | 32 | .. automodule:: deep_qa.data.tokenizers.word_splitter 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | 37 | tokenizers.word_tokenizer 38 | ------------------------- 39 | 40 | .. automodule:: deep_qa.data.tokenizers.word_tokenizer 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | -------------------------------------------------------------------------------- /doc/img/module_breakdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/doc/img/module_breakdown.png -------------------------------------------------------------------------------- /doc/layers/about_layers.rst: -------------------------------------------------------------------------------- 1 | About Layers 2 | ============ 3 | 4 | Custom layers that we have implemented belong here. These include things like 5 | knowledge encoders (which encode the memory component of a memory network), 6 | knowledge selectors (which perform an attention over the memory), and entailment 7 | models. There's also an encoders submodule, containing sentence encoders that 8 | convert an embedded word (or character) sequence into a vector. 9 | -------------------------------------------------------------------------------- /doc/layers/attention.rst: -------------------------------------------------------------------------------- 1 | Attention 2 | ========= 3 | 4 | Attention 5 | --------- 6 | 7 | .. automodule:: deep_qa.layers.attention.attention 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | GatedAttention 13 | -------------- 14 | 15 | .. automodule:: deep_qa.layers.attention.gated_attention 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | MaskedSoftmax 21 | ------------- 22 | 23 | .. automodule:: deep_qa.layers.attention.masked_softmax 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | MatrixAttention 29 | --------------- 30 | 31 | .. automodule:: deep_qa.layers.attention.matrix_attention 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | MaxSimilaritySoftmax 37 | -------------------- 38 | 39 | .. 
automodule:: deep_qa.layers.attention.max_similarity_softmax 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | WeightedSum 45 | ----------- 46 | 47 | .. automodule:: deep_qa.layers.attention.weighted_sum 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | -------------------------------------------------------------------------------- /doc/layers/backend.rst: -------------------------------------------------------------------------------- 1 | Backend Layers 2 | ============== 3 | 4 | Layers in this module generally just implement some simple operation from the Keras backend as a 5 | Layer. The reason we have these as Layers is largely so that we can properly handle masking. 6 | 7 | AddMask 8 | ------- 9 | 10 | .. automodule:: deep_qa.layers.backend.add_mask 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | BatchDot 16 | -------- 17 | 18 | .. automodule:: deep_qa.layers.backend.batch_dot 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | CollapseToBatch 24 | --------------- 25 | 26 | .. automodule:: deep_qa.layers.backend.collapse_to_batch 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | ExpandFromBatch 32 | --------------- 33 | 34 | .. automodule:: deep_qa.layers.backend.expand_from_batch 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Envelope 40 | -------- 41 | 42 | .. automodule:: deep_qa.layers.backend.envelope 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | Max 48 | --- 49 | 50 | .. automodule:: deep_qa.layers.backend.max 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | Permute 56 | ------- 57 | 58 | .. automodule:: deep_qa.layers.backend.permute 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | Repeat 64 | ------ 65 | 66 | .. automodule:: deep_qa.layers.backend.repeat 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | RepeatLike 72 | ---------- 73 | 74 | .. automodule:: deep_qa.layers.backend.repeat_like 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | -------------------------------------------------------------------------------- /doc/layers/core_layers.rst: -------------------------------------------------------------------------------- 1 | Core Layers 2 | =========== 3 | 4 | Additive 5 | -------- 6 | 7 | .. automodule:: deep_qa.layers.additive 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | BiGRUIndexSelector 13 | ------------------ 14 | 15 | .. automodule:: deep_qa.layers.bigru_index_selector 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | ComplexConcat 21 | ------------- 22 | 23 | .. automodule:: deep_qa.layers.complex_concat 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | Highway 29 | ------- 30 | 31 | .. automodule:: deep_qa.layers.highway 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | L1Normalize 37 | ----------- 38 | 39 | .. automodule:: deep_qa.layers.l1_normalize 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | NoisyOr 45 | ------- 46 | 47 | .. automodule:: deep_qa.layers.noisy_or 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | OptionAttentionSum 53 | ------------------ 54 | 55 | .. automodule:: deep_qa.layers.option_attention_sum 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | 60 | Overlap 61 | ------- 62 | 63 | .. 
automodule:: deep_qa.layers.overlap 64 | :members: 65 | :undoc-members: 66 | :show-inheritance: 67 | 68 | SubtractMinimum 69 | --------------- 70 | 71 | .. automodule:: deep_qa.layers.subtract_minimum 72 | :members: 73 | :undoc-members: 74 | :show-inheritance: 75 | 76 | VectorMatrixMerge 77 | ----------------- 78 | 79 | .. automodule:: deep_qa.layers.vector_matrix_merge 80 | :members: 81 | :undoc-members: 82 | :show-inheritance: 83 | 84 | VectorMatrixSplit 85 | ----------------- 86 | 87 | .. automodule:: deep_qa.layers.vector_matrix_split 88 | :members: 89 | :undoc-members: 90 | :show-inheritance: 91 | -------------------------------------------------------------------------------- /doc/layers/encoders.rst: -------------------------------------------------------------------------------- 1 | Encoders 2 | ======== 3 | 4 | BagOfWords 5 | ---------- 6 | 7 | .. automodule:: deep_qa.layers.encoders.bag_of_words 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ConvolutionalEncoder 13 | -------------------- 14 | 15 | .. automodule:: deep_qa.layers.encoders.convolutional_encoder 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | PositionalEncoder 21 | ----------------- 22 | 23 | .. automodule:: deep_qa.layers.encoders.positional_encoder 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | AttentiveGRU 29 | ------------ 30 | 31 | .. automodule:: deep_qa.layers.encoders.attentive_gru 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | -------------------------------------------------------------------------------- /doc/layers/entailment_models.rst: -------------------------------------------------------------------------------- 1 | Entailment Model Layers 2 | ======================= 3 | 4 | DecomposableAttention 5 | --------------------- 6 | 7 | .. automodule:: deep_qa.layers.entailment_models.decomposable_attention 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | MultipleChoiceTupleEntailment 13 | ----------------------------- 14 | 15 | .. automodule:: deep_qa.layers.entailment_models.multiple_choice_tuple_entailment 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | WordAlignment 21 | ------------- 22 | 23 | .. automodule:: deep_qa.layers.entailment_models.word_alignment 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | -------------------------------------------------------------------------------- /doc/layers/wrappers.rst: -------------------------------------------------------------------------------- 1 | Wrappers 2 | ======== 3 | 4 | EncoderWrapper 5 | -------------- 6 | 7 | .. automodule:: deep_qa.layers.wrappers.encoder_wrapper 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | OutputMask 13 | ---------- 14 | 15 | .. automodule:: deep_qa.layers.wrappers.output_mask 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | TimeDistributed 21 | --------------- 22 | 23 | .. automodule:: deep_qa.layers.wrappers.time_distributed 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | -------------------------------------------------------------------------------- /doc/models/entailment.rst: -------------------------------------------------------------------------------- 1 | Entailment Models 2 | ================= 3 | 4 | Entailment models take two sequences of text as input and make a classification 5 | decision on the pair. 
Typically that decision represents whether one sentence 6 | entails the other, but we'll use this family of models to represent any kind of 7 | classification decision over pairs of text. 8 | 9 | Inputs: Two text sequences 10 | 11 | Output: Some classification decision (typically "entails/not entails", "entails/neutral/contradicts", or similar) 12 | 13 | 14 | DecomposableAttention 15 | --------------------- 16 | 17 | .. automodule:: deep_qa.models.entailment.decomposable_attention 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | -------------------------------------------------------------------------------- /doc/models/reading_comprehension.rst: -------------------------------------------------------------------------------- 1 | Reading Comprehension 2 | ===================== 3 | 4 | AttentionSumReader 5 | ------------------ 6 | 7 | .. automodule:: deep_qa.models.reading_comprehension.attention_sum_reader 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | BidirectionalAttentionFlow 13 | -------------------------- 14 | 15 | .. automodule:: deep_qa.models.reading_comprehension.bidirectional_attention 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | GatedAttentionReader 21 | -------------------- 22 | 23 | .. automodule:: deep_qa.models.reading_comprehension.gated_attention_reader 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | -------------------------------------------------------------------------------- /doc/models/text_classification.rst: -------------------------------------------------------------------------------- 1 | Text Classification 2 | =================== 3 | 4 | Text classification models take a sequence of text as input and classify it into 5 | one of several classes. 6 | 7 | Input: Text sequence 8 | 9 | Output: Class label 10 | 11 | 12 | ClassificationModel 13 | ------------------- 14 | 15 | .. automodule:: deep_qa.models.text_classification.classification_model 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /doc/run.rst: -------------------------------------------------------------------------------- 1 | Running Models 2 | ============== 3 | 4 | .. automodule:: deep_qa.run 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /doc/tensors/about_tensors.rst: -------------------------------------------------------------------------------- 1 | Tensor Utils 2 | ============ 3 | 4 | Here are some general tensor manipulation utilities that we've written to help 5 | in other parts of the code base. 6 | -------------------------------------------------------------------------------- /doc/tensors/core_tensors.rst: -------------------------------------------------------------------------------- 1 | Core Tensor Utils 2 | ================= 3 | 4 | backend 5 | ------- 6 | 7 | .. automodule:: deep_qa.tensors.backend 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | masked_operations 13 | ----------------- 14 | 15 | .. automodule:: deep_qa.tensors.masked_operations 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /doc/tensors/similarity_functions.rst: -------------------------------------------------------------------------------- 1 | Similarity Functions 2 | ==================== 3 | 4 | .. 
automodule:: deep_qa.tensors.similarity_functions 5 | 6 | bilinear 7 | -------- 8 | 9 | .. automodule:: deep_qa.tensors.similarity_functions.bilinear 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | cosine_similarity 15 | ----------------- 16 | 17 | .. automodule:: deep_qa.tensors.similarity_functions.cosine_similarity 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | dot_product 23 | ----------- 24 | 25 | .. automodule:: deep_qa.tensors.similarity_functions.dot_product 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | linear 31 | ------ 32 | 33 | .. automodule:: deep_qa.tensors.similarity_functions.linear 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | similarity_function 39 | ------------------- 40 | 41 | .. automodule:: deep_qa.tensors.similarity_functions.similarity_function 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | -------------------------------------------------------------------------------- /doc/training/about_trainers.rst: -------------------------------------------------------------------------------- 1 | About Trainers 2 | ============== 3 | 4 | A :class:`~deep_qa.training.trainer.Trainer` is the core interface to the DeepQA code. Trainers 5 | specify data, a model, and a way to train the model with the data. This module groups all of the 6 | common code related to these things, making only minimal assumptions about what kind of data you're 7 | using or what the structure of your model is. Really, a ``Trainer`` is just a nicer interface to a 8 | Keras ``Model``, we just call it something else to not create too much naming confusion, and 9 | because the Trainer class provides a lot of functionality around training the model that a Keras 10 | ``Model`` doesn't. 11 | 12 | On top of ``Trainer``, which is a nicer interface to a Keras ``Model``, this module provides a 13 | ``TextTrainer``, which adds a lot of functionality for building Keras ``Models`` that work with 14 | text. We provide APIs around word embeddings, sentence encoding, reading and padding datasets, and 15 | similar things. All of the concrete models that we have so far in DeepQA inherit from 16 | ``TextTrainer``, so understanding how to use this class is pretty important to understanding 17 | DeepQA. 18 | 19 | We also deal with the notion of pre-training in this module. A Pretrainer is a Trainer that depends 20 | on another Trainer, building its model using pieces of the enclosed Trainer, so that training the 21 | Pretrainer updates the weights in the enclosed Trainer object. 22 | -------------------------------------------------------------------------------- /doc/training/misc.rst: -------------------------------------------------------------------------------- 1 | Misc 2 | ==== 3 | 4 | Models 5 | ------ 6 | 7 | .. automodule:: deep_qa.training.models 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | Optimizers 13 | ---------- 14 | 15 | .. automodule:: deep_qa.training.optimizers 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /doc/training/multi_gpu.rst: -------------------------------------------------------------------------------- 1 | Multi GPU Training 2 | ================== 3 | 4 | .. 
automodule:: deep_qa.training.multi_gpu 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /doc/training/text_trainer.rst: -------------------------------------------------------------------------------- 1 | TextTrainer 2 | =========== 3 | 4 | .. module:: deep_qa.training.text_trainer 5 | 6 | .. autoclass:: TextTrainer 7 | 8 | Utility methods 9 | ~~~~~~~~~~~~~~~ 10 | 11 | These methods are intended for use by subclasses, mostly in your ``_build_model`` implementation. 12 | 13 | .. automethod:: TextTrainer._get_sentence_shape 14 | .. automethod:: TextTrainer._embed_input 15 | .. automethod:: TextTrainer._get_encoder 16 | .. automethod:: TextTrainer._get_seq2seq_encoder 17 | .. automethod:: TextTrainer._set_text_lengths_from_model_input 18 | 19 | Abstract methods 20 | ~~~~~~~~~~~~~~~~ 21 | 22 | You `must` implement these methods in your model (along with 23 | :func:`~deep_qa.training.trainer._build_model`). The simplest concrete ``TextTrainer`` 24 | implementations only have four methods: ``__init__``, ``_instance_type`` (typically one line), 25 | ``_set_padding_lengths_from_model`` (also typically one line, for simple models), and 26 | ``_build_model``. See 27 | :class:`~deep_qa.models.text_classification.true_false_model.TrueFalseModel` and 28 | :class:`~deep_qa.models.sequence_tagging.simple_tagger.SimpleTagger` for examples. 29 | 30 | .. automethod:: TextTrainer._instance_type 31 | .. automethod:: TextTrainer._set_padding_lengths_from_model 32 | 33 | Semi-abstract methods 34 | ~~~~~~~~~~~~~~~~~~~~~ 35 | 36 | You'll likely need to override these methods, if you have anything more complex than a single sentence 37 | as input. 38 | 39 | .. automethod:: TextTrainer.get_padding_lengths 40 | .. automethod:: TextTrainer.get_instance_sorting_keys 41 | .. automethod:: TextTrainer.get_padding_memory_scaling 42 | .. automethod:: TextTrainer._set_padding_lengths 43 | 44 | Overridden ``Trainer`` methods 45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | You probably don't need to override these, except for probably ``_get_custom_objects``. The rest 48 | of them you shouldn't need to worry about at all (except to call them, if they are part of the 49 | external ``Trainer`` API), but we document them here for completeness. 50 | 51 | .. automethod:: TextTrainer.create_data_arrays 52 | .. automethod:: TextTrainer.load_dataset_from_files 53 | .. automethod:: TextTrainer.score_dataset 54 | .. automethod:: TextTrainer.set_model_state_from_dataset 55 | .. automethod:: TextTrainer.set_model_state_from_indexed_dataset 56 | .. automethod:: TextTrainer._get_custom_objects 57 | .. automethod:: TextTrainer._dataset_indexing_kwargs 58 | .. automethod:: TextTrainer._load_auxiliary_files 59 | .. automethod:: TextTrainer._overall_debug_output 60 | .. automethod:: TextTrainer._save_auxiliary_files 61 | .. automethod:: TextTrainer._set_params_from_model 62 | .. automethod:: TextTrainer._uses_data_generators 63 | -------------------------------------------------------------------------------- /doc/training/trainer.rst: -------------------------------------------------------------------------------- 1 | Trainer 2 | ======= 3 | 4 | .. module:: deep_qa.training.trainer 5 | 6 | .. autoclass:: Trainer 7 | 8 | Public methods 9 | ~~~~~~~~~~~~~~ 10 | 11 | .. automethod:: Trainer.can_train 12 | .. automethod:: Trainer.evaluate_model 13 | .. automethod:: Trainer.load_data_arrays 14 | .. automethod:: Trainer.load_model 15 | .. 
automethod:: Trainer.train 16 | 17 | Abstract methods 18 | ~~~~~~~~~~~~~~~~ 19 | 20 | If you're doing NLP, :class:`~deep_qa.training.text_trainer.TextTrainer` implements most of these, 21 | so you shouldn't have to worry about them. The only one it doesn't is ``_build_model`` (though it 22 | adds some other abstract methods that you `might` have to worry about). 23 | 24 | .. automethod:: Trainer.create_data_arrays 25 | .. automethod:: Trainer.load_dataset_from_files 26 | .. automethod:: Trainer.score_dataset 27 | .. automethod:: Trainer.set_model_state_from_dataset 28 | .. automethod:: Trainer.set_model_state_from_indexed_dataset 29 | .. automethod:: Trainer._build_model 30 | .. automethod:: Trainer._set_params_from_model 31 | .. automethod:: Trainer._dataset_indexing_kwargs 32 | 33 | Protected methods 34 | ~~~~~~~~~~~~~~~~~ 35 | 36 | .. automethod:: Trainer._get_callbacks 37 | .. automethod:: Trainer._get_custom_objects 38 | .. automethod:: Trainer._instance_debug_output 39 | .. automethod:: Trainer._load_auxiliary_files 40 | .. automethod:: Trainer._output_debug_info 41 | .. automethod:: Trainer._overall_debug_output 42 | .. automethod:: Trainer._post_epoch_hook 43 | .. automethod:: Trainer._pre_epoch_hook 44 | .. automethod:: Trainer._save_auxiliary_files 45 | .. automethod:: Trainer._uses_data_generators 46 | -------------------------------------------------------------------------------- /example_experiments/entailment/snli_decomposable_attention.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_class": "DecomposableAttention", 3 | "model_serialization_prefix": "/net/efs/aristo/dlfa/models/decomposable_attention/", 4 | "seq2seq_encoder": { 5 | "default": { 6 | "type": "bi_gru", 7 | "encoder_params": { 8 | "units": 100 9 | }, 10 | "wrapper_params": {} 11 | } 12 | }, 13 | "num_seq2seq_layers": 0, 14 | "decomposable_attention_params": { 15 | "num_hidden_layers": 2, 16 | "hidden_layer_width": 200, 17 | "hidden_layer_activation": "relu", 18 | "initializer": "random_normal" 19 | }, 20 | "data_generator": { 21 | "dynamic_padding": true 22 | }, 23 | "batch_size": 60, 24 | "patience": 3, 25 | "embeddings": { 26 | "words": { 27 | "dimension": 200, 28 | "pretrained_file": "/net/efs/aristo/dlfa/glove/glove.840B.300d.txt.gz", 29 | "project": true, 30 | "fine_tune": false, 31 | "dropout": 0.2 32 | } 33 | }, 34 | "num_epochs": 20, 35 | "optimizer": { 36 | "type": "adadelta", 37 | "learning_rate": 0.5 38 | }, 39 | "validation_files": ["/net/efs/aristo/dlfa/snli/processed/dev.tsv"], 40 | "train_files": ["/net/efs/aristo/dlfa/snli/processed/train.tsv"] 41 | } 42 | -------------------------------------------------------------------------------- /example_experiments/reading_comprehension/asreader_who_did_what.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_class": "AttentionSumReader", 3 | "model_serialization_prefix": "models/multiple_choice_qa/asreader", 4 | "encoder": { 5 | "default": { 6 | "type": "bi_gru", 7 | "units": 384 8 | } 9 | }, 10 | "seq2seq_encoder": { 11 | "default": { 12 | "type": "bi_gru", 13 | "encoder_params": { 14 | "units": 384 15 | }, 16 | "wrapper_params": {} 17 | } 18 | }, 19 | "optimizer": {"type": "adam"}, 20 | "gradient_clipping": { 21 | "type": "clip_by_norm", 22 | "value": 10 23 | }, 24 | "patience": 1, 25 | "embeddings": {"words": {"dimension": 256, "dropout": 0.0}}, 26 | "num_epochs": 5, 27 | "validation_files": 
["/net/efs/data/dlfa/who_did_what/processed/strict/val.tsv"], 28 | "train_files": ["/net/efs/data/dlfa/who_did_what/processed/strict/train.tsv"] 29 | } 30 | -------------------------------------------------------------------------------- /example_experiments/reading_comprehension/bidaf_squad.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_class": "BidirectionalAttentionFlow", 3 | "model_serialization_prefix": "/net/efs/aristo/dlfa/models/bidaf", 4 | "encoder": { 5 | "word": { 6 | "type": "cnn", 7 | "ngram_filter_sizes": [5], 8 | "num_filters": 100 9 | } 10 | }, 11 | "seq2seq_encoder": { 12 | "default": { 13 | "type": "bi_gru", 14 | "encoder_params": { 15 | "units": 100 16 | }, 17 | "wrapper_params": {} 18 | } 19 | }, 20 | "data_generator": { 21 | "dynamic_padding": true, 22 | "adaptive_batch_sizes": true, 23 | "adaptive_memory_usage_constant": 440000, 24 | "maximum_batch_size": 60 25 | }, 26 | // This is not quite the same as Min's paper; we don't have encoder dropout yet. 27 | "patience": 3, 28 | "embeddings": { 29 | "words": { 30 | "dimension": 100, 31 | "pretrained_file": "/net/efs/aristo/dlfa/glove/glove.6B.100d.txt.gz", 32 | "project": true, 33 | "fine_tune": false, 34 | "dropout": 0.2 35 | }, 36 | "characters": { 37 | "dimension": 8, 38 | "dropout": 0.2 39 | } 40 | }, 41 | "num_epochs": 20, 42 | "optimizer": { 43 | "type": "adadelta", 44 | "learning_rate": 0.5 45 | }, 46 | "validation_files": ["/net/efs/aristo/dlfa/squad/processed/dev.tsv"], 47 | "train_files": ["/net/efs/aristo/dlfa/squad/processed/train.tsv"] 48 | } 49 | -------------------------------------------------------------------------------- /example_experiments/reading_comprehension/gareader_who_did_what.json: -------------------------------------------------------------------------------- 1 | { 2 | "embeddings": { 3 | "words": { 4 | "dimension": 200, 5 | "pretrained_file": "/net/efs/data/dlfa/glove/glove.6B.100d.txt.gz", 6 | "fine_tune": false, 7 | "project": true, 8 | "dropout": 0.0 9 | }, 10 | "characters": { 11 | "dimension": 16, 12 | "dropout": 0.0 13 | } 14 | }, 15 | "model_class": "GatedAttentionReader", 16 | "cloze_token": "xxxxx", 17 | "num_word_characters": 10, 18 | "model_serialization_prefix": "models/multiple_choice_qa/gareader_wdw", 19 | "num_gated_attention_layers": 3, 20 | "tokenizer": { 21 | "type": "words and characters" 22 | }, 23 | "encoder": { 24 | "word": { 25 | "type": "bi_gru", 26 | "units": 25 27 | } 28 | }, 29 | "seq2seq_encoder": { 30 | "question_0": { 31 | "type": "bi_gru", 32 | "encoder_params": { 33 | "units": 128 34 | }, 35 | "wrapper_params": {} 36 | }, 37 | "document_0": { 38 | "type": "bi_gru", 39 | "encoder_params": { 40 | "units": 128 41 | }, 42 | "wrapper_params": {} 43 | }, 44 | "question_1": { 45 | "type": "bi_gru", 46 | "encoder_params": { 47 | "units": 128 48 | }, 49 | "wrapper_params": {} 50 | }, 51 | "document_1": { 52 | "type": "bi_gru", 53 | "encoder_params": { 54 | "units": 128 55 | }, 56 | "wrapper_params": {} 57 | }, 58 | "document_final": { 59 | "type": "bi_gru", 60 | "encoder_params": { 61 | "units": 128 62 | }, 63 | "wrapper_params": {} 64 | }, 65 | "question_final":{ 66 | "type": "bi_gru", 67 | "encoder_params": { 68 | "units": 128 69 | }, 70 | "wrapper_params": { 71 | "merge_mode": "None" 72 | } 73 | } 74 | }, 75 | "optimizer": { 76 | "type": "adam", 77 | "learning_rate": 0.0005 78 | }, 79 | "gradient_clipping": { 80 | "type": "clip_by_norm", 81 | "value": 10 82 | }, 83 | "patience": 5, 84 | "num_epochs": 10, 85 | 
"validation_files": ["/net/efs/data/dlfa/who_did_what/processed/strict/val.tsv"], 86 | "train_files": ["/net/efs/data/dlfa/who_did_what/processed/strict/train.tsv"] 87 | } 88 | -------------------------------------------------------------------------------- /example_experiments/sequence_tagging/simple_tagger.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_class": "SimpleTagger", 3 | "model_serialization_prefix": "/net/efs/aristo/dlfa/models/simple_tagger_test/", 4 | "encoder": { 5 | "word": { 6 | "type": "cnn", 7 | "ngram_filter_sizes": [2, 3, 4, 5], 8 | "num_filters": 100 9 | } 10 | }, 11 | "seq2seq_encoder": { 12 | "default": { 13 | "type": "bi_gru", 14 | "encoder_params": { 15 | "units": 100 16 | }, 17 | "wrapper_params": {} 18 | } 19 | }, 20 | "num_stacked_rnns": 2, 21 | "instance_type": "PreTokenizedTaggingInstance", 22 | "tokenizer": { 23 | "type": "words and characters", 24 | "processor": {"word_splitter": "no_op"} 25 | }, 26 | "data_generator": { 27 | "dynamic_padding": true 28 | }, 29 | "patience": 3, 30 | "embeddings": { 31 | "words": { 32 | "pretrained_file": "/net/efs/aristo/dlfa/glove/glove.6B.100d.txt.gz", 33 | "project": false, 34 | "fine_tune": false, 35 | "dropout": 0.2 36 | }, 37 | "characters": { 38 | "dimension": 8 39 | } 40 | }, 41 | "num_epochs": 20, 42 | "optimizer": { 43 | "type": "adadelta", 44 | "learning_rate": 0.5 45 | }, 46 | "validation_files": ["/net/efs/aristo/dlfa/squad/processed/tagging_dev.tsv"], 47 | "train_files": ["/net/efs/aristo/dlfa/squad/processed/tagging_train.tsv"] 48 | } 49 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --disable-warnings 3 | testpaths = tests/ 4 | python_paths = ./ 5 | -------------------------------------------------------------------------------- /scripts/clean_raw_omnibus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This script takes as input raw TSV files from the Omnibus dataset and 4 | preprocesses them to be compatible with the deep_qa pipeline. 5 | """ 6 | import logging 7 | import os 8 | import csv 9 | 10 | from argparse import ArgumentParser 11 | import pandas 12 | 13 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name 14 | 15 | 16 | def main(): 17 | log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 18 | logging.basicConfig(level=logging.INFO, format=log_format) 19 | parser = ArgumentParser(description=("Transform a raw Omnibus TSV " 20 | "to the format that the pipeline " 21 | "expects.")) 22 | parser.add_argument('input_csv', nargs='+', 23 | metavar="", type=str, 24 | help=("Path of TSV files to clean up. 
Pass in " 25 | "as many as you want, and the output " 26 | "will be a concatenation of them " 27 | "written to .clean")) 28 | 29 | arguments = parser.parse_args() 30 | all_clean_file_rows = [] 31 | for omnibus_file in arguments.input_csv: 32 | all_clean_file_rows.extend(clean_omnibus_csv(omnibus_file)) 33 | # turn the list of rows into a dataframe, and write to TSV 34 | dataframe = pandas.DataFrame(all_clean_file_rows) 35 | folder, filename = os.path.split(arguments.input_csv[-1]) 36 | outdirectory = folder + "/cleaned/" 37 | os.makedirs(outdirectory, exist_ok=True) 38 | outpath = outdirectory + filename + ".clean" 39 | logger.info("Saving cleaned file to %s", outpath) 40 | dataframe.to_csv(outpath, encoding="utf-8", index=False, 41 | sep="\t", header=False, 42 | quoting=csv.QUOTE_NONE) 43 | 44 | 45 | def clean_omnibus_csv(omnibus_file_path): 46 | logger.info("cleaning up %s", omnibus_file_path) 47 | # open the file as a csv 48 | dataframe = pandas.read_csv(omnibus_file_path, sep="\t", 49 | encoding='utf-8', header=None, 50 | quoting=csv.QUOTE_NONE) 51 | dataframe_trimmed = dataframe[[3, 9]] 52 | clean_rows = dataframe_trimmed.values.tolist() 53 | return clean_rows 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /scripts/install_requirements.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip install -r requirements.txt 4 | python -m nltk.downloader punkt 5 | python -m spacy.en.download all 6 | -------------------------------------------------------------------------------- /scripts/pylint.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | echo 'Starting pylint checks' 3 | pylint -d locally-disabled,locally-enabled -f colorized deep_qa tests scripts/*.py 4 | echo -e "pylint checks passed\n" 5 | -------------------------------------------------------------------------------- /scripts/run_ensemble.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | # pylint: disable=wrong-import-position 6 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 7 | from deep_qa import score_dataset_with_ensemble, compute_accuracy 8 | from deep_qa.common.checks import ensure_pythonhashseed_set 9 | 10 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name 11 | 12 | 13 | def main(): 14 | usage = 'USAGE: run_ensemble.py [param_file]+ -- [data_file]+' 15 | try: 16 | separator_index = sys.argv.index('--') 17 | except ValueError: 18 | print(usage) 19 | sys.exit(-1) 20 | param_files = sys.argv[1:separator_index] 21 | dataset_files = sys.argv[separator_index + 1:] 22 | predictions, labels = score_dataset_with_ensemble(param_files, dataset_files) 23 | compute_accuracy(predictions, labels) 24 | 25 | 26 | if __name__ == "__main__": 27 | ensure_pythonhashseed_set() 28 | logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', 29 | level=logging.INFO) 30 | main() 31 | -------------------------------------------------------------------------------- /scripts/run_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | # pylint: disable=wrong-import-position 6 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 7 | from deep_qa import run_model_from_file, evaluate_model 8 | from 
deep_qa.common.checks import ensure_pythonhashseed_set 9 | 10 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name 11 | 12 | 13 | def main(): 14 | usage = 'USAGE: run_model.py [param_file] [train|test]' 15 | if len(sys.argv) == 2: 16 | run_model_from_file(sys.argv[1]) 17 | elif len(sys.argv) == 3: 18 | mode = sys.argv[2] 19 | if mode == 'train': 20 | run_model_from_file(sys.argv[1]) 21 | elif mode == 'test': 22 | evaluate_model(sys.argv[1]) 23 | else: 24 | print(usage) 25 | sys.exit(-1) 26 | else: 27 | print(usage) 28 | sys.exit(-1) 29 | 30 | 31 | if __name__ == "__main__": 32 | ensure_pythonhashseed_set() 33 | logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', 34 | level=logging.INFO) 35 | main() 36 | -------------------------------------------------------------------------------- /scripts/set_processor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | USAGE="usage: ./set-processor [gpu|cpu]" 4 | 5 | if [ $# != 1 ]; then 6 | echo "$USAGE" 7 | exit 1 8 | fi 9 | 10 | PROCESSOR=$1 11 | if [ "$PROCESSOR" == "gpu" ]; then 12 | echo "Setting the processor to '$PROCESSOR'." 13 | sed -ie 's/^tensorflow-gpu/tensorflow/g' requirements.txt 14 | sed -ie 's/^tensorflow/tensorflow-gpu/g' requirements.txt 15 | elif [ "$PROCESSOR" == "cpu" ]; then 16 | echo "Setting the processor to '$PROCESSOR'." 17 | sed -ie 's/^tensorflow-gpu/tensorflow/g' requirements.txt 18 | else 19 | echo "Unknown argument: $PROCESSOR" 20 | echo "$USAGE" 21 | exit 1 22 | fi 23 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/__init__.py -------------------------------------------------------------------------------- /tests/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/common/__init__.py -------------------------------------------------------------------------------- /tests/common/pythonhashseed_test.py: -------------------------------------------------------------------------------- 1 | from deep_qa.common.checks import ensure_pythonhashseed_set 2 | 3 | def test_pythonhashseed(): 4 | ensure_pythonhashseed_set() 5 | -------------------------------------------------------------------------------- /tests/common/test_util.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | from deep_qa.common import util 3 | from deep_qa.testing.test_case import DeepQaTestCase 4 | 5 | 6 | class TestCommonUtils(DeepQaTestCase): 7 | def test_group_by_count(self): 8 | assert util.group_by_count([1, 2, 3, 4, 5, 6, 7], 3, 20) == [[1, 2, 3], [4, 5, 6], [7, 20, 20]] 9 | -------------------------------------------------------------------------------- /tests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/__init__.py 
-------------------------------------------------------------------------------- /tests/data/dataset_readers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/dataset_readers/__init__.py -------------------------------------------------------------------------------- /tests/data/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/datasets/__init__.py -------------------------------------------------------------------------------- /tests/data/datasets/dataset_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | from deep_qa.data.datasets.dataset import Dataset, TextDataset 3 | from deep_qa.data.instances.text_classification.text_classification_instance import TextClassificationInstance 4 | 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | 7 | 8 | class TestDataset: 9 | def test_merge(self): 10 | instances = [TextClassificationInstance("testing", None, None), 11 | TextClassificationInstance("testing1", None, None)] 12 | dataset1 = Dataset(instances[:1]) 13 | dataset2 = Dataset(instances[1:]) 14 | merged = dataset1.merge(dataset2) 15 | assert merged.instances == instances 16 | 17 | 18 | class TestTextDataset(DeepQaTestCase): 19 | def test_read_from_file_with_no_default_label(self): 20 | filename = self.TEST_DIR + 'test_dataset_file' 21 | with open(filename, 'w') as datafile: 22 | datafile.write("1\tinstance1\t0\n") 23 | datafile.write("2\tinstance2\t1\n") 24 | datafile.write("3\tinstance3\n") 25 | dataset = TextDataset.read_from_file(filename, TextClassificationInstance) 26 | assert len(dataset.instances) == 3 27 | instance = dataset.instances[0] 28 | assert instance.index == 1 29 | assert instance.text == "instance1" 30 | assert instance.label is False 31 | instance = dataset.instances[1] 32 | assert instance.index == 2 33 | assert instance.text == "instance2" 34 | assert instance.label is True 35 | instance = dataset.instances[2] 36 | assert instance.index == 3 37 | assert instance.text == "instance3" 38 | assert instance.label is None 39 | -------------------------------------------------------------------------------- /tests/data/datasets/language_modeling_dataset_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | from deep_qa.common.params import Params 3 | from deep_qa.data.datasets import LanguageModelingDataset 4 | from deep_qa.data.instances.language_modeling.sentence_instance import SentenceInstance 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | 7 | 8 | class TestLanguageModellingDataset(DeepQaTestCase): 9 | 10 | def setUp(self): 11 | super(TestLanguageModellingDataset, self).setUp() 12 | self.write_sentence_data() 13 | 14 | def test_read_from_file(self): 15 | args = Params({"sequence_length": 4}) 16 | dataset = LanguageModelingDataset.read_from_file(self.TRAIN_FILE, SentenceInstance, args) 17 | 18 | instances = dataset.instances 19 | assert instances[0].text == "This is a sentence" 20 | assert instances[1].text == "for language modelling. 
Here's" 21 | assert instances[2].text == "another one for language" 22 | -------------------------------------------------------------------------------- /tests/data/datasets/snli_dataset_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | from deep_qa.data.datasets import SnliDataset 3 | from deep_qa.data.instances.entailment.snli_instance import SnliInstance 4 | 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | 7 | 8 | class TestSnliDataset(DeepQaTestCase): 9 | 10 | def setUp(self): 11 | super(TestSnliDataset, self).setUp() 12 | self.write_original_snli_data() 13 | 14 | def test_read_from_file(self): 15 | dataset = SnliDataset.read_from_file(self.TRAIN_FILE, SnliInstance) 16 | 17 | instance1 = SnliInstance("A person on a horse jumps over a broken down airplane.", 18 | "A person is training his horse for a competition.", 19 | "neutral") 20 | instance2 = SnliInstance("A person on a horse jumps over a broken down airplane.", 21 | "A person is at a diner, ordering an omelette.", 22 | "contradicts") 23 | instance3 = SnliInstance("A person on a horse jumps over a broken down airplane.", 24 | "A person is outdoors, on a horse.", 25 | "entails") 26 | 27 | assert len(dataset.instances) == 3 28 | instance = dataset.instances[0] 29 | assert instance.index == instance1.index 30 | assert instance.first_sentence == instance1.first_sentence 31 | assert instance.second_sentence == instance1.second_sentence 32 | assert instance.label == instance1.label 33 | instance = dataset.instances[1] 34 | assert instance.index == instance2.index 35 | assert instance.first_sentence == instance2.first_sentence 36 | assert instance.second_sentence == instance2.second_sentence 37 | assert instance.label == instance2.label 38 | instance = dataset.instances[2] 39 | assert instance.index == instance3.index 40 | assert instance.first_sentence == instance3.first_sentence 41 | assert instance.second_sentence == instance3.second_sentence 42 | assert instance.label == instance3.label 43 | -------------------------------------------------------------------------------- /tests/data/instances/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/__init__.py -------------------------------------------------------------------------------- /tests/data/instances/entailment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/entailment/__init__.py -------------------------------------------------------------------------------- /tests/data/instances/entailment/sentence_pair_instance_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | import numpy 3 | 4 | from deep_qa.data.instances.entailment.sentence_pair_instance import IndexedSentencePairInstance 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | 7 | 8 | class TestIndexedSentencePairInstance(DeepQaTestCase): 9 | def test_get_padding_lengths_returns_max_of_both_sentences(self): 10 | instance = IndexedSentencePairInstance([1, 2, 3], [1], True) 11 | assert instance.get_padding_lengths() == {'num_sentence_words': 3} 12 | instance = IndexedSentencePairInstance([1, 2, 3], [1, 2, 3, 4], True) 13 
| assert instance.get_padding_lengths() == {'num_sentence_words': 4} 14 | 15 | def test_pad_pads_both_sentences(self): 16 | instance = IndexedSentencePairInstance([1, 2], [3, 4], True) 17 | instance.pad({'num_sentence_words': 3}) 18 | assert instance.first_sentence_indices == [0, 1, 2] 19 | assert instance.second_sentence_indices == [0, 3, 4] 20 | 21 | def test_as_training_data_produces_correct_numpy_arrays(self): 22 | # pylint: disable=redefined-variable-type 23 | instance = IndexedSentencePairInstance([1, 2], [3, 4], [0, 1, 0]) 24 | inputs, label = instance.as_training_data() 25 | assert isinstance(inputs, tuple) 26 | assert len(inputs) == 2 27 | assert numpy.all(inputs[0] == numpy.asarray([1, 2])) 28 | assert numpy.all(inputs[1] == numpy.asarray([3, 4])) 29 | assert numpy.all(label == numpy.asarray([0, 1, 0])) 30 | -------------------------------------------------------------------------------- /tests/data/instances/language_modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/language_modeling/__init__.py -------------------------------------------------------------------------------- /tests/data/instances/reading_comprehension/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/reading_comprehension/__init__.py -------------------------------------------------------------------------------- /tests/data/instances/sequence_tagging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/sequence_tagging/__init__.py -------------------------------------------------------------------------------- /tests/data/instances/sequence_tagging/test_tagging_instance.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | from deep_qa.data.instances.sequence_tagging.tagging_instance import IndexedTaggingInstance 3 | from deep_qa.testing.test_case import DeepQaTestCase 4 | from numpy.testing import assert_array_almost_equal 5 | 6 | 7 | class TestIndexedTaggingInstance(DeepQaTestCase): 8 | def setUp(self): 9 | super(TestIndexedTaggingInstance, self).setUp() 10 | self.instance = IndexedTaggingInstance([1, 2, 3, 4], [4, 5, 6]) 11 | 12 | def test_get_padding_lengths_returns_correct_lengths(self): 13 | assert self.instance.get_padding_lengths() == {'num_sentence_words': 4} 14 | 15 | def test_pad_truncates_correctly(self): 16 | self.instance.pad({'num_sentence_words': 2}) 17 | assert self.instance.text_indices == [1, 2] 18 | 19 | def test_pad_adds_padding_correctly(self): 20 | self.instance.pad({'num_sentence_words': 6}) 21 | assert self.instance.text_indices == [1, 2, 3, 4, 0, 0] 22 | 23 | def test_as_training_data_produces_correct_arrays(self): 24 | text_array, label_array = self.instance.as_training_data() 25 | assert_array_almost_equal(text_array, [1, 2, 3, 4]) 26 | assert_array_almost_equal(label_array, [4, 5, 6]) 27 | -------------------------------------------------------------------------------- /tests/data/instances/text_classification/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/text_classification/__init__.py -------------------------------------------------------------------------------- /tests/data/tokenizers/tokenizer_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | from deep_qa.data.tokenizers.word_tokenizer import WordTokenizer 4 | from deep_qa.common.params import Params 5 | 6 | class TestTokenizer: 7 | tokenizer = WordTokenizer(Params({})) 8 | passage = "On January 7, 2012, Beyoncé gave birth to her first child, a daughter, Blue Ivy " +\ 9 | "Carter, at Lenox Hill Hospital in New York. Five months later, she performed for four " +\ 10 | "nights at Revel Atlantic City's Ovation Hall to celebrate the resort's opening, her " +\ 11 | "first performances since giving birth to Blue Ivy." 12 | 13 | def test_char_span_to_token_span_handles_easy_cases(self): 14 | # "January 7, 2012" 15 | token_span = self.tokenizer.char_span_to_token_span(self.passage, (3, 18)) 16 | assert token_span == (1, 5) 17 | # "Lenox Hill Hospital" 18 | token_span = self.tokenizer.char_span_to_token_span(self.passage, (91, 110)) 19 | assert token_span == (22, 25) 20 | # "Lenox Hill Hospital in New York." 21 | token_span = self.tokenizer.char_span_to_token_span(self.passage, (91, 123)) 22 | assert token_span == (22, 29) 23 | -------------------------------------------------------------------------------- /tests/data/tokenizers/word_processor_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | from deep_qa.data.tokenizers.word_processor import WordProcessor 4 | from deep_qa.common.params import Params 5 | 6 | class TestWordProcessor: 7 | def test_passes_through_correctly(self): 8 | word_processor = WordProcessor(Params({})) 9 | sentence = "this (sentence) has 'crazy' \"punctuation\"." 10 | tokens = word_processor.get_tokens(sentence) 11 | expected_tokens = ["this", "(", "sentence", ")", "has", "'", "crazy", "'", "\"", 12 | "punctuation", "\"", "."] 13 | assert tokens == expected_tokens 14 | 15 | def test_stems_and_filters_correctly(self): 16 | word_processor = WordProcessor(Params({'word_stemmer': 'porter', 'word_filter': 'stopwords'})) 17 | sentence = "this (sentence) has 'crazy' \"punctuation\"." 
18 | expected_tokens = ["sentenc", "ha", "crazi", "punctuat"] 19 | tokens = word_processor.get_tokens(sentence) 20 | assert tokens == expected_tokens 21 | -------------------------------------------------------------------------------- /tests/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/layers/__init__.py -------------------------------------------------------------------------------- /tests/layers/attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/layers/attention/__init__.py -------------------------------------------------------------------------------- /tests/layers/attention/masked_softmax_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from keras import backend as K 5 | from keras.layers import Input 6 | from keras.models import Model 7 | 8 | from deep_qa.layers.attention.masked_softmax import MaskedSoftmax 9 | 10 | class TestMaskedSoftmaxLayer: 11 | def test_call_works_with_no_mask(self): 12 | batch_size = 1 13 | num_options = 4 14 | options_input = Input(shape=(num_options,), dtype='float32') 15 | softmax_result = MaskedSoftmax()(options_input) 16 | model = Model(inputs=[options_input], outputs=[softmax_result]) 17 | options_tensor = numpy.asarray([[2, 4, 0, 1]]) 18 | softmax_tensor = model.predict([options_tensor]) 19 | assert softmax_tensor.shape == (batch_size, num_options) 20 | numpy.testing.assert_almost_equal(softmax_tensor, 21 | [[0.112457, 0.830953, 0.015219, 0.041371]], 22 | decimal=5) 23 | 24 | def test_call_handles_higher_order_input(self): 25 | batch_size = 1 26 | length_1 = 5 27 | length_2 = 3 28 | num_options = 4 29 | options_input = Input(shape=(length_1, length_2, num_options,), dtype='float32') 30 | softmax_result = MaskedSoftmax()(options_input) 31 | model = Model(inputs=[options_input], outputs=[softmax_result]) 32 | options_tensor = numpy.zeros((batch_size, length_1, length_2, num_options)) 33 | for i in range(length_1): 34 | for j in range(length_2): 35 | options_tensor[0, i, j] = [2, 4, 0, 1] 36 | softmax_tensor = model.predict([options_tensor]) 37 | assert softmax_tensor.shape == (batch_size, length_1, length_2, num_options) 38 | for i in range(length_1): 39 | for j in range(length_2): 40 | numpy.testing.assert_almost_equal(softmax_tensor[0, i, j], 41 | [0.112457, 0.830953, 0.015219, 0.041371], 42 | decimal=5) 43 | 44 | def test_call_handles_masking_properly(self): 45 | options = K.variable(numpy.asarray([[2, 4, 0, 1]])) 46 | mask = K.variable(numpy.asarray([[1, 0, 1, 1]])) 47 | softmax = K.eval(MaskedSoftmax().call(options, mask=mask)) 48 | assert softmax.shape == (1, 4) 49 | numpy.testing.assert_almost_equal(softmax, [[0.66524096, 0, 0.09003057, 0.24472847]]) 50 | -------------------------------------------------------------------------------- /tests/layers/attentive_gru_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use 2 | import numpy 3 | 4 | from keras.layers import Input, Embedding, merge 5 | from keras.models import Model 6 | import keras.backend as K 7 | 8 | from deep_qa.layers.encoders import AttentiveGru 9 | 10 | 11 | class TestAttentiveGRU: 12 | def 
test_on_unmasked_input(self): 13 | 14 | sentence_length = 5 15 | embedding_dim = 10 16 | vocabulary_size = 15 17 | input_layer = Input(shape=(sentence_length,), dtype='int32') 18 | attention = Input(shape=(sentence_length,), dtype='float32') 19 | # Embedding does not mask zeros 20 | embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim) 21 | attentive_gru = AttentiveGru(output_dim=embedding_dim, 22 | input_length=sentence_length, 23 | return_sequences=True, 24 | name='attentive_gru_test') 25 | embedded_input = embedding(input_layer) 26 | concat_mode = lambda layer_outs: K.concatenate([K.expand_dims(layer_outs[0], axis=2), 27 | layer_outs[1]], 28 | axis=2) 29 | 30 | combined_sentence_with_attention = merge([attention, embedded_input], 31 | mode=concat_mode, 32 | output_shape=(5, 11)) 33 | 34 | sequence_of_outputs = attentive_gru(combined_sentence_with_attention) 35 | model = Model(inputs=[input_layer, attention], outputs=sequence_of_outputs) 36 | model.compile(loss="mse", optimizer="sgd") # Will not train this model 37 | test_input = numpy.asarray([[0, 3, 1, 7, 10]], dtype='int32') 38 | attention_input = numpy.asarray([[1., 0., 0., 0., 0.]], dtype='float32') 39 | 40 | # To debug this model, we are going to check that if we pass an attention mask into 41 | # the attentive_gru which has all zeros apart from the first element which is one, 42 | # all the elements should be equal to the first output as the state won't change over 43 | # time, as we add in none of the memory. This is not the intended use of this class, 44 | # but if this works, the intended use will be correct. 45 | actual_sequence_of_outputs = numpy.squeeze(model.predict([test_input, attention_input])) 46 | for i in range(sentence_length - 1): 47 | assert numpy.array_equal(actual_sequence_of_outputs[i, :], actual_sequence_of_outputs[i+1, :]) 48 | -------------------------------------------------------------------------------- /tests/layers/backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/layers/backend/__init__.py -------------------------------------------------------------------------------- /tests/layers/backend/collapse_and_expand_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from numpy.testing import assert_allclose 5 | from keras.layers import Input, Dense 6 | from keras.models import Model 7 | 8 | from deep_qa.layers.backend import CollapseToBatch, ExpandFromBatch, AddMask 9 | 10 | 11 | class TestCollapseAndExpand: 12 | # We need to test CollapseToBatch and ExpandFromBatch together, because Keras doesn't like it 13 | # if you change the batch size between inputs and outputs. It makes sense to test them 14 | # together, anyway. 
15 | def test_collapse_and_expand_works_with_dynamic_shape(self): 16 | batch_size = 3 17 | length1 = 5 18 | length2 = 7 19 | length3 = 2 20 | dense_units = 6 21 | input_layer = Input(shape=(length1, None, length3), dtype='float32') 22 | masked_input = AddMask(mask_value=1)(input_layer) 23 | collapsed_1 = CollapseToBatch(num_to_collapse=1)(masked_input) 24 | collapsed_2 = CollapseToBatch(num_to_collapse=2)(masked_input) 25 | dense = Dense(dense_units)(collapsed_2) 26 | expanded_1 = ExpandFromBatch(num_to_expand=1)([collapsed_1, masked_input]) 27 | expanded_2 = ExpandFromBatch(num_to_expand=2)([collapsed_2, masked_input]) 28 | expanded_dense = ExpandFromBatch(num_to_expand=2)([dense, masked_input]) 29 | model = Model(inputs=input_layer, outputs=[expanded_1, expanded_2, expanded_dense]) 30 | 31 | input_tensor = numpy.random.randint(0, 3, (batch_size, length1, length2, length3)) 32 | expanded_1_tensor, expanded_2_tensor, expanded_dense_tensor = model.predict(input_tensor) 33 | assert expanded_1_tensor.shape == input_tensor.shape 34 | assert expanded_2_tensor.shape == input_tensor.shape 35 | assert expanded_dense_tensor.shape == input_tensor.shape[:-1] + (dense_units,) 36 | assert_allclose(expanded_1_tensor, input_tensor) 37 | assert_allclose(expanded_2_tensor, input_tensor) 38 | -------------------------------------------------------------------------------- /tests/layers/backend/envelope_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from keras.layers import Input 5 | from keras.models import Model 6 | 7 | from deep_qa.layers.backend import Envelope 8 | 9 | class TestEnvelopeLayer: 10 | def test_call_works_on_simple_input(self): 11 | batch_size = 1 12 | sequence_length = 5 13 | span_begin_input = Input(shape=(sequence_length,), dtype='float32') 14 | span_end_input = Input(shape=(sequence_length,), dtype='float32') 15 | envelope = Envelope()([span_begin_input, span_end_input]) 16 | model = Model(inputs=[span_begin_input, span_end_input], outputs=[envelope]) 17 | span_begin_tensor = numpy.asarray([[0.01, 0.1, 0.8, 0.05, 0.04]]) 18 | span_end_tensor = numpy.asarray([[0.01, 0.04, 0.05, 0.2, 0.7]]) 19 | envelope_tensor = model.predict([span_begin_tensor, span_end_tensor]) 20 | assert envelope_tensor.shape == (batch_size, sequence_length) 21 | expected_envelope = [[0.01 * 0.99, 0.11 * 0.95, 0.91 * 0.9, 0.96 * 0.7, 1.0 * 0.0]] 22 | numpy.testing.assert_almost_equal(envelope_tensor, expected_envelope) 23 | -------------------------------------------------------------------------------- /tests/layers/backend/max_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from keras.layers import Input 5 | from keras.models import Model 6 | 7 | from deep_qa.layers.backend import Max 8 | 9 | class TestMaxLayer: 10 | def test_call_works_on_simple_input(self): 11 | batch_size = 2 12 | input_length = 5 13 | input_layer = Input(shape=(input_length,), dtype='float32') 14 | max_output = Max()(input_layer) 15 | model = Model(inputs=[input_layer], outputs=[max_output]) 16 | input_tensor = numpy.asarray([[2, 5, 3, 1, -4], [-1, -4, -2, -10, -4]]) 17 | max_tensor = model.predict([input_tensor]) 18 | assert max_tensor.shape == (batch_size,) 19 | numpy.testing.assert_almost_equal(max_tensor, [5, -1]) 20 | -------------------------------------------------------------------------------- 
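A note on the Envelope layer exercised in envelope_test.py above: it turns a span-begin distribution and a span-end distribution into a per-position probability of being inside the span. The expected_envelope values in that test imply the arithmetic below; this is only an illustrative numpy sketch under that assumption, not the layer's actual Keras implementation:

    import numpy

    span_begin = numpy.array([0.01, 0.1, 0.8, 0.05, 0.04])
    span_end = numpy.array([0.01, 0.04, 0.05, 0.2, 0.7])

    # P(the span has begun at or before position i) * P(the span ends after position i)
    begun = numpy.cumsum(span_begin)           # [0.01, 0.11, 0.91, 0.96, 1.00]
    not_ended = 1.0 - numpy.cumsum(span_end)   # [0.99, 0.95, 0.90, 0.70, 0.00]
    envelope = begun * not_ended
    print(envelope)  # matches the expected_envelope asserted in envelope_test.py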
/tests/layers/backend/permute_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from keras.layers import Input 5 | from keras.models import Model 6 | 7 | from deep_qa.layers.backend import Permute 8 | 9 | class TestPermuteLayer: 10 | def test_call_works_on_simple_input(self): 11 | batch_size = 2 12 | input_length_1 = 2 13 | input_length_2 = 1 14 | input_layer = Input(shape=(input_length_1, input_length_2), dtype='float32') 15 | permute_output = Permute(pattern=[0, 2, 1])(input_layer) 16 | model = Model(inputs=[input_layer], outputs=[permute_output]) 17 | input_tensor = numpy.asarray([[[2], [5]], [[-1], [-4]]]) 18 | permute_tensor = model.predict([input_tensor]) 19 | assert permute_tensor.shape == (batch_size, input_length_2, input_length_1) 20 | numpy.testing.assert_almost_equal(permute_tensor, [[[2, 5]], [[-1, -4]]]) 21 | -------------------------------------------------------------------------------- /tests/layers/backend/repeat_like_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from keras.layers import Input 5 | from keras.models import Model 6 | 7 | from deep_qa.layers.backend import RepeatLike 8 | 9 | class TestRepeatLikeLayer: 10 | def test_call_works_on_simple_input(self): 11 | batch_size = 2 12 | input_length = 3 13 | repetitions = 4 14 | input_layer = Input(shape=(input_length,), dtype='float32') 15 | input_layer_2 = Input(shape=(None,), dtype='float32') 16 | repeat_output = RepeatLike(axis=1, copy_from_axis=1)([input_layer, input_layer_2]) 17 | model = Model(inputs=[input_layer, input_layer_2], outputs=[repeat_output]) 18 | input_tensor = numpy.asarray([[2, 5, 3], [-1, -4, -2]]) 19 | input_tensor_2 = numpy.ones((batch_size, repetitions)) 20 | repeat_tensor = model.predict([input_tensor, input_tensor_2]) 21 | assert repeat_tensor.shape == (batch_size, repetitions, input_length) 22 | for i in range(repetitions): 23 | numpy.testing.assert_almost_equal(repeat_tensor[:, i, :], [[2, 5, 3], [-1, -4, -2]]) 24 | -------------------------------------------------------------------------------- /tests/layers/backend/repeat_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from keras.layers import Input 5 | from keras.models import Model 6 | 7 | from deep_qa.layers.backend import Repeat 8 | 9 | class TestRepeatLayer: 10 | def test_call_works_on_simple_input(self): 11 | batch_size = 2 12 | input_length = 3 13 | repetitions = 4 14 | input_layer = Input(shape=(input_length,), dtype='float32') 15 | repeat_output = Repeat(axis=1, repetitions=repetitions)(input_layer) 16 | model = Model(inputs=[input_layer], outputs=[repeat_output]) 17 | input_tensor = numpy.asarray([[2, 5, 3], [-1, -4, -2]]) 18 | repeat_tensor = model.predict([input_tensor]) 19 | assert repeat_tensor.shape == (batch_size, repetitions, input_length) 20 | for i in range(repetitions): 21 | numpy.testing.assert_almost_equal(repeat_tensor[:, i, :], [[2, 5, 3], [-1, -4, -2]]) 22 | -------------------------------------------------------------------------------- /tests/layers/backend/replace_masked_values_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from numpy.testing import 
assert_almost_equal 5 | from keras.layers import Input 6 | from keras.models import Model 7 | 8 | from deep_qa.layers.backend import AddMask, ReplaceMaskedValues 9 | 10 | class TestReplaceMaskedValues: 11 | def test_call_works_on_simple_input(self): 12 | input_length = 3 13 | input_layer = Input(shape=(input_length,), dtype='float32') 14 | masked = AddMask(2)(input_layer) 15 | replaced = ReplaceMaskedValues(4)(masked) 16 | model = Model(inputs=[input_layer], outputs=[replaced]) 17 | input_tensor = numpy.asarray([[2, 5, 2], [2, -4, -2]]) 18 | replaced_tensor = model.predict([input_tensor]) 19 | assert_almost_equal(replaced_tensor, numpy.asarray([[4, 5, 4], [4, -4, -2]])) 20 | -------------------------------------------------------------------------------- /tests/layers/bigru_index_selector_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use 2 | import numpy 3 | from numpy.testing import assert_almost_equal 4 | 5 | from keras.layers import Input 6 | from keras.models import Model 7 | 8 | from deep_qa.layers import BiGRUIndexSelector 9 | 10 | 11 | class TestBiGRUIndexSelector(): 12 | def test_batched_case(self): 13 | document_length = 5 14 | gru_hidden_dim = 2 15 | target = 8 16 | 17 | word_indices_input = Input(shape=(document_length,), 18 | dtype='int32', 19 | name="word_indices_input") 20 | gru_f_input = Input(shape=(document_length, gru_hidden_dim), 21 | dtype='float32', 22 | name="gru_f_input") 23 | gru_b_input = Input(shape=(document_length, gru_hidden_dim), 24 | dtype='float32', 25 | name="gru_b_input") 26 | index_bigru_output = BiGRUIndexSelector(target)([word_indices_input, 27 | gru_f_input, 28 | gru_b_input]) 29 | model = Model([word_indices_input, 30 | gru_f_input, 31 | gru_b_input], 32 | index_bigru_output) 33 | 34 | document_indices = numpy.array([[1, 3, 4, 8, 2], [2, 8, 1, 2, 3]]) 35 | gru_f_input = numpy.array([[[0.1, 0.5], [0.3, 0.4], [0.4, 0.1], [0.9, 0.2], [0.1, 0.3]], 36 | [[0.4, 0.6], [0.7, 0.1], [0.3, 0.1], [0.9, 0.5], [0.4, 0.7]]]) 37 | gru_b_input = numpy.array([[[0.7, 0.2], [0.9, 0.1], [0.3, 0.8], [0.2, 0.6], [0.7, 0.2]], 38 | [[0.2, 0.1], [0.3, 0.6], [0.2, 0.8], [0.3, 0.6], [0.4, 0.4]]]) 39 | expected_output = numpy.array([[0.9, 0.2, 0.2, 0.6], [0.7, 0.1, 0.3, 0.6]]) 40 | 41 | # Testing the general single-batch case. 
42 | result = model.predict([document_indices, gru_f_input, gru_b_input]) 43 | assert_almost_equal(result, expected_output) 44 | -------------------------------------------------------------------------------- /tests/layers/complex_concat_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from flaky import flaky 5 | from keras.layers import Input 6 | from keras.models import Model 7 | 8 | from deep_qa.layers import ComplexConcat 9 | 10 | class TestComplexConcatLayer: 11 | def test_call_works_on_simple_input(self): 12 | input_shape = (3, 4, 5, 7) 13 | input_1 = Input(shape=input_shape[1:], dtype='float32') 14 | input_2 = Input(shape=input_shape[1:], dtype='float32') 15 | input_3 = Input(shape=input_shape[1:], dtype='float32') 16 | input_4 = Input(shape=input_shape[1:], dtype='float32') 17 | inputs = [input_1, input_2, input_3, input_4] 18 | concatenated = ComplexConcat(combination='1,2,3,4')(inputs) 19 | model = Model(inputs=inputs, outputs=[concatenated]) 20 | input_1_tensor = numpy.random.rand(*input_shape) 21 | input_2_tensor = numpy.random.rand(*input_shape) 22 | input_3_tensor = numpy.random.rand(*input_shape) 23 | input_4_tensor = numpy.random.rand(*input_shape) 24 | input_tensors = [input_1_tensor, input_2_tensor, input_3_tensor, input_4_tensor] 25 | concat_tensor = model.predict(input_tensors) 26 | assert concat_tensor.shape == (3, 4, 5, 7*4) 27 | numpy.testing.assert_almost_equal(concat_tensor, numpy.concatenate(input_tensors, axis=-1)) 28 | 29 | @flaky 30 | def test_call_handles_complex_combinations(self): 31 | input_shape = (3, 4, 5, 7) 32 | input_1 = Input(shape=input_shape[1:], dtype='float32') 33 | input_2 = Input(shape=input_shape[1:], dtype='float32') 34 | input_3 = Input(shape=input_shape[1:], dtype='float32') 35 | input_4 = Input(shape=input_shape[1:], dtype='float32') 36 | inputs = [input_1, input_2, input_3, input_4] 37 | concatenated = ComplexConcat(combination='1-2,2*4,3/1,4+3,3', axis=1)(inputs) 38 | model = Model(inputs=inputs, outputs=[concatenated]) 39 | input_1_tensor = numpy.random.rand(*input_shape) 40 | input_2_tensor = numpy.random.rand(*input_shape) 41 | input_3_tensor = numpy.random.rand(*input_shape) 42 | input_4_tensor = numpy.random.rand(*input_shape) 43 | input_tensors = [input_1_tensor, input_2_tensor, input_3_tensor, input_4_tensor] 44 | concat_tensor = model.predict(input_tensors) 45 | assert concat_tensor.shape == (3, 4*5, 5, 7) 46 | expected_tensor = numpy.concatenate([ 47 | input_1_tensor - input_2_tensor, 48 | input_2_tensor * input_4_tensor, 49 | input_3_tensor / input_1_tensor, 50 | input_4_tensor + input_3_tensor, 51 | input_3_tensor 52 | ], axis=1) 53 | numpy.testing.assert_almost_equal(concat_tensor, expected_tensor, decimal=3) 54 | -------------------------------------------------------------------------------- /tests/layers/decomposable_attention_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from keras.layers import Input, Embedding 5 | from keras.models import Model 6 | 7 | from deep_qa.layers.entailment_models import DecomposableAttentionEntailment 8 | 9 | class TestDecomposableAttention: 10 | def test_decomposable_attention_does_not_crash(self): 11 | sentence_length = 5 12 | embedding_dim = 10 13 | vocabulary_size = 15 14 | num_sentences = 7 15 | premise_input_layer = Input(shape=(sentence_length,), dtype='int32') 
16 | hypothesis_input_layer = Input(shape=(sentence_length,), dtype='int32') 17 | embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, mask_zero=True) 18 | embedded_premise = embedding(premise_input_layer) 19 | embedded_hypothesis = embedding(hypothesis_input_layer) 20 | entailment_layer = DecomposableAttentionEntailment() 21 | entailment_scores = entailment_layer([embedded_premise, embedded_hypothesis]) 22 | model = Model(inputs=[premise_input_layer, hypothesis_input_layer], outputs=entailment_scores) 23 | premise_input = numpy.random.randint(0, vocabulary_size, (num_sentences, sentence_length)) 24 | hypothesis_input = numpy.random.randint(0, vocabulary_size, (num_sentences, sentence_length)) 25 | model.predict([premise_input, hypothesis_input]) 26 | -------------------------------------------------------------------------------- /tests/layers/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/layers/encoders/__init__.py -------------------------------------------------------------------------------- /tests/layers/noisy_or_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use, invalid-name 2 | import numpy as np 3 | from numpy.testing import assert_array_almost_equal 4 | from deep_qa.layers import NoisyOr, BetweenZeroAndOne 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | from keras import backend as K 7 | from keras.layers import Input 8 | from keras.models import Model 9 | 10 | 11 | class TestNoisyOr(DeepQaTestCase): 12 | def test_general_case(self): 13 | 14 | input_layer = Input(shape=(3, 2,), dtype='float32', name="input") 15 | axis = 2 16 | noisy_or_layer = NoisyOr(axis=axis) 17 | output = noisy_or_layer(input_layer) 18 | model = Model([input_layer], output) 19 | 20 | # Testing general unmasked batched case. 21 | q = K.eval(noisy_or_layer.noise_parameter) 22 | batch_original_data = np.array([[[0.2, 0.1], 23 | [0.5, 0.3], 24 | [0.3, 0.7]], 25 | [[0.4, 0.55], 26 | [0.65, 0.8], 27 | [0.9, 0.15]]]) 28 | batch_result = model.predict([batch_original_data]) 29 | batch_desired_result = 1.0 - np.prod(1.0 - (q * batch_original_data), axis=axis) 30 | assert_array_almost_equal(batch_result, batch_desired_result) 31 | 32 | # Testing the masked case. 33 | # Here's a modified version of the batch_original_data, with extra probabilities. 34 | batch_data_with_masks = K.variable(np.array([[[0.2, 0.1, 0.7], [0.5, 0.3, 0.3], [0.3, 0.7, 0.2]], 35 | [[0.4, 0.55, 0.3], [0.65, 0.8, 0.1], [0.9, 0.15, 0.0]]]), 36 | dtype="float32") 37 | # Now here the added 3rd element is masked out, so the noisy_or probabilities resulting from the 38 | # masked version should be the same as the unmasked one (above). 
39 | masks = K.variable(np.array([[[1, 1, 0], [1, 1, 0], [1, 1, 0]], 40 | [[1, 1, 0], [1, 1, 0], [1, 1, 0]]]), dtype="float32") 41 | 42 | masking_results = K.eval(noisy_or_layer.call(inputs=batch_data_with_masks, mask=masks)) 43 | assert_array_almost_equal(batch_result, masking_results) 44 | 45 | def test_between_zero_and_one_constraint(self): 46 | p = K.variable(np.asarray([0.35, -0.4, 1.0, 1.2]), dtype='float32') 47 | desired_result = np.asarray([0.35, K.epsilon(), 1.0, 1.0]) 48 | result = K.eval(BetweenZeroAndOne()(p)) 49 | assert_array_almost_equal(result, desired_result) 50 | -------------------------------------------------------------------------------- /tests/layers/overlap_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use 2 | import numpy 3 | from numpy.testing import assert_almost_equal 4 | import keras.backend as K 5 | from keras.layers import Input 6 | from keras.models import Model 7 | 8 | from deep_qa.layers import Overlap 9 | 10 | 11 | class TestOverlap: 12 | def test_batched_case(self): 13 | tensor_a_len = 5 14 | tensor_b_len = 4 15 | 16 | tensor_a_input = Input(shape=(tensor_a_len,), 17 | dtype='int32', 18 | name="tensor_a") 19 | tensor_b_input = Input(shape=(tensor_b_len,), 20 | dtype='int32', 21 | name="tensor_b") 22 | overlap_output = Overlap()([tensor_a_input, 23 | tensor_b_input]) 24 | model = Model([tensor_a_input, 25 | tensor_b_input], 26 | overlap_output) 27 | 28 | tensor_a = numpy.array([[1, 3, 4, 8, 2], [2, 8, 1, 2, 3]]) 29 | tensor_b = numpy.array([[9, 4, 2, 5], [6, 1, 2, 2]]) 30 | expected_output = numpy.array([[[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0]], 31 | [[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]]) 32 | 33 | # Testing the general batched case 34 | result = model.predict([tensor_a, tensor_b]) 35 | assert_almost_equal(result, expected_output) 36 | 37 | def test_masked_batched_case(self): 38 | tensor_a = K.variable(numpy.array([[1, 3, 4, 8, 2], [2, 8, 1, 2, 3]]), 39 | dtype="int32") 40 | tensor_b = K.variable(numpy.array([[9, 4, 2, 5], [6, 1, 2, 2]]), 41 | dtype="int32") 42 | mask_a = K.variable(numpy.array([[1, 1, 1, 0, 0], [1, 1, 1, 1, 0]])) 43 | mask_b = K.variable(numpy.array([[1, 1, 0, 0], [1, 1, 0, 0]])) 44 | expected_output = numpy.array([[[1.0, 0.0], [1.0, 0.0], 45 | [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]], 46 | [[1.0, 0.0], [1.0, 0.0], 47 | [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]]]) 48 | 49 | # Testing the masked general batched case 50 | result = K.eval(Overlap()([tensor_a, tensor_b], mask=[mask_a, mask_b])) 51 | assert_almost_equal(result, expected_output) 52 | -------------------------------------------------------------------------------- /tests/layers/test_subtract_minimum.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use 2 | import numpy as np 3 | from numpy.testing import assert_array_almost_equal 4 | from keras.layers import Input 5 | from keras.models import Model 6 | from deep_qa.layers.backend.add_mask import AddMask 7 | from deep_qa.layers.subtract_minimum import SubtractMinimum 8 | from deep_qa.testing.test_case import DeepQaTestCase 9 | 10 | 11 | class TestSubtractMinimum(DeepQaTestCase): 12 | def test_general_case(self): 13 | 14 | input_layer = Input(shape=(4, 3,), dtype='float32', name="input") 15 | subtract_minimum_layer = SubtractMinimum(axis=1) 16 | normalized_input = subtract_minimum_layer(input_layer) 17 | 18 | model = Model([input_layer], normalized_input) 19 | 
# Testing general unmasked 1D case. 20 | unnormalized_tensor = np.array([[[0.1, 0.1, 0.1], 21 | [0.2, 0.3, 0.4], 22 | [0.5, 0.4, 0.6], 23 | [0.5, 0.4, 0.6]]]) 24 | result = model.predict([unnormalized_tensor]) 25 | 26 | assert_array_almost_equal(result, np.array([[[0.0, 0.0, 0.0], 27 | [0.1, 0.2, 0.3], 28 | [0.4, 0.3, 0.5], 29 | [0.4, 0.3, 0.5]]])) 30 | 31 | # Testing masked batched case. 32 | # By setting the mask value to 0.1, we should ignore this value when deciding the minimum. 33 | mask_layer = AddMask(mask_value=0.1) 34 | masked_input = mask_layer(input_layer) 35 | normalized_masked_input = subtract_minimum_layer(masked_input) 36 | masking_model = Model([input_layer], normalized_masked_input) 37 | 38 | masked_result = masking_model.predict([unnormalized_tensor]) 39 | 40 | assert_array_almost_equal(masked_result, np.array([[[-0.1, -0.2, -0.3], 41 | [0.0, 0.0, 0.0], 42 | [0.3, 0.1, 0.2], 43 | [0.3, 0.1, 0.2]]])) 44 | -------------------------------------------------------------------------------- /tests/layers/tuple_alignment_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | import numpy 3 | from keras.layers import Embedding, Input 4 | from keras.models import Model 5 | 6 | from deep_qa.layers.entailment_models import MultipleChoiceTupleEntailment 7 | 8 | class TestTupleAlignment: 9 | def test_tuple_alignment_does_not_crash(self): 10 | question_length = 5 11 | num_options = 4 12 | tuple_size = 3 13 | num_tuples = 7 14 | embedding_dim = 10 15 | vocabulary_size = 15 16 | batch_size = 32 17 | question_input_layer = Input(shape=(question_length,), dtype='int32') 18 | answer_input_layer = Input(shape=(num_options,), dtype='int32') 19 | knowledge_input_layer = Input(shape=(num_tuples, tuple_size), dtype='int32') 20 | # Embedding masks zeros 21 | embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, 22 | mask_zero=True) 23 | embedded_question = embedding(question_input_layer) 24 | embedded_answer = embedding(answer_input_layer) 25 | embedded_knowledge = embedding(knowledge_input_layer) 26 | entailment_layer = MultipleChoiceTupleEntailment() 27 | entailment_scores = entailment_layer([embedded_knowledge, embedded_question, embedded_answer]) 28 | model = Model(inputs=[knowledge_input_layer, question_input_layer, answer_input_layer], 29 | outputs=entailment_scores) 30 | model.compile(loss="mse", optimizer="sgd") # Will not train this model 31 | knowledge_input = numpy.random.randint(0, vocabulary_size, (batch_size, num_tuples, tuple_size)) 32 | question_input = numpy.random.randint(0, vocabulary_size, (batch_size, question_length)) 33 | answer_input = numpy.random.randint(0, vocabulary_size, (batch_size, num_options)) 34 | model.predict([knowledge_input, question_input, answer_input]) 35 | -------------------------------------------------------------------------------- /tests/layers/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/layers/wrappers/__init__.py -------------------------------------------------------------------------------- /tests/layers/wrappers/add_encoder_mask_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | import numpy 3 | from deep_qa.layers.encoders import BOWEncoder 4 | from deep_qa.layers.wrappers import AddEncoderMask,
OutputMask 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | from deep_qa.training.models import DeepQaModel 7 | from keras.layers import Embedding, Input 8 | 9 | 10 | class TestAddEncoderMask(DeepQaTestCase): 11 | def test_mask_is_computed_correctly(self): 12 | background_input = Input(shape=(None, 3), dtype='int32') 13 | embedding = Embedding(input_dim=3, output_dim=2, mask_zero=True) 14 | embedded_background = embedding(background_input) 15 | encoded_background = BOWEncoder(units=2)(embedded_background) 16 | encoded_background_with_mask = AddEncoderMask()([encoded_background, embedded_background]) 17 | 18 | mask_output = OutputMask()(encoded_background_with_mask) 19 | model = DeepQaModel(inputs=[background_input], outputs=mask_output) 20 | 21 | test_background = numpy.asarray([ 22 | [ 23 | [0, 0, 0], 24 | [2, 2, 2], 25 | [0, 0, 0], 26 | [0, 1, 2], 27 | [1, 0, 0], 28 | [0, 0, 0], 29 | [0, 1, 0], 30 | [1, 1, 1], 31 | ] 32 | ]) 33 | expected_mask = numpy.asarray([[0, 1, 0, 1, 1, 0, 1, 1]]) 34 | actual_mask = model.predict([test_background]) 35 | numpy.testing.assert_array_equal(expected_mask, actual_mask) 36 | -------------------------------------------------------------------------------- /tests/layers/wrappers/encoder_wrapper_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | import numpy 3 | from deep_qa.layers.encoders import BOWEncoder 4 | from deep_qa.layers.wrappers import EncoderWrapper, OutputMask 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | from deep_qa.training.models import DeepQaModel 7 | from keras.layers import Embedding, Input 8 | 9 | 10 | class TestEncoderWrapper(DeepQaTestCase): 11 | def test_mask_is_computed_correctly(self): 12 | background_input = Input(shape=(3, 3), dtype='int32') 13 | embedding = Embedding(input_dim=3, output_dim=2, mask_zero=True) 14 | embedded_background = embedding(background_input) 15 | encoded_background = EncoderWrapper(BOWEncoder(units=2))(embedded_background) 16 | 17 | mask_output = OutputMask()(encoded_background) 18 | model = DeepQaModel(inputs=[background_input], outputs=mask_output) 19 | 20 | test_background = numpy.asarray([ 21 | [ 22 | [0, 0, 0], 23 | [2, 2, 2], 24 | [0, 0, 0], 25 | ] 26 | ]) 27 | expected_mask = numpy.asarray([[0, 1, 0]]) 28 | actual_mask = model.predict([test_background]) 29 | numpy.testing.assert_array_almost_equal(expected_mask, actual_mask) 30 | -------------------------------------------------------------------------------- /tests/layers/wrappers/time_distributed_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | import numpy 3 | from numpy.testing import assert_array_almost_equal 4 | from keras.layers import Input, Lambda 5 | from keras.models import Model 6 | from deep_qa.layers.wrappers import TimeDistributed 7 | from deep_qa.testing.test_case import DeepQaTestCase 8 | 9 | 10 | class TestTimeDistributed(DeepQaTestCase): 11 | def test_handles_multiple_inputs(self): 12 | input_layer_1 = Input(shape=(3, 1), dtype='int32') 13 | input_layer_2 = Input(shape=(3, 1), dtype='int32') 14 | combine_layer = Lambda(lambda x: x[0] ** x[1] + 1, 15 | output_shape=lambda x: (x[0][0], 1), 16 | name="a^b + 1 Layer") 17 | td_combine = TimeDistributed(combine_layer) 18 | output = td_combine([input_layer_1, input_layer_2]) 19 | model = Model([input_layer_1, input_layer_2], output) 20 | 21 | batch_input_1 = numpy.array([[[4], [5], 
[6]], 22 | [[3], [3], [3]], 23 | [[0], [1], [2]]], dtype='float32') 24 | batch_input_2 = numpy.array([[[3], [2], [1]], 25 | [[1], [2], [3]], 26 | [[1], [0], [2]]], dtype='float32') 27 | 28 | expected_result = (batch_input_1 ** batch_input_2 + 1) 29 | # In TimeDistributed, we reshape tensors whose final dimension is 1, so we need to do that here. 30 | if numpy.shape(expected_result)[-1] == 1: 31 | expected_result = numpy.reshape(expected_result, numpy.shape(expected_result)[:-1]) 32 | result = model.predict([batch_input_1, batch_input_2]) 33 | assert_array_almost_equal(result, expected_result) 34 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/models/__init__.py -------------------------------------------------------------------------------- /tests/models/entailment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/models/entailment/__init__.py -------------------------------------------------------------------------------- /tests/models/entailment/decomposable_attention_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | from deep_qa.common.params import Params 4 | from deep_qa.models.entailment import DecomposableAttention 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | 7 | 8 | class TestDecomposableAttentionModel(DeepQaTestCase): 9 | def test_trains_and_loads_correctly(self): 10 | self.write_snli_files() 11 | args = Params({ 12 | 'num_seq2seq_layers': 1, 13 | }) 14 | self.ensure_model_trains_and_loads(DecomposableAttention, args) 15 | -------------------------------------------------------------------------------- /tests/models/reading_comprehension/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/models/reading_comprehension/__init__.py -------------------------------------------------------------------------------- /tests/models/reading_comprehension/attention_sum_reader_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | from deep_qa.common.params import Params 4 | from deep_qa.models.reading_comprehension import AttentionSumReader 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | 7 | 8 | class TestAttentionSumReader(DeepQaTestCase): 9 | def test_train_does_not_crash_and_load_works(self): 10 | self.write_who_did_what_files() 11 | args = Params({ 12 | "encoder": { 13 | "default": { 14 | "type": "bi_gru", 15 | "units": 7 16 | } 17 | }, 18 | "seq2seq_encoder": { 19 | "default": { 20 | "type": "bi_gru", 21 | "encoder_params": { 22 | "units": 7 23 | }, 24 | "wrapper_params": {} 25 | } 26 | }, 27 | }) 28 | self.ensure_model_trains_and_loads(AttentionSumReader, args) 29 | -------------------------------------------------------------------------------- /tests/models/reading_comprehension/bidirectional_attention_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | import numpy 3 | from 
deep_qa.common.params import Params 4 | from deep_qa.models.reading_comprehension import BidirectionalAttentionFlow 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | from flaky import flaky 7 | 8 | 9 | class TestBidirectionalAttentionFlow(DeepQaTestCase): 10 | @flaky 11 | def test_trains_and_loads_correctly(self): 12 | self.write_span_prediction_files() 13 | args = Params({ 14 | 'embeddings': {'words': {'dimension': 8}, 'characters': {'dimension': 4}}, 15 | 'save_models': True, 16 | 'tokenizer': {'type': 'words and characters'}, 17 | 'show_summary_with_masking_info': True, 18 | }) 19 | model, _ = self.ensure_model_trains_and_loads(BidirectionalAttentionFlow, args) 20 | for layer in model.model.layers: 21 | if layer.name == 'characters_embedding': 22 | assert layer.get_output_shape_at(0)[-1] == 4 23 | break 24 | else: 25 | assert False, "couldn't find character embedding layer" 26 | 27 | def test_get_best_span(self): 28 | # Note that the best span cannot be (1, 0) since even though 0.3 * 0.5 is the greatest 29 | # value, the end span index is constrained to occur after the begin span index. 30 | span_begin_probs = numpy.array([0.1, 0.3, 0.05, 0.3, 0.25]) 31 | span_end_probs = numpy.array([0.5, 0.1, 0.2, 0.05, 0.15]) 32 | begin_end_idxs = BidirectionalAttentionFlow.get_best_span(span_begin_probs, 33 | span_end_probs) 34 | assert begin_end_idxs == (1, 2) 35 | 36 | # Testing an edge case of the dynamic program here, for the order of when you update the 37 | # best previous span position. We should not get (1, 1), because that's an empty span. 38 | span_begin_probs = numpy.array([0.4, 0.5, 0.1]) 39 | span_end_probs = numpy.array([0.3, 0.6, 0.1]) 40 | begin_end_idxs = BidirectionalAttentionFlow.get_best_span(span_begin_probs, 41 | span_end_probs) 42 | assert begin_end_idxs == (0, 1) 43 | 44 | # test higher-order input 45 | # Note that the best span cannot be (1, 1) since even though 0.3 * 0.5 is the greatest 46 | # value, the end span index is constrained to occur after the begin span index. 
47 | span_begin_probs = numpy.array([[0.1, 0.3, 0.05, 0.3, 0.25]]) 48 | span_end_probs = numpy.array([[0.1, 0.5, 0.2, 0.05, 0.15]]) 49 | begin_end_idxs = BidirectionalAttentionFlow.get_best_span(span_begin_probs, 50 | span_end_probs) 51 | assert begin_end_idxs == (1, 2) 52 | -------------------------------------------------------------------------------- /tests/models/sequence_tagging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/models/sequence_tagging/__init__.py -------------------------------------------------------------------------------- /tests/models/sequence_tagging/simple_tagger_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | import numpy 3 | from deep_qa.common.params import Params 4 | from deep_qa.models.sequence_tagging import SimpleTagger 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | 7 | 8 | class TestSimpleTagger(DeepQaTestCase): 9 | def test_trains_and_loads_correctly(self): 10 | self.write_sequence_tagging_files() 11 | args = Params({ 12 | 'save_models': True, 13 | 'show_summary_with_masking_info': True, 14 | 'instance_type': 'PreTokenizedTaggingInstance', 15 | 'tokenizer': {'processor': {'word_splitter': 'no_op'}}, 16 | }) 17 | self.ensure_model_trains_and_loads(SimpleTagger, args) 18 | 19 | def test_loss_function_uses_mask(self): 20 | # We're going to make sure that the loss and accuracy computations are the same for any 21 | # permutation of labels on padded tokens. If not, the loss/accuracy function is paying 22 | # attention to the labels when it shouldn't be. We're not going to test for any particular 23 | # accuracy value, just that all of them are the same - I ran this a few times by hand to be 24 | # sure that we're getting different accuracy values, depending on the initialization. 25 | self.write_sequence_tagging_files() 26 | args = Params({ 27 | 'show_summary_with_masking_info': True, 28 | 'instance_type': 'PreTokenizedTaggingInstance', 29 | 'tokenizer': {'processor': {'word_splitter': 'no_op'}}, 30 | }) 31 | model = self.get_model(SimpleTagger, args) 32 | model.train() 33 | 34 | input_indices = [3, 2, 0, 0] 35 | labels = [[[0, 1], [1, 0], [1, 0], [1, 0]], 36 | [[0, 1], [1, 0], [1, 0], [0, 1]], 37 | [[0, 1], [1, 0], [0, 1], [1, 0]], 38 | [[0, 1], [1, 0], [0, 1], [0, 1]]] 39 | results = [model.model.evaluate(numpy.asarray([input_indices]), numpy.asarray([label])) 40 | for label in labels] 41 | loss, accuracy = zip(*results) 42 | assert len(set(loss)) == 1 43 | assert len(set(accuracy)) == 1 44 | -------------------------------------------------------------------------------- /tests/run_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name,no-self-use 2 | import json 3 | import os 4 | 5 | import numpy 6 | from numpy.testing import assert_almost_equal 7 | from deep_qa.run import compute_accuracy 8 | from deep_qa.run import run_model_from_file, load_model, evaluate_model 9 | from deep_qa.run import score_dataset, score_dataset_with_ensemble 10 | from deep_qa.testing.test_case import DeepQaTestCase 11 | 12 | 13 | class TestRun(DeepQaTestCase): 14 | # Our point here is mostly just to make sure the scripts don't crash. 
15 | def setUp(self): 16 | super(TestRun, self).setUp() 17 | self.write_true_false_model_files() 18 | model_params = self.get_model_params({"model_class": "ClassificationModel", 19 | 'save_models': True}) 20 | self.param_path = os.path.join(self.TEST_DIR, "params.json") 21 | with open(self.param_path, "w") as file_path: 22 | json.dump(model_params.as_dict(), file_path) 23 | 24 | def test_run_model_does_not_crash(self): 25 | run_model_from_file(self.param_path) 26 | 27 | def test_load_model_does_not_crash(self): 28 | run_model_from_file(self.param_path) 29 | loaded_model = load_model(self.param_path) 30 | assert loaded_model.can_train() 31 | 32 | def test_score_dataset_does_not_crash(self): 33 | run_model_from_file(self.param_path) 34 | score_dataset(self.param_path, [self.TEST_FILE]) 35 | 36 | def test_evaluate_model_does_not_crash(self): 37 | run_model_from_file(self.param_path) 38 | evaluate_model(self.param_path, [self.TEST_FILE]) 39 | 40 | def test_score_dataset_with_ensemble_gives_same_predictions_as_score_dataset(self): 41 | # We're just going to test something simple here: that the methods don't crash, and that we 42 | # get the same result with an ensemble of one model that we do with `score_dataset`. 43 | run_model_from_file(self.param_path) 44 | predictions, _ = score_dataset(self.param_path, [self.TEST_FILE]) 45 | ensembled_predictions, _ = score_dataset_with_ensemble([self.param_path], [self.TEST_FILE]) 46 | assert_almost_equal(predictions, ensembled_predictions) 47 | 48 | def test_compute_accuracy_computes_a_correct_metric(self): 49 | predictions = numpy.asarray([[.5, .5, .6], [.1, .4, .0]]) 50 | labels = numpy.asarray([[1, 0, 0], [0, 1, 0]]) 51 | assert compute_accuracy(predictions, labels) == .5 52 | -------------------------------------------------------------------------------- /tests/tensors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/tensors/__init__.py -------------------------------------------------------------------------------- /tests/tensors/backend_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | import numpy 3 | from deep_qa.tensors.backend import hardmax 4 | from deep_qa.testing.test_case import DeepQaTestCase 5 | from keras import backend as K 6 | 7 | 8 | class TestBackendTensorFunctions(DeepQaTestCase): 9 | def test_hardmax(self): 10 | batch_size = 3 11 | knowledge_length = 5 12 | unnormalized_attention = K.variable(numpy.random.rand(batch_size, knowledge_length)) 13 | hardmax_output = hardmax(unnormalized_attention, knowledge_length) 14 | input_value = K.eval(unnormalized_attention) 15 | output_value = K.eval(hardmax_output) 16 | assert output_value.shape == (batch_size, knowledge_length) # pylint: disable=no-member 17 | # Assert all elements other than the ones are zeros 18 | assert numpy.count_nonzero(output_value) == batch_size 19 | # Assert the max values in all rows are ones 20 | assert numpy.all(numpy.equal(numpy.max(output_value, axis=1), 21 | numpy.ones((batch_size,)))) 22 | # Assert ones are in the right places 23 | assert numpy.all(numpy.equal(numpy.argmax(output_value, axis=1), 24 | numpy.argmax(input_value, axis=1))) 25 | -------------------------------------------------------------------------------- /tests/tensors/similarity_functions/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/tensors/similarity_functions/__init__.py -------------------------------------------------------------------------------- /tests/tensors/similarity_functions/bilinear_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from numpy.testing import assert_almost_equal 5 | import keras.backend as K 6 | 7 | from deep_qa.tensors.similarity_functions.bilinear import Bilinear 8 | 9 | class TestBilinearSimilarityFunction: 10 | def test_initialize_weights_returns_correct_weight_sizes(self): 11 | bilinear = Bilinear(name='bilinear') 12 | weights = bilinear.initialize_weights(3, 3) 13 | assert isinstance(weights, list) and len(weights) == 2 14 | weight_vector, bias = weights 15 | assert K.int_shape(weight_vector) == (3, 3) 16 | assert K.int_shape(bias) == (1,) 17 | 18 | weights = bilinear.initialize_weights(2, 5) 19 | assert isinstance(weights, list) and len(weights) == 2 20 | weight_vector, bias = weights 21 | assert K.int_shape(weight_vector) == (2, 5) 22 | assert K.int_shape(bias) == (1,) 23 | 24 | def test_compute_similarity_does_a_bilinear_product(self): 25 | bilinear = Bilinear(name='bilinear') 26 | weights = numpy.asarray([[-.3, .5], [2.0, -1.0]]) 27 | bilinear.weight_matrix = K.variable(weights) 28 | bilinear.bias = K.variable(numpy.asarray([.1])) 29 | a_vectors = numpy.asarray([[1, 1], [-1, -1]]) 30 | b_vectors = numpy.asarray([[1, 0], [0, 1]]) 31 | result = K.eval(bilinear.compute_similarity(K.variable(a_vectors), K.variable(b_vectors))) 32 | assert result.shape == (2,) 33 | assert_almost_equal(result, [1.8, .6]) 34 | 35 | def test_compute_similarity_works_with_higher_order_tensors(self): 36 | bilinear = Bilinear(name='bilinear') 37 | weights = numpy.random.rand(4, 7) 38 | bilinear.weight_matrix = K.variable(weights) 39 | bilinear.bias = K.variable(numpy.asarray([0])) 40 | a_vectors = numpy.random.rand(5, 4, 3, 6, 4) 41 | b_vectors = numpy.random.rand(5, 4, 3, 6, 7) 42 | result = K.eval(bilinear.compute_similarity(K.variable(a_vectors), K.variable(b_vectors))) 43 | assert result.shape == (5, 4, 3, 6) 44 | expected_result = numpy.dot(numpy.dot(numpy.transpose(a_vectors[3, 2, 1, 3]), weights), 45 | b_vectors[3, 2, 1, 3]) 46 | assert_almost_equal(result[3, 2, 1, 3], expected_result, decimal=5) 47 | -------------------------------------------------------------------------------- /tests/tensors/similarity_functions/cosine_similarity_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from numpy.testing import assert_almost_equal 5 | import keras.backend as K 6 | 7 | from deep_qa.tensors.similarity_functions.cosine_similarity import CosineSimilarity 8 | from deep_qa.tensors.similarity_functions.dot_product import DotProduct 9 | 10 | class TestCosineSimilarityFunction: 11 | cosine_similarity = CosineSimilarity(name='cosine_similarity') 12 | dot_product = DotProduct(name="dot_product") 13 | 14 | def test_initialize_weights_returns_empty(self): 15 | weights = self.cosine_similarity.initialize_weights(3, 3) 16 | assert isinstance(weights, list) and len(weights) == 0 17 | 18 | def test_compute_similarity_does_a_cosine_similarity(self): 19 | a_vectors = numpy.asarray([[numpy.random.random(3) for _ in range(2)]], 
dtype="float32") 20 | b_vectors = numpy.asarray([[numpy.random.random(3) for _ in range(2)]], dtype="float32") 21 | normed_a = K.l2_normalize(K.variable(a_vectors), axis=-1) 22 | normed_b = K.l2_normalize(K.variable(b_vectors), axis=-1) 23 | desired_result = K.eval(self.dot_product.compute_similarity(normed_a, normed_b)) 24 | result = K.eval(self.cosine_similarity.compute_similarity(K.variable(a_vectors), K.variable(b_vectors))) 25 | assert result.shape == (1, 2) # batch_size = 1 26 | assert numpy.all(result == desired_result) 27 | 28 | def test_compute_similarity_works_with_higher_order_tensors(self): 29 | a_vectors = numpy.random.rand(5, 4, 3, 6, 7) 30 | b_vectors = numpy.random.rand(5, 4, 3, 6, 7) 31 | normed_a = K.eval(K.l2_normalize(K.variable(a_vectors), axis=-1)) 32 | normed_b = K.eval(K.l2_normalize(K.variable(b_vectors), axis=-1)) 33 | result = K.eval(self.cosine_similarity.compute_similarity(K.variable(a_vectors), K.variable(b_vectors))) 34 | assert result.shape == (5, 4, 3, 6) 35 | assert_almost_equal(result[3, 2, 1, 3], 36 | numpy.dot(normed_a[3, 2, 1, 3], normed_b[3, 2, 1, 3]), 37 | decimal=6) 38 | -------------------------------------------------------------------------------- /tests/tensors/similarity_functions/dot_product_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | 3 | import numpy 4 | from numpy.testing import assert_almost_equal 5 | import keras.backend as K 6 | 7 | from deep_qa.tensors.similarity_functions.dot_product import DotProduct 8 | 9 | class TestDotProductSimilarityFunction: 10 | dot_product = DotProduct(name='dot_product') 11 | def test_initialize_weights_returns_empty(self): 12 | weights = self.dot_product.initialize_weights(3, 3) 13 | assert isinstance(weights, list) and len(weights) == 0 14 | 15 | def test_compute_similarity_does_a_dot_product(self): 16 | a_vectors = numpy.asarray([[1, 1, 1], [-1, -1, -1]]) 17 | b_vectors = numpy.asarray([[1, 0, 1], [1, 0, 0]]) 18 | result = K.eval(self.dot_product.compute_similarity(K.variable(a_vectors), K.variable(b_vectors))) 19 | assert result.shape == (2,) 20 | assert numpy.all(result == [2, -1]) 21 | 22 | def test_compute_similarity_works_with_higher_order_tensors(self): 23 | a_vectors = numpy.random.rand(5, 4, 3, 6, 7) 24 | b_vectors = numpy.random.rand(5, 4, 3, 6, 7) 25 | result = K.eval(self.dot_product.compute_similarity(K.variable(a_vectors), K.variable(b_vectors))) 26 | assert result.shape == (5, 4, 3, 6) 27 | assert_almost_equal(result[3, 2, 1, 3], 28 | numpy.dot(a_vectors[3, 2, 1, 3], b_vectors[3, 2, 1, 3]), 29 | decimal=6) 30 | -------------------------------------------------------------------------------- /tests/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/training/__init__.py -------------------------------------------------------------------------------- /tests/training/losses_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name,no-self-use 2 | import numpy 3 | from numpy.testing import assert_almost_equal 4 | from keras import backend as K 5 | from deep_qa.testing.test_case import DeepQaTestCase 6 | from deep_qa.training.losses import ranking_loss, ranking_loss_with_margin 7 | 8 | 9 | class TestLosses(DeepQaTestCase): 10 | def test_ranking_loss_is_computed_correctly(self): 11 | 
predictions = numpy.asarray([[.1, .4, .8], [-.1, -.2, .1]]) 12 | labels = numpy.asarray([[0, 0, 1], [1, 0, 0]]) 13 | sigmoid = lambda x: 1.0 / (1.0 + numpy.exp(-x)) 14 | expected_result = numpy.mean(-sigmoid(numpy.asarray([.8 - .4, -.1 - .1]))) 15 | result = K.eval(ranking_loss(K.variable(predictions), K.variable(labels))) 16 | assert_almost_equal(expected_result, result) 17 | 18 | def test_ranking_loss_with_margin_is_computed_correctly(self): 19 | predictions = numpy.asarray([[.1, .4, .8], [-.1, -.2, .1]]) 20 | labels = numpy.asarray([[0, 0, 1], [1, 0, 0]]) 21 | expected_result = numpy.mean(numpy.maximum(0, numpy.asarray([1 + .4 - .8, 1 + .1 - -.1]))) 22 | result = K.eval(ranking_loss_with_margin(K.variable(predictions), K.variable(labels))) 23 | assert_almost_equal(expected_result, result) 24 | -------------------------------------------------------------------------------- /tests/training/multi_gpu_test.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=no-self-use,invalid-name 2 | from copy import deepcopy 3 | 4 | import keras.backend as K 5 | from deep_qa.common.params import Params 6 | from deep_qa.models.text_classification import ClassificationModel 7 | from deep_qa.testing.test_case import DeepQaTestCase 8 | 9 | 10 | class TestMultiGpu(DeepQaTestCase): 11 | 12 | def setUp(self): 13 | super(TestMultiGpu, self).setUp() 14 | self.write_true_false_model_files() 15 | self.args = Params({ 16 | 'num_gpus': 2, 17 | }) 18 | 19 | def test_model_can_train_and_load(self): 20 | self.ensure_model_trains_and_loads(ClassificationModel, self.args) 21 | 22 | def test_model_can_train_and_load_with_generator(self): 23 | args = self.args 24 | 25 | args["data_generator"] = {"dynamic_batching": True, "padding_noise": 0.4} 26 | self.ensure_model_trains_and_loads(ClassificationModel, args) 27 | 28 | def test_variables_live_on_cpu(self): 29 | model = self.get_model(ClassificationModel, self.args) 30 | model.train() 31 | 32 | trainable_variables = model.model.trainable_weights 33 | for variable in trainable_variables: 34 | # This is an odd quirk of tensorflow - the devices are actually named 35 | # slightly differently from their scopes ... (i.e != "/cpu:0") 36 | assert variable.device == "/cpu:0" or variable.device == "" 37 | 38 | def test_multi_gpu_shares_variables(self): 39 | multi_gpu_model = self.get_model(ClassificationModel, self.args) 40 | 41 | single_gpu_args = deepcopy(self.args) 42 | single_gpu_args["num_gpus"] = 1 43 | single_gpu_model = self.get_model(ClassificationModel, single_gpu_args) 44 | 45 | multi_gpu_model.train() 46 | multi_gpu_variables = [x.name for x in multi_gpu_model.model.trainable_weights] 47 | 48 | K.clear_session() 49 | single_gpu_model.train() 50 | single_gpu_variables = ["tower_0/" + x.name for x in single_gpu_model.model.trainable_weights] 51 | 52 | assert single_gpu_variables == multi_gpu_variables 53 | --------------------------------------------------------------------------------
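For readers skimming losses_test.py above, the two ranking losses it checks reduce to simple arithmetic when the labels are one-hot, as they are in that test. The numpy sketch below mirrors the expected values the test computes; it is a reference for the arithmetic only, written under the one-hot assumption, and is not the library's actual Keras implementation:

    import numpy

    def sigmoid(x):
        return 1.0 / (1.0 + numpy.exp(-x))

    def ranking_loss_reference(predictions, labels):
        # Score of the correct option minus the best-scoring incorrect option,
        # passed through a sigmoid and negated so larger margins give lower loss.
        correct = numpy.sum(predictions * labels, axis=-1)
        best_wrong = numpy.max(numpy.where(labels == 1, -numpy.inf, predictions), axis=-1)
        return numpy.mean(-sigmoid(correct - best_wrong))

    def ranking_loss_with_margin_reference(predictions, labels):
        # Hinge on the same margin: penalize unless the correct option beats the
        # best incorrect option by at least 1.
        correct = numpy.sum(predictions * labels, axis=-1)
        best_wrong = numpy.max(numpy.where(labels == 1, -numpy.inf, predictions), axis=-1)
        return numpy.mean(numpy.maximum(0.0, 1.0 + best_wrong - correct))

    predictions = numpy.asarray([[.1, .4, .8], [-.1, -.2, .1]])
    labels = numpy.asarray([[0, 0, 1], [1, 0, 0]])
    print(ranking_loss_reference(predictions, labels))              # same value losses_test.py expects
    print(ranking_loss_with_margin_reference(predictions, labels))  # same value losses_test.py expects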