├── .gitignore
├── .pylintrc
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── build_tools
├── circle
│ ├── build_doc.sh
│ └── install_doc_requirements.sh
└── travis
│ ├── after_success.sh
│ ├── install.sh
│ └── test_script.sh
├── circle.yml
├── codecov.yml
├── deep_qa
├── README.md
├── __init__.py
├── common
│ ├── __init__.py
│ ├── checks.py
│ ├── models.py
│ ├── params.py
│ ├── tee_logger.py
│ └── util.py
├── data
│ ├── README.md
│ ├── __init__.py
│ ├── data_generator.py
│ ├── data_indexer.py
│ ├── dataset_readers
│ │ ├── __init__.py
│ │ └── squad_sentence_selection_reader.py
│ ├── datasets
│ │ ├── __init__.py
│ │ ├── dataset.py
│ │ ├── entailment
│ │ │ ├── __init__.py
│ │ │ └── snli_dataset.py
│ │ └── language_modeling
│ │ │ ├── __init__.py
│ │ │ └── language_modeling_dataset.py
│ ├── embeddings.py
│ ├── instances
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── entailment
│ │ │ ├── __init__.py
│ │ │ ├── sentence_pair_instance.py
│ │ │ └── snli_instance.py
│ │ ├── instance.py
│ │ ├── language_modeling
│ │ │ ├── __init__.py
│ │ │ └── sentence_instance.py
│ │ ├── reading_comprehension
│ │ │ ├── __init__.py
│ │ │ ├── character_span_instance.py
│ │ │ ├── mc_question_passage_instance.py
│ │ │ └── question_passage_instance.py
│ │ ├── sequence_tagging
│ │ │ ├── __init__.py
│ │ │ ├── pretokenized_tagging_instance.py
│ │ │ └── tagging_instance.py
│ │ └── text_classification
│ │ │ ├── __init__.py
│ │ │ └── text_classification_instance.py
│ └── tokenizers
│ │ ├── __init__.py
│ │ ├── character_tokenizer.py
│ │ ├── tokenizer.py
│ │ ├── word_and_character_tokenizer.py
│ │ ├── word_filter.py
│ │ ├── word_processor.py
│ │ ├── word_splitter.py
│ │ ├── word_stemmer.py
│ │ └── word_tokenizer.py
├── layers
│ ├── README.md
│ ├── __init__.py
│ ├── additive.py
│ ├── attention
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── gated_attention.py
│ │ ├── masked_softmax.py
│ │ ├── matrix_attention.py
│ │ ├── max_similarity_softmax.py
│ │ └── weighted_sum.py
│ ├── backend
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── add_mask.py
│ │ ├── batch_dot.py
│ │ ├── collapse_to_batch.py
│ │ ├── envelope.py
│ │ ├── expand_from_batch.py
│ │ ├── max.py
│ │ ├── multiply.py
│ │ ├── permute.py
│ │ ├── repeat.py
│ │ ├── repeat_like.py
│ │ ├── replace_masked_values.py
│ │ └── squeeze.py
│ ├── bigru_index_selector.py
│ ├── complex_concat.py
│ ├── encoders
│ │ ├── __init__.py
│ │ ├── attentive_gru.py
│ │ ├── bag_of_words.py
│ │ ├── convolutional_encoder.py
│ │ ├── positional_encoder.py
│ │ └── shareable_gru.py
│ ├── entailment_models
│ │ ├── __init__.py
│ │ ├── decomposable_attention.py
│ │ ├── multiple_choice_tuple_entailment.py
│ │ └── word_alignment.py
│ ├── highway.py
│ ├── l1_normalize.py
│ ├── masked_layer.py
│ ├── noisy_or.py
│ ├── option_attention_sum.py
│ ├── overlap.py
│ ├── subtract_minimum.py
│ ├── vector_matrix_merge.py
│ ├── vector_matrix_split.py
│ └── wrappers
│ │ ├── __init__.py
│ │ ├── add_encoder_mask.py
│ │ ├── encoder_wrapper.py
│ │ ├── output_mask.py
│ │ └── time_distributed.py
├── models
│ ├── README.md
│ ├── __init__.py
│ ├── entailment
│ │ ├── README.md
│ │ ├── __init__.py
│ │ └── decomposable_attention.py
│ ├── reading_comprehension
│ │ ├── __init__.py
│ │ ├── attention_sum_reader.py
│ │ ├── bidirectional_attention.py
│ │ └── gated_attention_reader.py
│ ├── sequence_tagging
│ │ ├── README.md
│ │ ├── __init__.py
│ │ └── simple_tagger.py
│ └── text_classification
│ │ ├── README.md
│ │ ├── __init__.py
│ │ └── classification_model.py
├── run.py
├── tensors
│ ├── README.md
│ ├── __init__.py
│ ├── backend.py
│ ├── masked_operations.py
│ └── similarity_functions
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── bilinear.py
│ │ ├── cosine_similarity.py
│ │ ├── dot_product.py
│ │ ├── linear.py
│ │ └── similarity_function.py
├── testing
│ ├── __init__.py
│ └── test_case.py
└── training
│ ├── README.md
│ ├── __init__.py
│ ├── losses.py
│ ├── models.py
│ ├── multi_gpu.py
│ ├── optimizers.py
│ ├── step.py
│ ├── text_trainer.py
│ ├── train_utils.py
│ └── trainer.py
├── doc
├── Makefile
├── _static
│ └── custom.css
├── _templates
│ └── layout.html
├── common
│ ├── about_common.rst
│ ├── checks.rst
│ └── params.rst
├── conf.py
├── data
│ ├── about_data.rst
│ ├── data_generator.rst
│ ├── datasets.rst
│ ├── entailment.rst
│ ├── general_data_utils.rst
│ ├── instances.rst
│ ├── reading_comprehension.rst
│ ├── sequence_tagging.rst
│ ├── text_classification.rst
│ └── tokenizers.rst
├── img
│ └── module_breakdown.png
├── index.rst
├── layers
│ ├── about_layers.rst
│ ├── attention.rst
│ ├── backend.rst
│ ├── core_layers.rst
│ ├── encoders.rst
│ ├── entailment_models.rst
│ └── wrappers.rst
├── models
│ ├── about_models.rst
│ ├── entailment.rst
│ ├── reading_comprehension.rst
│ └── text_classification.rst
├── run.rst
├── tensors
│ ├── about_tensors.rst
│ ├── core_tensors.rst
│ └── similarity_functions.rst
└── training
│ ├── about_trainers.rst
│ ├── misc.rst
│ ├── multi_gpu.rst
│ ├── text_trainer.rst
│ └── trainer.rst
├── example_experiments
├── entailment
│ └── snli_decomposable_attention.json
├── reading_comprehension
│ ├── asreader_who_did_what.json
│ ├── bidaf_squad.json
│ └── gareader_who_did_what.json
└── sequence_tagging
│ └── simple_tagger.json
├── pytest.ini
├── requirements.txt
├── scripts
├── clean_newsqa.py
├── clean_raw_omnibus.py
├── install_requirements.sh
├── pylint.sh
├── run_ensemble.py
├── run_model.py
└── set_processor.sh
├── setup.cfg
├── setup.py
└── tests
├── __init__.py
├── common
├── __init__.py
├── pythonhashseed_test.py
└── test_util.py
├── data
├── __init__.py
├── data_generator_test.py
├── data_indexer_test.py
├── dataset_readers
│ ├── __init__.py
│ └── squad_sentence_selection_reader_test.py
├── datasets
│ ├── __init__.py
│ ├── dataset_test.py
│ ├── language_modeling_dataset_test.py
│ └── snli_dataset_test.py
├── embeddings_test.py
├── instances
│ ├── __init__.py
│ ├── entailment
│ │ ├── __init__.py
│ │ ├── sentence_pair_instance_test.py
│ │ └── snli_instance_test.py
│ ├── language_modeling
│ │ ├── __init__.py
│ │ └── sentence_instance_test.py
│ ├── reading_comprehension
│ │ ├── __init__.py
│ │ ├── character_span_instance_test.py
│ │ └── mc_question_passage_instance_test.py
│ ├── sequence_tagging
│ │ ├── __init__.py
│ │ ├── pretokenized_tagging_instance_test.py
│ │ └── test_tagging_instance.py
│ ├── text_classification
│ │ ├── __init__.py
│ │ └── text_classification_instance_test.py
│ └── text_instance_test.py
└── tokenizers
│ ├── tokenizer_test.py
│ ├── word_processor_test.py
│ └── word_splitter_test.py
├── example_experiments_test.py
├── layers
├── __init__.py
├── attention
│ ├── __init__.py
│ ├── attention_test.py
│ ├── gated_attention_test.py
│ ├── masked_softmax_test.py
│ ├── matrix_attention_test.py
│ └── weighted_sum_test.py
├── attentive_gru_test.py
├── backend
│ ├── __init__.py
│ ├── batch_dot_test.py
│ ├── collapse_and_expand_test.py
│ ├── envelope_test.py
│ ├── max_test.py
│ ├── multiply_test.py
│ ├── permute_test.py
│ ├── repeat_like_test.py
│ ├── repeat_test.py
│ └── replace_masked_values_test.py
├── bigru_index_selector_test.py
├── complex_concat_test.py
├── decomposable_attention_test.py
├── encoders
│ ├── __init__.py
│ └── bow_encoder_test.py
├── noisy_or_test.py
├── overlap_test.py
├── positional_encoder_test.py
├── test_l1_normalize.py
├── test_option_attention_sum.py
├── test_subtract_minimum.py
├── tuple_alignment_test.py
├── vector_matrix_merge_test.py
├── vector_matrix_split_test.py
└── wrappers
│ ├── __init__.py
│ ├── add_encoder_mask_test.py
│ ├── encoder_wrapper_test.py
│ └── time_distributed_test.py
├── models
├── __init__.py
├── entailment
│ ├── __init__.py
│ └── decomposable_attention_test.py
├── reading_comprehension
│ ├── __init__.py
│ ├── attention_sum_reader_test.py
│ ├── bidirectional_attention_test.py
│ └── gated_attention_reader_test.py
└── sequence_tagging
│ ├── __init__.py
│ └── simple_tagger_test.py
├── run_test.py
├── tensors
├── __init__.py
├── backend_test.py
├── masked_operations_test.py
└── similarity_functions
│ ├── __init__.py
│ ├── bilinear_test.py
│ ├── cosine_similarity_test.py
│ ├── dot_product_test.py
│ └── linear_test.py
└── training
├── __init__.py
├── losses_test.py
├── multi_gpu_test.py
├── text_trainer_test.py
└── train_utils_test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | /data/
3 | /models/
4 | /experiments/
5 | .cache/
6 | .coverage
7 | _build
8 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # use new container-based travis workers
2 | sudo: false
3 | dist: trusty
4 |
5 | language: python
6 |
7 | cache:
8 | directories:
9 | - /home/travis/nltk_data
10 | - /home/travis/download
11 | - /home/travis/miniconda3
12 | - /home/travis/.cache/pip
13 |
14 | env:
15 | matrix:
16 | # run tests, submit coverage info
17 | - COVERAGE="true"
18 | # pylint checks skip the tests
19 | - RUN_PYLINT="true" SKIP_TESTS="true"
20 |
21 | notifications:
22 | email: false
23 |
24 | install: source build_tools/travis/install.sh
25 | script: bash build_tools/travis/test_script.sh
26 | after_success: source build_tools/travis/after_success.sh
27 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include requirements.txt
3 | include scripts/*
4 | include example_experiments/*.json
5 | recursive-exclude tests *
6 |
--------------------------------------------------------------------------------
/build_tools/circle/build_doc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -x
3 | set -e
4 |
5 | MAKE_TARGET=html-strict
6 |
7 | source activate testenv
8 |
 9 | # pipefail is set so that the exit code propagates through the pipe to tee
10 | set -o pipefail && cd doc && make $MAKE_TARGET 2>&1 | tee ~/log.txt
11 |
12 | echo "Finished building docs."
13 | echo "Artifacts in $CIRCLE_ARTIFACTS"
14 |
--------------------------------------------------------------------------------
/build_tools/circle/install_doc_requirements.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -x
3 | set -e
4 |
5 | # Installing required system packages to support the rendering of math
6 | # notation in the HTML documentation
7 | rm -rf ~/.pyenv && rm -rf ~/virtualenvs
8 | sudo -E apt-get -yq remove texlive-binaries --purge
9 | sudo -E apt-get -yq update
10 | sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes \
11 | install dvipng texlive-latex-base texlive-latex-extra \
12 | texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended
13 |
14 | # deactivate circleci virtualenv and setup a miniconda env instead
15 | if [[ `type -t deactivate` ]]; then
16 | deactivate
17 | fi
18 |
19 | # Install dependencies with miniconda
20 | pushd .
21 | cd
22 | mkdir -p download
23 | cd download
24 | echo "Cached in $HOME/download :"
25 | ls -l
26 | if [[ ! -f miniconda.sh ]]
27 | then
28 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh \
29 | -O miniconda.sh
30 | fi
31 | chmod +x miniconda.sh && ./miniconda.sh -b -p $MINICONDA_PATH
32 | cd ..
33 | export PATH="$MINICONDA_PATH/bin:$PATH"
34 | conda update --yes --quiet conda
35 | popd
36 |
37 | # Configure the conda environment and put it in the path using the
38 | # provided versions.
39 | conda create -n $CONDA_ENV_NAME --yes --quiet python=3.5.2
40 | source activate testenv
41 |
42 | # Install pip dependencies.
43 | pip install --quiet -r requirements.txt
44 |
--------------------------------------------------------------------------------
/build_tools/travis/after_success.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This script is meant to be called by the "after_success" step defined in
3 | # .travis.yml. See http://docs.travis-ci.com/ for more details.
4 |
5 | set -e
6 |
7 | if [[ "$COVERAGE" == "true" ]]; then
8 | # Ignore codecov failures as the codecov server is not
9 | # very reliable but we don't want travis to report a failure
10 | # in the github UI just because the coverage report failed to
11 | # be published.
12 | codecov || echo "codecov upload failed"
13 | fi
14 |
--------------------------------------------------------------------------------
/build_tools/travis/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | echo 'List files from cached directories'
5 | echo 'pip:'
6 | ls $HOME/.cache/pip
7 |
8 | # Deactivate the travis-provided virtual environment and setup a
9 | # conda-based environment instead
10 | deactivate
11 |
12 | # Add the miniconda bin directory to $PATH
13 | export PATH=/home/travis/miniconda3/bin:$PATH
14 | echo $PATH
15 |
16 | # Use the miniconda installer for setup of conda itself
17 | pushd .
18 | cd
19 | mkdir -p download
20 | cd download
21 | if [[ ! -f /home/travis/miniconda3/bin/activate ]]
22 | then
23 | if [[ ! -f miniconda.sh ]]
24 | then
25 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
26 | -O miniconda.sh
27 | fi
28 | chmod +x miniconda.sh && ./miniconda.sh -b -f
29 | conda update --yes conda
30 | # If we are running pylint, use Python 3.5.2 due to
31 | # a bug in pylint. https://github.com/PyCQA/pylint/issues/1295
32 | conda create -n testenv352 --yes python=3.5.2
33 | conda create -n testenv --yes python=3.5
34 | fi
35 | cd ..
36 | popd
37 |
38 | # Activate the python environment we created.
39 | if [[ "$RUN_PYLINT" == "true" ]]; then
40 | source activate testenv352
41 | else
42 | source activate testenv
43 | fi
44 |
45 | # Install requirements via pip in our conda environment
46 | pip install -U -r requirements.txt
47 |
48 | # List the packages to get their versions for debugging
49 | pip list
50 |
51 | # Install punkt tokenizer
52 | python -m nltk.downloader punkt
53 |
54 | # Install spacy data
55 | python -m spacy.en.download all
56 |
--------------------------------------------------------------------------------
/build_tools/travis/test_script.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This script is meant to be called by the "script" step defined in
3 | # .travis.yml. See http://docs.travis-ci.com/ for more details.
4 | # The behavior of the script is controlled by environment variabled defined
5 | # in the .travis.yml in the top level folder of the project.
6 |
7 | # License: 3-clause BSD
8 |
9 | set -e
10 |
11 | python --version
12 |
13 | export PYTHONHASHSEED=2157
14 |
15 | run_tests() {
16 | KERAS_BACKEND=tensorflow py.test -v --cov=deep_qa --durations=20
17 | }
18 |
19 | if [[ "$RUN_PYLINT" == "true" ]]; then
20 | source scripts/pylint.sh
21 | fi
22 |
23 | if [[ "$SKIP_TESTS" != "true" ]]; then
24 | run_tests
25 | fi
26 |
--------------------------------------------------------------------------------
/circle.yml:
--------------------------------------------------------------------------------
1 | machine:
2 | environment:
3 | MINICONDA_PATH: $HOME/miniconda
4 | CONDA_ENV_NAME: testenv
5 | PATH: $MINICONDA_PATH/bin:$PATH
6 |
7 | dependencies:
8 | cache_directories:
9 | - "~/download"
10 | # Check whether the doc build is required, install build dependencies and
11 | # run sphinx to build the doc.
12 | override:
13 | - bash build_tools/circle/install_doc_requirements.sh
14 | - bash build_tools/circle/build_doc.sh
15 |
16 | test:
17 | override:
18 | - |
19 | export PATH="$MINICONDA_PATH/bin:$PATH"
20 | source activate $CONDA_ENV_NAME
21 | # we could run doctests here in the future
22 | # make doctests
23 | general:
24 | # Expose the built docs as CircleCI build artifacts
25 | artifacts:
26 | - "doc/_build/html"
27 | - "~/log.txt"
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | coverage:
2 | precision: 0
3 | round: down
4 | status:
5 | patch:
6 | default:
7 | target: 90
8 | project:
9 | default:
10 | threshold: 1%
11 | changes: false
12 | comment: false
13 | ignore:
14 | - "deep_qa/contrib"
15 |
--------------------------------------------------------------------------------
/deep_qa/README.md:
--------------------------------------------------------------------------------
1 | # DeepQA
2 |
3 | DeepQA is organised into the following main sections:
4 |
5 | - `common`: Code for parameter parsing, logging and runtime checks.
6 | - `data`: Indexing, padding, tokenisation, stemming, embedding and general dataset manipulation
7 | happens here.
8 | - `layers`: The bulk of the library. Use these Layers to compose new models. Some of these Layers
9 | are very similar to what you might find in Keras, but altered slightly to support arbitrary
10 | dimensions or correct masking.
11 | - `models`: Frameworks for different types of task. These generally all extend the TextTrainer
12 | class which provides training capabilities to a DeepQaModel. We have models for Sequence
13 | Tagging, Entailment, Multiple Choice QA, Reading Comprehension and more. Take a look at the READMEs
14 | under `models` for more details - each task typically has a README describing the task definition.
15 | - `tensors`: Convenience functions for writing the internals of Layers. Will almost exclusively be
16 | used inside Layer implementations.
17 | - `training`: This module does the heavy lifting for training and optimisation. We also wrap the
18 | Keras Model class to give it some useful debugging functionality.
19 |
20 | The `data` and `models` sections are, in turn, structured according to what task they are intended
21 | for (e.g., text classification, reading comprehension, sequence tagging, etc.). This should make
22 | it easy to see if something you are trying to do is already implemented in DeepQA or not.
23 |
24 |
--------------------------------------------------------------------------------
/deep_qa/__init__.py:
--------------------------------------------------------------------------------
1 | from .run import run_model, evaluate_model, load_model, score_dataset, score_dataset_with_ensemble
2 | from .run import compute_accuracy, run_model_from_file
3 |
--------------------------------------------------------------------------------
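For orientation, a minimal usage sketch of the entry points exported above (the parameter path points at one of the configs under `example_experiments/`; it is an assumption that `run_model_from_file` takes such a path - see `run.py` for the full set of entry points):

```python
# Minimal sketch: train a model from a JSON parameter file. The assumption is
# that run_model_from_file accepts a path to a parameter file like those
# shipped under example_experiments/.
from deep_qa import run_model_from_file

run_model_from_file("example_experiments/sequence_tagging/simple_tagger.json")
```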
/deep_qa/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/common/__init__.py
--------------------------------------------------------------------------------
/deep_qa/common/checks.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | REQUIRED_PYTHONHASHSEED = '2157'
5 |
6 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name
7 |
8 | class ConfigurationError(Exception):
9 | def __init__(self, message):
10 | super(ConfigurationError, self).__init__()
11 | self.message = message
12 |
13 | def __str__(self):
14 | return repr(self.message)
15 |
16 |
17 | def ensure_pythonhashseed_set():
18 | message = """You must set PYTHONHASHSEED to %s so we get repeatable results and tests pass.
19 | You can do this with the command `export PYTHONHASHSEED=%s`.
20 | See https://docs.python.org/3/using/cmdline.html#envvar-PYTHONHASHSEED for more info.
21 | """
22 | assert os.environ.get('PYTHONHASHSEED', None) == REQUIRED_PYTHONHASHSEED, \
23 | message % (REQUIRED_PYTHONHASHSEED, REQUIRED_PYTHONHASHSEED)
24 |
25 |
26 | def log_keras_version_info():
27 | import keras
28 | logger.info("Keras version: " + keras.__version__)
29 | from keras import backend as K
30 | try:
31 | backend = K.backend()
32 | except AttributeError:
33 | backend = K._BACKEND # pylint: disable=protected-access
34 | if backend == 'theano':
35 | import theano
36 | logger.info("Theano version: " + theano.__version__)
37 | logger.warning("Using Keras' theano backend is not supported! Expect to crash...")
38 | elif backend == 'tensorflow':
39 | import tensorflow
40 | logger.info("Tensorflow version: " + tensorflow.__version__) # pylint: disable=no-member
41 |
--------------------------------------------------------------------------------
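A quick illustration of the PYTHONHASHSEED check above (a sketch, not part of the repository):

```python
import os

from deep_qa.common.checks import ensure_pythonhashseed_set

# The check only inspects os.environ, so this passes. Note that for Python's
# hash randomization to actually be seeded, the variable must be exported in
# the shell before the interpreter starts (e.g. `export PYTHONHASHSEED=2157`),
# which is what the Travis test script does.
os.environ['PYTHONHASHSEED'] = '2157'
ensure_pythonhashseed_set()  # raises AssertionError if the value is missing or wrong
```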
/deep_qa/common/models.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from keras.models import Model
4 |
5 | from ..training.models import DeepQaModel
6 |
7 |
8 | def get_submodel(model: Model,
9 | input_layer_names: List[str],
10 | output_layer_names: List[str],
11 | train_model: bool=False,
12 | name=None):
13 | """
14 | Returns a new model constructed from ``model``. This model will be a subset of the given
15 | ``Model``, with the inputs specified by ``input_layer_names`` and the outputs specified by
16 | ``output_layer_names``. For example, if the input model is :class:`BiDAF
17 | .models.reading_comprehension.bidirectional_attention.BidirectionalAttentionFlow`, you can use
18 | this to get a model that outputs the passage embedding, just before the span prediction
19 | layers, by calling
20 | ``get_submodel(bidaf.model, ['question_input', 'passage_input'], ['final_merged_passage'])``.
21 | """
22 | layer_input_dict = {}
23 | layer_output_dict = {}
24 | for layer in model.layers:
25 | layer_input_dict[layer.name] = layer.get_input_at(0)
26 | layer_output_dict[layer.name] = layer.get_output_at(0)
27 | input_layers = [layer_input_dict[name] for name in input_layer_names]
28 | output_layers = [layer_output_dict[name] for name in output_layer_names]
29 | submodel = DeepQaModel(inputs=input_layers, outputs=output_layers, name=name)
30 | if not train_model:
31 | submodel.trainable = False
32 | return submodel
33 |
--------------------------------------------------------------------------------
/deep_qa/common/tee_logger.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 |
4 |
5 | class TeeLogger:
6 | """
7 | This class is an attempt to maintain logs of both stdout and stderr for when models are run.
8 | To use this class, at the beginning of your script insert these lines::
9 |
10 | sys.stdout = TeeLogger("stdout.log", sys.stdout)
11 | sys.stderr = TeeLogger("stdout.log", sys.stderr)
12 | """
13 | def __init__(self, filename: str, terminal: io.TextIOWrapper):
14 | self.terminal = terminal
15 | parent_directory = os.path.dirname(filename)
16 | os.makedirs(parent_directory, exist_ok=True)
17 | self.log = open(filename, 'a')
18 |
19 | def write(self, message):
20 | self.terminal.write(message)
21 | # We'll special case a particular thing that keras does, to make the log file more
22 | # readable. Keras uses ^H characters to get the training line to update for each batch
23 | # without adding more lines to the terminal output. Displaying those in a file won't work
24 | # correctly, so we'll just make sure that each batch shows up on its own line.
25 | if '\x08' in message:
26 | message = message.replace('\x08', '')
27 | if len(message) == 0 or message[-1] != '\n':
28 | message += '\n'
29 | self.log.write(message)
30 |
31 | def flush(self):
32 | self.terminal.flush()
33 | self.log.flush()
34 |
--------------------------------------------------------------------------------
/deep_qa/common/util.py:
--------------------------------------------------------------------------------
1 | from itertools import zip_longest
2 | from typing import Any, Dict, List
3 | import random
4 |
5 |
6 | def group_by_count(iterable: List[Any], count: int, default_value: Any) -> List[List[Any]]:
7 | """
8 | Takes a list and groups it into sublists of size ``count``, using ``default_value`` to pad the
 9 | list at the end if the list is not divisible by ``count``.
10 |
11 | For example:
12 | >>> group_by_count([1, 2, 3, 4, 5, 6, 7], 3, 0)
13 | [[1, 2, 3], [4, 5, 6], [7, 0, 0]]
14 |
15 | This is a short method, but it's complicated and hard to remember as a one-liner, so we just
16 | make a function out of it.
17 | """
18 | return [list(l) for l in zip_longest(*[iter(iterable)] * count, fillvalue=default_value)]
19 |
20 |
21 | def add_noise_to_dict_values(dictionary: Dict[Any, float], noise_param: float) -> Dict[Any, float]:
22 | """
23 | Returns a new dictionary with noise added to every key in ``dictionary``. The noise is
24 | uniformly distributed within ``noise_param`` percent of the value for every value in the
25 | dictionary.
26 | """
27 | new_dict = {}
28 | for key, value in dictionary.items():
29 | noise_value = value * noise_param
30 | noise = random.uniform(-noise_value, noise_value)
31 | new_dict[key] = value + noise
32 | return new_dict
33 |
34 |
35 | def clean_layer_name(input_name: str,
36 | strip_right_of_last_backslash: bool=True,
37 | strip_numerics_after_underscores: bool=True):
38 | """
39 | There exist cases when layer names need to be concatenated in order to create new, unique
40 | layer names. However, the indices added to layer names designating the ith output of calling
41 | the layer cannot occur within a layer name apart from at the end, so this utility function
42 | removes these.
43 |
44 | Parameters
45 | ----------
46 |
47 | input_name: str, required
48 | A Keras layer name.
49 | strip_right_of_last_backslash: bool, optional, (default = True)
50 | Should we strip anything past the last backslash in the name?
51 | This can be useful for controlling scopes.
52 | strip_numerics_after_underscores: bool, optional, (default = True)
53 | If there are numerical values after an underscore at the end of the layer name,
54 | this flag specifies whether or not to remove them.
55 | """
56 | # Always strip anything after :, as these will be numerical
57 | # counts of the number of times the layer has been called,
58 | # which cannot be included in a layer name.
59 | if ':' in input_name:
60 | input_name = input_name.split(':')[0]
61 | if '/' in input_name and strip_right_of_last_backslash:
62 | input_name = input_name.rsplit('/', 1)[0]
63 | if input_name.split('_')[-1].isdigit() and strip_numerics_after_underscores:
64 | input_name = '_'.join(input_name.split('_')[:-1])
65 |
66 | return input_name
67 |
--------------------------------------------------------------------------------
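A quick illustration of two of the helpers above (a sketch; the layer name is made up but follows TensorFlow's usual `name_N/op:M` pattern):

```python
from deep_qa.common.util import clean_layer_name, group_by_count

# ':0' (the call count) is always stripped, '/kernel' is removed by the scope
# rule, and the trailing numeric '_1' suffix is dropped, leaving just 'dense'.
print(clean_layer_name('dense_1/kernel:0'))   # -> 'dense'

# The last group is padded with the default value.
print(group_by_count([1, 2, 3, 4, 5], 2, 0))  # -> [[1, 2], [3, 4], [5, 0]]
```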
/deep_qa/data/README.md:
--------------------------------------------------------------------------------
1 | # Data
2 |
3 | This module contains code for processing data. There's a `DataIndexer`, whose job it is to convert
4 | from strings to word (or character) indices suitable for use with an embedding matrix. There's
5 | code to load pre-trained embeddings from a file, to tokenize sentences, and, most importantly, to
6 | convert training and testing examples into numpy arrays that can be used with Keras.
7 |
8 | The most important thing to understand about the data processing code is the `Dataset` object. A
9 | `Dataset` is a collection of `Instances`, which are the individual examples used for training and
10 | testing. `Dataset` has two subclasses: `TextDataset`, which contains `Instances` with raw strings
11 | and can be read directly from a file, and `IndexedDataset`, which contains `Instances` whose raw
12 | strings have been converted to word (or character) indices. The `IndexedDataset` has methods for
13 | padding sequences to a consistent length, so that models can be compiled, and for converting the
14 | `Instances` to numpy arrays. The file formats read by `TextDataset`, and the format of the numpy
15 | arrays produced by `IndexedDataset`, are determined by the underlying `Instance` type used by the
16 | `Dataset`. See the `instances` module for more detail on this.
17 |
--------------------------------------------------------------------------------
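For concreteness, a hedged sketch of reading a `TextDataset` subclass from a file, using the SNLI reader defined in `datasets/entailment/snli_dataset.py` (the file path is a placeholder, and the `instances` attribute is assumed from the constructor signature):

```python
from deep_qa.data.datasets.entailment.snli_dataset import SnliDataset
from deep_qa.data.instances.entailment import SnliInstance

# read_from_file builds one SnliInstance per JSON-lines example.
# "/path/to/snli_1.0_train.jsonl" is a placeholder for a real SNLI file.
dataset = SnliDataset.read_from_file("/path/to/snli_1.0_train.jsonl", SnliInstance)
print(len(dataset.instances))  # assumes the Dataset keeps its instances in this attribute
```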
/deep_qa/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .datasets.dataset import Dataset, IndexedDataset, TextDataset
2 |
3 | from .data_generator import DataGenerator
4 | from .data_indexer import DataIndexer
5 | from .tokenizers import tokenizers
6 |
--------------------------------------------------------------------------------
/deep_qa/data/dataset_readers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/data/dataset_readers/__init__.py
--------------------------------------------------------------------------------
/deep_qa/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | from .entailment.snli_dataset import SnliDataset
4 | from .language_modeling.language_modeling_dataset import LanguageModelingDataset
5 | from .dataset import Dataset, TextDataset, IndexedDataset
6 |
7 |
8 | concrete_datasets = OrderedDict() # pylint: disable=invalid-name
9 | concrete_datasets["text"] = TextDataset
10 | concrete_datasets["language_modeling"] = LanguageModelingDataset
11 | concrete_datasets["snli"] = SnliDataset
12 |
--------------------------------------------------------------------------------
/deep_qa/data/datasets/entailment/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/data/datasets/entailment/__init__.py
--------------------------------------------------------------------------------
/deep_qa/data/datasets/entailment/snli_dataset.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | import json
3 |
4 | from overrides import overrides
5 |
6 | from ..dataset import TextDataset, log_label_counts
7 | from ...instances import TextInstance
8 | from ....common.params import Params
9 |
10 |
11 | class SnliDataset(TextDataset):
12 |
13 | def __init__(self, instances: List[TextInstance], params: Params=None):
14 | super(SnliDataset, self).__init__(instances, params)
15 |
16 | @staticmethod
17 | @overrides
18 | def read_from_file(filename: str, instance_class, params: Params=None):
19 |
20 | instances = []
21 | for line in open(filename, 'r'):
22 | example = json.loads(line)
23 |
24 | # TODO(mark) why does this not match snli? Fix.
25 | label = example["gold_label"]
26 | if label == "entailment":
27 | label = "entails"
28 | elif label == "contradiction":
29 | label = "contradicts"
30 |
31 | text = example["sentence1"]
32 | hypothesis = example["sentence2"]
33 | instances.append(instance_class(text, hypothesis, label))
34 | log_label_counts(instances)
35 | return SnliDataset(instances, params)
36 |
--------------------------------------------------------------------------------
/deep_qa/data/datasets/language_modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/data/datasets/language_modeling/__init__.py
--------------------------------------------------------------------------------
/deep_qa/data/datasets/language_modeling/language_modeling_dataset.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from overrides import overrides
4 |
5 | from ..dataset import TextDataset, log_label_counts
6 | from ...instances import TextInstance
7 | from ...instances.language_modeling import SentenceInstance
8 | from ....common.params import Params
9 |
10 |
11 | class LanguageModelingDataset(TextDataset):
12 |
13 | def __init__(self, instances: List[TextInstance], params: Params=None):
14 | # TODO(Mark): We are splitting on spaces below, so this won't end up being
15 | # the exact sequence length. This could be solved by passing the tokeniser
16 | # to the dataset.
17 | self.sequence_length = params.pop("sequence_length")
18 | super(LanguageModelingDataset, self).__init__(instances)
19 |
20 | @staticmethod
21 | @overrides
22 | def read_from_file(filename: str, instance_class, params: Params=None):
23 |
24 | sequence_length = params.get("sequence_length", 20)
25 | with open(filename, "r") as text_file:
26 | text = text_file.readlines()
27 | text = " ".join([x.replace("\n", " ").strip() for x in text]).split(" ")
28 |
29 | instances = []
30 | for index in range(0, len(text) - sequence_length, sequence_length):
31 | word_sequence = " ".join(text[index: index + sequence_length])
32 | instances.append(SentenceInstance(word_sequence))
33 |
34 | log_label_counts(instances)
35 | return LanguageModelingDataset(instances, params)
36 |
--------------------------------------------------------------------------------
/deep_qa/data/instances/README.md:
--------------------------------------------------------------------------------
1 | # Instances
2 |
3 | An `Instance` is a single training or testing example for a Keras model. The base classes for
4 | working with `Instances` are found in `instance.py`. There are two subclasses: (1) `TextInstance`,
5 | which is a raw instance that contains actual strings, and can be used to determine a vocabulary
6 | for a model, or read directly from a file; and (2) `IndexedInstance`, which has had its raw
7 | strings converted to word (or character) indices, and can be padded to a consistent length and
8 | converted to numpy arrays for use with Keras.
9 |
10 | There are a lot of different concrete `Instance` objects you can use. Some examples:
11 |
12 | - A `TrueFalseInstance`, which contains a single sentence with a true/false label. The numpy array
13 | for this instance is just a single word index sequence.
14 | - A `MultipleTrueFalseInstance`, which contains several `TrueFalseInstances`, only one of which is
15 | true. The numpy array here has shape `(num_options, sentence_length)`, and the label is a
16 | one-hot vector of length `num_options`.
17 | - A `BackgroundInstance`, which wraps another `Instance` type with a set of background sentences,
18 | adding an additional input of size `(knowledge_length, sentence_length)`.
19 | - A `LogicalFormInstance`, which is a `TrueFalseInstance` where the "sentence" is actually a
20 | tree-structured logical form (hmm, maybe we should call this a `TreeInstance` instead...
21 | TODO(matt).). In addition to the numpy array containing the word index sequence, there's another
22 | array containing shift / reduce operations so that you can construct a tree-structured network
23 | using a sequence, like in the [SPINN
24 | paper](https://www.semanticscholar.org/paper/A-Fast-Unified-Model-for-Parsing-and-Sentence-Bowman-Gauthier/23c141141f4f63c061d3cce14c71893959af5721)
25 | by Sam Bowman and others (see the [TreeCompositionLSTM
26 | encoder](https://github.com/allenai/deep_qa/blob/master/src/main/python/deep_qa/layers/encoders/tree_composition_lstm.py)
27 | for a way to actually use this in a model).
28 |
29 | A lot of the magic of how the DeepQA library works happens here, in the concrete `Instance`
30 | classes in this module. Most of the code can be totally agnostic to how exactly the input is
31 | structured, because the conversion to numpy arrays happens here, not in the `Trainer` or `Solver`
32 | classes, with only the specific `_build_model()` methods needing to know about the format of their
33 | input and output.
34 |
--------------------------------------------------------------------------------
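To make the `TextInstance` / `IndexedInstance` split above concrete, a hypothetical sketch of going from raw text to numpy arrays; the method names `to_indexed_instance` and `as_training_data` are assumptions based on the description above and may not match the code exactly:

```python
from deep_qa.data.data_indexer import DataIndexer
from deep_qa.data.instances.language_modeling import SentenceInstance

# Assumed method names (see note above); SentenceInstance is a TextInstance
# holding a raw string.
indexer = DataIndexer()
instance = SentenceInstance("the dog barked")
indexed = instance.to_indexed_instance(indexer)  # strings -> word indices
inputs, labels = indexed.as_training_data()      # indices -> numpy arrays
```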
/deep_qa/data/instances/__init__.py:
--------------------------------------------------------------------------------
1 | from .instance import Instance, TextInstance, IndexedInstance
2 |
--------------------------------------------------------------------------------
/deep_qa/data/instances/entailment/__init__.py:
--------------------------------------------------------------------------------
1 | from .sentence_pair_instance import SentencePairInstance, IndexedSentencePairInstance
2 | from .snli_instance import SnliInstance
3 |
--------------------------------------------------------------------------------
/deep_qa/data/instances/language_modeling/__init__.py:
--------------------------------------------------------------------------------
1 | from .sentence_instance import SentenceInstance, IndexedSentenceInstance
2 |
--------------------------------------------------------------------------------
/deep_qa/data/instances/reading_comprehension/__init__.py:
--------------------------------------------------------------------------------
1 | from .character_span_instance import CharacterSpanInstance, IndexedCharacterSpanInstance
2 | from .mc_question_passage_instance import McQuestionPassageInstance, IndexedMcQuestionPassageInstance
3 | from .question_passage_instance import QuestionPassageInstance, IndexedQuestionPassageInstance
4 |
--------------------------------------------------------------------------------
/deep_qa/data/instances/sequence_tagging/__init__.py:
--------------------------------------------------------------------------------
1 | from .pretokenized_tagging_instance import PreTokenizedTaggingInstance
2 | from .tagging_instance import TaggingInstance, IndexedTaggingInstance
3 |
4 | concrete_instances = { # pylint: disable=invalid-name
5 | 'PreTokenizedTaggingInstance': PreTokenizedTaggingInstance,
6 | }
7 |
--------------------------------------------------------------------------------
/deep_qa/data/instances/sequence_tagging/pretokenized_tagging_instance.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy
4 | from overrides import overrides
5 |
6 | from .tagging_instance import TaggingInstance
7 | from ...data_indexer import DataIndexer
8 |
9 | class PreTokenizedTaggingInstance(TaggingInstance):
10 | """
11 | This is a ``TaggingInstance`` where the text has been pre-tokenized. Thus the ``text`` member
12 | variable here is actually a ``List[str]``, instead of a ``str``.
13 |
14 | When using this ``Instance``, you `must` use the ``NoOpWordSplitter`` as well, or things will
15 | break. You probably also do not want any kind of filtering (though stemming is ok), because
16 | only the words will get filtered, not the labels.
17 | """
18 | def __init__(self, text: List[str], label: List[str], index: int=None):
19 | super(PreTokenizedTaggingInstance, self).__init__(text, label, index)
20 |
21 | @classmethod
22 | @overrides
23 | def read_from_line(cls, line: str):
24 | """
25 | Reads a ``PreTokenizedTaggingInstance`` from a line. The format has one of two options:
26 |
27 | 1. [example index][token1]###[tag1][tab][token2]###[tag2][tab]...
28 | 2. [token1]###[tag1][tab][token2]###[tag2][tab]...
29 | """
30 | fields = line.split("\t")
31 |
32 | if fields[0].isdigit():
33 | index = int(fields[0])
34 | fields = fields[1:]
35 | else:
36 | index = None
37 | tokens = []
38 | tags = []
39 | for field in fields:
40 | token, tag = field.rsplit("###", 1)
41 | tokens.append(token)
42 | tags.append(tag)
43 | return cls(tokens, tags, index)
44 |
45 | @overrides
46 | def tags_in_label(self):
47 | return [tag for tag in self.label]
48 |
49 | @overrides
50 | def _index_label(self, label: List[str], data_indexer: DataIndexer) -> List[int]:
51 | tag_indices = [data_indexer.get_word_index(tag, namespace='tags') for tag in label]
52 | indexed_label = []
53 | for tag_index in tag_indices:
54 | # We subtract 2 here to account for the unknown and padding tokens that the DataIndexer
55 | # uses.
56 | tag_one_hot = numpy.zeros(data_indexer.get_vocab_size(namespace='tags') - 2)
57 | tag_one_hot[tag_index - 2] = 1
58 | indexed_label.append(tag_one_hot)
59 | return indexed_label
60 |
--------------------------------------------------------------------------------
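A short example of the line format accepted by `read_from_line` above (tab-separated fields, `###` between token and tag, with an optional leading example index):

```python
from deep_qa.data.instances.sequence_tagging import PreTokenizedTaggingInstance

# "12" is the optional example index; each remaining field is token###tag.
line = "12\tThe###DT\tdog###NN\tbarked###VBD"
instance = PreTokenizedTaggingInstance.read_from_line(line)
print(instance.text)   # ['The', 'dog', 'barked']
print(instance.label)  # ['DT', 'NN', 'VBD']
print(instance.index)  # 12
```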
/deep_qa/data/instances/text_classification/__init__.py:
--------------------------------------------------------------------------------
1 | from .text_classification_instance import TextClassificationInstance, IndexedTextClassificationInstance
2 |
--------------------------------------------------------------------------------
/deep_qa/data/tokenizers/__init__.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | from .character_tokenizer import CharacterTokenizer
4 | from .word_and_character_tokenizer import WordAndCharacterTokenizer
5 | from .word_tokenizer import WordTokenizer
6 |
7 | # The first item added here will be used as the default in some cases.
8 | tokenizers = OrderedDict() # pylint: disable=invalid-name
9 | tokenizers['words'] = WordTokenizer
10 | tokenizers['characters'] = CharacterTokenizer
11 | tokenizers['words and characters'] = WordAndCharacterTokenizer
12 |
--------------------------------------------------------------------------------
/deep_qa/data/tokenizers/word_processor.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from .word_splitter import word_splitters
4 | from .word_stemmer import word_stemmers
5 | from .word_filter import word_filters
6 | from ...common.params import Params
7 |
8 |
9 | class WordProcessor:
10 | """
11 | A WordProcessor handles the splitting of strings into words (with the use of a WordSplitter) as well as any
12 | desired post-processing (e.g., stemming, filtering, etc.)
13 |
14 | Parameters
15 | ----------
16 | word_splitter: str, default="simple"
17 | The string name of the ``WordSplitter`` of choice (see the options at the bottom of
18 | ``word_splitter.py``).
19 |
20 | word_filter: str, default="pass_through"
21 | The name of the ``WordFilter`` to use (see the options at the bottom of
22 | ``word_filter.py``).
23 |
24 | word_stemmer: str, default="pass_through"
25 | The name of the ``WordStemmer`` to use (see the options at the bottom of
26 | ``word_stemmer.py``).
27 | """
28 | def __init__(self, params: Params):
29 | word_splitter_choice = params.pop_choice('word_splitter', list(word_splitters.keys()),
30 | default_to_first_choice=True)
31 | self.word_splitter = word_splitters[word_splitter_choice]()
32 | word_filter_choice = params.pop_choice('word_filter', list(word_filters.keys()),
33 | default_to_first_choice=True)
34 | self.word_filter = word_filters[word_filter_choice]()
35 | word_stemmer_choice = params.pop_choice('word_stemmer', list(word_stemmers.keys()),
36 | default_to_first_choice=True)
37 | self.word_stemmer = word_stemmers[word_stemmer_choice]()
38 | params.assert_empty("WordProcessor")
39 |
40 | def get_tokens(self, sentence: str) -> List[str]:
41 | """
42 | Does whatever processing is required to convert a string of text into a sequence of tokens.
43 |
44 | At a minimum, this uses a ``WordSplitter`` to split the text into words. It may also do
45 | stemming or stopword removal, depending on the parameters given to the constructor.
46 | """
47 | words = self.word_splitter.split_words(sentence)
48 | filtered_words = self.word_filter.filter_words(words)
49 | stemmed_words = [self.word_stemmer.stem_word(word) for word in filtered_words]
50 | return stemmed_words
51 |
--------------------------------------------------------------------------------
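A short usage sketch of `WordProcessor` (assuming `Params` can be built directly from a plain dict; unspecified options fall back to the first registered choice because of `default_to_first_choice=True`):

```python
from deep_qa.common.params import Params
from deep_qa.data.tokenizers.word_processor import WordProcessor

# Assumption: Params wraps a plain dict of configuration values.
processor = WordProcessor(Params({"word_stemmer": "porter"}))
tokens = processor.get_tokens("The dogs were running.")
print(tokens)  # split, filtered, then Porter-stemmed tokens
```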
/deep_qa/data/tokenizers/word_stemmer.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | from nltk.stem import PorterStemmer as NltkPorterStemmer
4 | from overrides import overrides
5 |
6 |
7 | class WordStemmer:
8 | """
9 | A ``WordStemmer`` lemmatizes words. This means that we map words to their root form, so that,
10 | e.g., "have", "has", and "had" all have the same internal representation.
11 |
12 | You should think carefully about whether and how much stemming you want in your model. Kind of
13 | the whole point of using word embeddings is so that you don't have to do this, but in a highly
14 | inflected language, or in a low-data setting, you might need it anyway. The default
15 | ``WordStemmer`` does nothing, just returning the word token as-is.
16 | """
17 | def stem_word(self, word: str) -> str:
18 | """Converts a word to its lemma"""
19 | raise NotImplementedError
20 |
21 |
22 | class PassThroughWordStemmer(WordStemmer):
23 | """
24 | Does not stem words; it's a no-op. This is the default word stemmer.
25 | """
26 | @overrides
27 | def stem_word(self, word: str) -> str:
28 | return word
29 |
30 |
31 | class PorterStemmer(WordStemmer):
32 | """
33 | Uses NLTK's PorterStemmer to stem words.
34 | """
35 | def __init__(self):
36 | self.stemmer = NltkPorterStemmer()
37 |
38 | @overrides
39 | def stem_word(self, word: str) -> str:
40 | return self.stemmer.stem(word)
41 |
42 |
43 | word_stemmers = OrderedDict() # pylint: disable=invalid-name
44 | word_stemmers['pass_through'] = PassThroughWordStemmer
45 | word_stemmers['porter'] = PorterStemmer
46 |
--------------------------------------------------------------------------------
/deep_qa/data/tokenizers/word_tokenizer.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, Dict, List, Tuple
2 |
3 | from overrides import overrides
4 | from keras.layers import Layer
5 |
6 | from .tokenizer import Tokenizer
7 | from .word_processor import WordProcessor
8 | from ..data_indexer import DataIndexer
9 | from ...common.params import Params
10 |
11 |
12 | class WordTokenizer(Tokenizer):
13 | """
14 | A ``WordTokenizer`` splits strings into word tokens.
15 |
16 | There are several ways that you can split a string into words, so we rely on a
17 | ``WordProcessor`` to do that work for us. Note that we're using the word "tokenizer" here for
18 | something different than is typical in NLP - we're referring here to how strings are
19 | represented as numpy arrays, not the linguistic notion of splitting sentences into tokens.
20 | Those things are handled in the ``WordProcessor``, which is a common dependency in several
21 | ``Tokenizers``.
22 |
23 | Parameters
24 | ----------
25 | processor: Dict[str, Any], default={}
26 | Contains parameters for processing text strings into word tokens, including, e.g.,
27 | splitting, stemming, and filtering words. See ``WordProcessor`` for a complete description
28 | of available parameters.
29 | """
30 | def __init__(self, params: Params):
31 | self.word_processor = WordProcessor(params.pop('processor', {}))
32 | super(WordTokenizer, self).__init__(params)
33 |
34 | @overrides
35 | def tokenize(self, text: str) -> List[str]:
36 | return self.word_processor.get_tokens(text)
37 |
38 | @overrides
39 | def get_words_for_indexer(self, text: str) -> Dict[str, List[str]]:
40 | return {'words': self.tokenize(text)}
41 |
42 | @overrides
43 | def index_text(self, text: str, data_indexer: DataIndexer) -> List:
44 | return [data_indexer.get_word_index(word, namespace='words') for word in self.tokenize(text)]
45 |
46 | @overrides
47 | def embed_input(self,
48 | input_layer: Layer,
49 | embed_function: Callable[[Layer, str, str], Layer],
50 | text_trainer,
51 | embedding_suffix: str=""):
52 | # pylint: disable=protected-access
53 | return embed_function(input_layer,
54 | embedding_name='words' + embedding_suffix,
55 | vocab_name='words')
56 |
57 | @overrides
58 | def get_sentence_shape(self, sentence_length: int, word_length: int) -> Tuple[int]:
59 | return (sentence_length,)
60 |
61 | @overrides
62 | def get_padding_lengths(self, sentence_length: int, word_length: int) -> Dict[str, int]:
63 | return {'num_sentence_words': sentence_length}
64 |
--------------------------------------------------------------------------------
/deep_qa/layers/README.md:
--------------------------------------------------------------------------------
1 | # Layers
2 |
3 | Custom layers that we have implemented belong here. These include things like knowledge encoders
4 | (which encode the memory component of a memory network), knowledge selectors (which perform an
5 | attention over the memory), and entailment models. There's also an `encoders` submodule,
6 | containing sentence encoders that convert an embedded word (or character) sequence into a vector.
7 |
--------------------------------------------------------------------------------
/deep_qa/layers/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | # Individual layers.
3 | from .additive import Additive
4 | from .bigru_index_selector import BiGRUIndexSelector
5 | from .complex_concat import ComplexConcat
6 | from .highway import Highway
7 | from .l1_normalize import L1Normalize
8 | from .masked_layer import MaskedLayer
9 | from .noisy_or import BetweenZeroAndOne, NoisyOr
10 | from .option_attention_sum import OptionAttentionSum
11 | from .overlap import Overlap
12 | from .vector_matrix_merge import VectorMatrixMerge
13 | from .vector_matrix_split import VectorMatrixSplit
14 |
--------------------------------------------------------------------------------
/deep_qa/layers/additive.py:
--------------------------------------------------------------------------------
1 | from overrides import overrides
2 |
3 | from .masked_layer import MaskedLayer
4 |
5 | class Additive(MaskedLayer):
6 | """
7 | This ``Layer`` `adds` a parameter value to each cell in the input tensor, similar to a bias
 8 | vector in a ``Dense`` layer, but this layer `only` adds, with one value per cell. The value to add is
9 | learned.
10 |
11 | Parameters
12 | ----------
13 | initializer: str, optional (default='glorot_uniform')
14 | Keras initializer for the additive weight.
15 | """
16 | def __init__(self, initializer='glorot_uniform', **kwargs):
17 | super(Additive, self).__init__(**kwargs)
18 |
19 | self.initializer = initializer
20 | self._additive_weight = None
21 |
22 | @overrides
23 | def build(self, input_shape):
24 | super(Additive, self).build(input_shape)
25 | self._additive_weight = self.add_weight(shape=input_shape[1:],
26 | name='%s_additive' % self.name,
27 | initializer=self.initializer)
28 |
29 | @overrides
30 | def call(self, inputs, mask=None):
31 | return inputs + self._additive_weight
32 |
33 | @overrides
34 | def get_config(self):
35 | base_config = super(Additive, self).get_config()
36 | config = {
37 | 'initializer': self.initializer,
38 | }
39 | config.update(base_config)
40 | return config
41 |
--------------------------------------------------------------------------------
/deep_qa/layers/attention/README.md:
--------------------------------------------------------------------------------
1 | Layers in this module compute some kind of "attention" over a vector or matrix. "Attention"
2 | typically means a normalized probability distribution, and is typically computed using a softmax
3 | after some similarity computation, so we're also grouping layers that do some kind of specialized
4 | softmax in this module.
5 |
--------------------------------------------------------------------------------
/deep_qa/layers/attention/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .attention import Attention
3 | from .gated_attention import GatedAttention
4 | from .masked_softmax import MaskedSoftmax
5 | from .matrix_attention import MatrixAttention
6 | from .max_similarity_softmax import MaxSimilaritySoftmax
7 | from .weighted_sum import WeightedSum
8 |
--------------------------------------------------------------------------------
/deep_qa/layers/attention/masked_softmax.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from ..masked_layer import MaskedLayer
5 | from ...tensors.backend import last_dim_flatten
6 | from ...tensors.masked_operations import masked_softmax
7 |
8 | class MaskedSoftmax(MaskedLayer):
9 | '''
10 | This Layer performs a masked softmax. This could just be a `Lambda` layer that calls our
11 | `tensors.masked_softmax` function, except that `Lambda` layers do not properly handle masked
12 | input.
13 |
14 | The expected input to this layer is a tensor of shape `(batch_size, num_options)`, with a mask
15 | of the same shape. We also accept an input tensor of shape `(batch_size, num_options, 1)`,
16 | which we will squeeze to be `(batch_size, num_options)` (though the mask must still be
17 | `(batch_size, num_options)`).
18 |
19 | While we give the expected input as having two modes, we also accept higher-order tensors. In
20 | those cases, we'll first perform a `last_dim_flatten` on both the input and the mask, so that
21 | we always do the softmax over a single dimension (the last one).
22 |
23 | We give no output mask, as we expect this to only be used at the end of the model, to get a
24 | final probability distribution over class labels (and it's a softmax, so you'll have zeros in
25 | the tensor itself; do you really still need a mask?). If you need this to propagate the mask
26 | for whatever reason, it would be pretty easy to change it to optionally do so - submit a PR.
27 | '''
28 | def __init__(self, **kwargs):
29 | super(MaskedSoftmax, self).__init__(**kwargs)
30 |
31 | @overrides
32 | def compute_mask(self, inputs, mask=None):
33 | # pylint: disable=unused-argument
34 | # We do not need a mask beyond this layer.
35 | return None
36 |
37 | @overrides
38 | def compute_output_shape(self, input_shape):
39 | if input_shape[-1] == 1:
40 | return input_shape[:-1]
41 | else:
42 | return input_shape
43 |
44 | @overrides
45 | def call(self, inputs, mask=None):
46 | input_shape = K.int_shape(inputs)
47 | if input_shape[-1] == 1:
48 | inputs = K.squeeze(inputs, axis=-1)
49 | input_shape = input_shape[:-1]
50 | if len(input_shape) > 2:
51 | original_inputs = inputs
52 | inputs = last_dim_flatten(inputs)
53 | if mask is not None:
54 | mask = last_dim_flatten(mask)
55 | # Now we have both inputs and mask with shape (?, num_options), and can do a softmax.
56 | softmax_result = masked_softmax(inputs, mask)
57 | if len(input_shape) > 2:
58 | original_shape = K.shape(original_inputs)
59 | input_shape = K.concatenate([[-1], original_shape[1:]], 0)
60 | softmax_result = K.reshape(softmax_result, input_shape)
61 | return softmax_result
62 |
--------------------------------------------------------------------------------
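A small functional-API sketch of wiring this layer up, using `AddMask` from `layers/backend` to attach a mask first (shapes follow the docstring above; the exact Keras plumbing here is an assumption, not taken from the repository's tests):

```python
import numpy
from keras.layers import Input
from keras.models import Model

from deep_qa.layers.attention import MaskedSoftmax
from deep_qa.layers.backend import AddMask

# (batch_size, num_options) scores; zeros are treated as padding by AddMask.
scores = Input(shape=(4,), dtype='float32')
masked_scores = AddMask(mask_value=0.0)(scores)
probabilities = MaskedSoftmax()(masked_scores)
model = Model(inputs=scores, outputs=probabilities)

# Padded (zero) options should receive zero probability after the masked softmax.
print(model.predict(numpy.asarray([[2.0, 1.0, 0.0, 0.0]])))
```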
/deep_qa/layers/attention/max_similarity_softmax.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from ..masked_layer import MaskedLayer
5 | from ...tensors.masked_operations import masked_batch_dot, masked_softmax
6 |
7 |
8 | class MaxSimilaritySoftmax(MaskedLayer):
9 | '''
10 | This layer takes encoded questions and knowledge in a multiple choice
11 | setting and computes the similarity between each of the question embeddings
12 | and the background knowledge, and returns a softmax over the options.
13 |
14 | Inputs:
15 |
16 | - encoded_questions (batch_size, num_options, encoding_dim)
17 | - encoded_knowledge (batch_size, num_options, knowledge_length, encoding_dim)
18 |
19 | Output:
20 |
21 | - option_probabilities (batch_size, num_options)
22 |
23 | This is a pretty niche layer that does a very specific computation. We only
24 | made it its own class instead of a ``Lambda`` layer so that we could handle
25 | masking correctly, which ``Lambda`` does not.
26 |
27 | '''
28 | def __init__(self, knowledge_axis, max_knowledge_length, **kwargs):
29 | self.knowledge_axis = knowledge_axis
30 | self.max_knowledge_length = max_knowledge_length
31 | super(MaxSimilaritySoftmax, self).__init__(**kwargs)
32 |
33 | @overrides
34 | def compute_mask(self, inputs, mask=None):
35 | # pylint: disable=unused-argument
36 | return None
37 |
38 | @overrides
39 | def compute_output_shape(self, input_shapes):
40 | # (batch_size, num_options)
41 | return (input_shapes[0][0], input_shapes[0][1])
42 |
43 | @overrides
44 | def call(self, inputs, mask=None):
45 | questions, knowledge = inputs
46 | question_mask, knowledge_mask = mask
47 | question_knowledge_similarity = masked_batch_dot(questions, knowledge, question_mask, knowledge_mask)
48 | max_knowledge_similarity = K.max(question_knowledge_similarity, axis=-1) # (samples, num_options)
49 | return masked_softmax(max_knowledge_similarity, question_mask)
50 |
--------------------------------------------------------------------------------
/deep_qa/layers/backend/README.md:
--------------------------------------------------------------------------------
1 | Layers in this module generally just implement some simple operation from the Keras backend as a
2 | Layer. The reason we have these as Layers is largely so that we can properly handle masking.
3 |
--------------------------------------------------------------------------------
/deep_qa/layers/backend/__init__.py:
--------------------------------------------------------------------------------
1 | from .add_mask import AddMask
2 | from .batch_dot import BatchDot
3 | from .collapse_to_batch import CollapseToBatch
4 | from .envelope import Envelope
5 | from .expand_from_batch import ExpandFromBatch
6 | from .max import Max
7 | from .multiply import Multiply
8 | from .permute import Permute
9 | from .replace_masked_values import ReplaceMaskedValues
10 | from .repeat import Repeat
11 | from .repeat_like import RepeatLike
12 | from .squeeze import Squeeze
13 |
--------------------------------------------------------------------------------
/deep_qa/layers/backend/add_mask.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from ..masked_layer import MaskedLayer
5 |
6 |
7 | class AddMask(MaskedLayer):
8 | """
9 | This ``Layer`` adds a mask to a tensor. It is intended solely for testing, though if you have
10 | a use case for this outside of testing, feel free to use it. The ``call()`` method just
11 | returns the inputs, and the ``compute_mask`` method calls ``K.not_equal(inputs, mask_value)``,
12 | and that's it. This is different from Keras' ``Masking`` layer, which assumes higher-order
13 | input and does a ``K.any()`` call in ``compute_mask``.
14 |
15 | Input:
16 | - tensor: a tensor of arbitrary shape
17 |
18 | Output:
19 | - the same tensor, now with a mask attached of the same shape
20 |
21 | Parameters
22 | ----------
23 | mask_value: float, optional (default=0.0)
24 | This is the value that we will compare to in ``compute_mask``.
25 | """
26 | def __init__(self, mask_value: float=0.0, **kwargs):
27 | self.mask_value = mask_value
28 | super(AddMask, self).__init__(**kwargs)
29 |
30 | @overrides
31 | def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument
32 | return K.cast(K.not_equal(inputs, self.mask_value), 'bool')
33 |
34 | @overrides
35 | def compute_output_shape(self, input_shape):
36 | return input_shape
37 |
38 | @overrides
39 | def call(self, inputs, mask=None):
40 | # It turns out that Keras doesn't like it if you just return inputs, so we need to return a
41 | # different tensor object. Just doing a cast apparently doesn't work, either, so we'll
42 | # add 0.
43 | return inputs + 0.0
44 |
45 | @overrides
46 | def get_config(self):
47 | config = {'mask_value': self.mask_value}
48 | base_config = super(AddMask, self).get_config()
49 | config.update(base_config)
50 | return config
51 |
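The mask computed by ``compute_mask`` is just an elementwise comparison against ``mask_value``; in NumPy terms:

    import numpy as np

    word_indices = np.array([[5, 2, 1, 0],
                             [2, 3, 0, 0]])
    mask_value = 0
    mask = (word_indices != mask_value)
    # array([[ True,  True,  True, False],
    #        [ True,  True, False, False]])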
--------------------------------------------------------------------------------
/deep_qa/layers/backend/envelope.py:
--------------------------------------------------------------------------------
1 | from overrides import overrides
2 |
3 | from keras import backend as K
4 | from ..masked_layer import MaskedLayer
5 |
6 |
7 | class Envelope(MaskedLayer):
8 | """
9 | Given a probability distribution over a begin index and an end index of some sequence, this
10 | ``Layer`` computes an envelope over the sequence, a probability that each element lies within
11 | "begin" and "end".
12 |
13 | Specifically, the computation done here is the following::
14 |
15 | after_span_begin = K.cumsum(span_begin, axis=-1)
16 | after_span_end = K.cumsum(span_end, axis=-1)
17 | before_span_end = 1 - after_span_end
18 | envelope = after_span_begin * before_span_end
19 |
20 | Inputs:
21 | - span_begin: tensor with shape ``(batch_size, sequence_length)``, representing a
22 | probability distribution over a start index in the sequence
23 | - span_end: tensor with shape ``(batch_size, sequence_length)``, representing a probability
24 | distribution over an end index in the sequence
25 |
26 | Outputs:
27 | - envelope: tensor with shape ``(batch_size, sequence_length)``, representing a probability
28 | for each index of the sequence belonging in the span
29 |
30 | If there is a mask associated with either of the inputs, we ignore it, assuming that you used
31 | the mask correctly when you computed your probability distributions. But we support masking in
32 | this layer, so that you have an output mask if you really need it. We just return the first
33 | mask that is not ``None`` (or ``None``, if both are ``None``).
34 |
35 | """
36 | def __init__(self, **kwargs):
37 | super(Envelope, self).__init__(**kwargs)
38 |
39 | @overrides
40 | def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument
41 | span_begin_mask, span_end_mask = mask
42 | return span_begin_mask if span_begin_mask is not None else span_end_mask
43 |
44 | @overrides
45 | def compute_output_shape(self, input_shape):
46 | span_begin_shape, _ = input_shape
47 | return span_begin_shape
48 |
49 | @overrides
50 | def call(self, inputs, mask=None):
51 | span_begin, span_end = inputs
52 | after_span_begin = K.cumsum(span_begin, axis=-1)
53 | after_span_end = K.cumsum(span_end, axis=-1)
54 | before_span_end = 1.0 - after_span_end
55 | return after_span_begin * before_span_end
56 |
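The docstring's computation, written out for a single example with NumPy:

    import numpy as np

    span_begin = np.array([0.1, 0.7, 0.1, 0.1, 0.0])  # P(begin = i)
    span_end = np.array([0.0, 0.1, 0.2, 0.6, 0.1])    # P(end = i)

    after_span_begin = np.cumsum(span_begin)  # probability that "begin" is at or before i
    after_span_end = np.cumsum(span_end)      # probability that "end" is at or before i
    before_span_end = 1.0 - after_span_end    # probability that "end" is after i
    envelope = after_span_begin * before_span_end
    # Positions between the likely begin and end indices get the highest values.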
--------------------------------------------------------------------------------
/deep_qa/layers/backend/max.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from ..masked_layer import MaskedLayer
5 | from ...tensors.backend import switch, very_negative_like
6 |
7 |
8 | class Max(MaskedLayer):
9 | """
10 | This ``Layer`` performs a max over some dimension. Keras has a similar layer called
11 | ``GlobalMaxPooling1D``, but it is not as configurable as this one, and it does not support
12 | masking.
13 |
14 | If the mask is not ``None``, it must be the same shape as the input.
15 |
16 | Input:
17 | - A tensor of arbitrary shape (having at least 3 dimensions).
18 |
19 | Output:
20 | - A tensor with one less dimension, where we have taken a max over one of the dimensions.
21 | """
22 | def __init__(self, axis: int=-1, **kwargs):
23 | self.axis = axis
24 | super(Max, self).__init__(**kwargs)
25 |
26 | @overrides
27 | def compute_mask(self, inputs, mask=None):
28 | # pylint: disable=unused-argument
29 | if mask is None:
30 | return None
31 | return K.any(mask, axis=self.axis)
32 |
33 | @overrides
34 | def compute_output_shape(self, input_shape):
35 | axis = self.axis
36 | if axis < 0:
37 | axis += len(input_shape)
38 | return input_shape[:axis] + input_shape[axis+1:]
39 |
40 | @overrides
41 | def call(self, inputs, mask=None):
42 | if mask is not None:
43 | inputs = switch(mask, inputs, very_negative_like(inputs))
44 | return K.max(inputs, axis=self.axis)
45 |
46 | @overrides
47 | def get_config(self):
48 | config = {'axis': self.axis}
49 | base_config = super(Max, self).get_config()
50 | config.update(base_config)
51 | return config
52 |
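The masking trick in ``call`` replaces masked entries with a very negative number so they can never win the max; with NumPy:

    import numpy as np

    inputs = np.array([[1.0, 5.0, 3.0],
                       [2.0, 9.0, 4.0]])
    mask = np.array([[1, 1, 1],
                     [1, 0, 1]])            # the 9.0 is padding
    masked_inputs = np.where(mask.astype(bool), inputs, np.full_like(inputs, -1e30))
    result = masked_inputs.max(axis=-1)     # array([5., 4.])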
--------------------------------------------------------------------------------
/deep_qa/layers/backend/multiply.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from ..masked_layer import MaskedLayer
5 |
6 |
7 | class Multiply(MaskedLayer):
8 | """
9 | This ``Layer`` performs elementwise multiplication between two tensors, supporting masking. We
10 | literally just call ``tensor_1 * tensor_2``; the only reason this is a ``Layer`` is so that we
11 | can support masking (and because it's slightly nicer to read in a model definition than a
12 | lambda layer).
13 |
14 | We also try to be a little bit smart if you're wanting to broadcast the multiplication, by
15 | having the tensors differ in the number of dimensions by one.
16 |
17 | Input:
18 | - tensor_1: a tensor of arbitrary shape, with an optional mask of the same shape
19 | - tensor_2: a tensor with the same shape as ``tensor_1`` (or one less or one more
20 | dimension), with an optional mask of the same shape
21 |
22 | Output:
23 | - ``tensor_1 * tensor_2``.
24 | """
25 | def __init__(self, **kwargs):
26 | super(Multiply, self).__init__(**kwargs)
27 |
28 | @overrides
29 | def compute_mask(self, inputs, mask=None):
30 | # pylint: disable=unused-argument
31 | tensor_1, tensor_2 = inputs
32 | tensor_1_mask, tensor_2_mask = mask
33 | if tensor_1_mask is None:
34 | tensor_1_mask = K.ones_like(tensor_1)
35 | if tensor_2_mask is None:
36 | tensor_2_mask = K.ones_like(tensor_2)
37 | tensor_1_mask, tensor_2_mask = self.expand_dims_if_necessary(tensor_1_mask, tensor_2_mask)
38 | return K.cast(tensor_1_mask, 'uint8') * K.cast(tensor_2_mask, 'uint8')
39 |
40 | @overrides
41 | def compute_output_shape(self, input_shape):
42 | return input_shape[0]
43 |
44 | @overrides
45 | def call(self, inputs, mask=None):
46 | tensor_1, tensor_2 = inputs
47 | tensor_1, tensor_2 = self.expand_dims_if_necessary(tensor_1, tensor_2)
48 | return tensor_1 * tensor_2
49 |
50 | @staticmethod
51 | def expand_dims_if_necessary(tensor_1, tensor_2):
52 | tensor_1_ndim = K.ndim(tensor_1)
53 | tensor_2_ndim = K.ndim(tensor_2)
54 | if tensor_1_ndim == tensor_2_ndim:
55 | return tensor_1, tensor_2
56 | elif tensor_1_ndim == tensor_2_ndim - 1:
57 | return K.expand_dims(tensor_1), tensor_2
58 | elif tensor_2_ndim == tensor_1_ndim - 1:
59 | return tensor_1, K.expand_dims(tensor_2)
60 | else:
61 | raise RuntimeError("Can't multiply two tensors with ndims "
62 | "{} and {}".format(tensor_1_ndim, tensor_2_ndim))
63 |
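A small NumPy sketch of the broadcasting handled by ``expand_dims_if_necessary``, e.g. weighting word vectors by per-word scalars:

    import numpy as np

    word_vectors = np.random.rand(2, 4, 3)   # (batch_size, num_words, embedding_dim)
    word_weights = np.random.rand(2, 4)      # (batch_size, num_words): one fewer dimension
    # The smaller tensor gets a trailing dimension of size 1, then broadcasts.
    product = word_vectors * word_weights[:, :, np.newaxis]
    print(product.shape)                     # (2, 4, 3)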
--------------------------------------------------------------------------------
/deep_qa/layers/backend/permute.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | from keras import backend as K
4 | from overrides import overrides
5 |
6 | from ..masked_layer import MaskedLayer
7 |
8 |
9 | class Permute(MaskedLayer):
10 | """
11 | This ``Layer`` calls ``K.permute_dimensions`` on both the input and the mask.
12 |
13 | If the mask is not ``None``, it must have the same shape as the input.
14 |
15 | Input:
16 | - A tensor of arbitrary shape.
17 |
18 | Output:
19 | - A tensor with permuted dimensions.
20 | """
21 | def __init__(self, pattern: Tuple[int], **kwargs):
22 | self.pattern = pattern
23 | super(Permute, self).__init__(**kwargs)
24 |
25 | @overrides
26 | def compute_mask(self, inputs, mask=None):
27 | # pylint: disable=unused-argument
28 | if mask is None:
29 | return None
30 | return K.permute_dimensions(mask, self.pattern)
31 |
32 | @overrides
33 | def compute_output_shape(self, input_shape):
34 | return tuple([input_shape[i] for i in self.pattern])
35 |
36 | @overrides
37 | def call(self, inputs, mask=None):
38 | return K.permute_dimensions(inputs, pattern=self.pattern)
39 |
--------------------------------------------------------------------------------
/deep_qa/layers/backend/repeat.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from ..masked_layer import MaskedLayer
5 |
6 |
7 | class Repeat(MaskedLayer):
8 | """
9 | This ``Layer`` calls ``K.repeat_elements`` on both the input and the mask, after calling
10 | ``K.expand_dims``.
11 |
12 | If the mask is not ``None``, we must be able to call ``K.expand_dims`` using the same axis
13 | parameter as we do for the input.
14 |
15 | Input:
16 | - A tensor of arbitrary shape.
17 |
18 | Output:
19 | - The input tensor repeated along one of the dimensions.
20 |
21 | Parameters
22 | ----------
23 | axis: int
24 | We will add a dimension to the input tensor at this axis.
25 | repetitions: int
26 | The new dimension will have this size, with each slice being identical to the
27 | original input tensor.
28 | """
29 | def __init__(self, axis: int, repetitions: int, **kwargs):
30 | self.axis = axis
31 | self.repetitions = repetitions
32 | super(Repeat, self).__init__(**kwargs)
33 |
34 | @overrides
35 | def compute_mask(self, inputs, mask=None):
36 | # pylint: disable=unused-argument
37 | if mask is None:
38 | return None
39 | return self.__repeat_tensor(mask)
40 |
41 | @overrides
42 | def compute_output_shape(self, input_shape):
43 | return input_shape[:self.axis] + (self.repetitions,) + input_shape[self.axis:]
44 |
45 | @overrides
46 | def call(self, inputs, mask=None):
47 | return self.__repeat_tensor(inputs)
48 |
49 | def __repeat_tensor(self, tensor):
50 | return K.repeat_elements(K.expand_dims(tensor, self.axis), self.repetitions, self.axis)
51 |
52 | @overrides
53 | def get_config(self):
54 | base_config = super(Repeat, self).get_config()
55 | config = {'axis': self.axis, 'repetitions': self.repetitions}
56 | config.update(base_config)
57 | return config
58 |
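The expand-and-repeat operation in NumPy terms:

    import numpy as np

    sentence_encoding = np.random.rand(2, 5)   # (batch_size, encoding_dim)
    # Repeat(axis=1, repetitions=3) produces shape (batch_size, 3, encoding_dim),
    # with every slice along axis 1 identical to the original encoding.
    repeated = np.repeat(np.expand_dims(sentence_encoding, axis=1), 3, axis=1)
    print(repeated.shape)                      # (2, 3, 5)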
--------------------------------------------------------------------------------
/deep_qa/layers/backend/repeat_like.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from ..masked_layer import MaskedLayer
5 |
6 |
7 | class RepeatLike(MaskedLayer):
8 | """
9 | This ``Layer`` is like :class:`~.repeat.Repeat`, but gets the number of repetitions to use from
10 | a second input tensor. This allows doing a number of repetitions that is unknown at graph
11 | compilation time, and is necessary when the ``repetitions`` argument to ``Repeat`` would be
12 | ``None``.
13 |
14 | If the mask is not ``None``, we must be able to call ``K.expand_dims`` using the same axis
15 | parameter as we do for the input.
16 |
17 | Input:
18 | - A tensor of arbitrary shape, which we will expand and tile.
19 | - A second tensor whose shape along one dimension we will copy.
20 |
21 | Output:
22 | - The input tensor repeated along one of the dimensions.
23 |
24 | Parameters
25 | ----------
26 | axis: int
27 | We will add a dimension to the input tensor at this axis.
28 | copy_from_axis: int
29 | We will copy the dimension from the second tensor at this axis.
30 | """
31 | def __init__(self, axis: int, copy_from_axis: int, **kwargs):
32 | self.axis = axis
33 | self.copy_from_axis = copy_from_axis
34 | super(RepeatLike, self).__init__(**kwargs)
35 |
36 | @overrides
37 | def compute_mask(self, inputs, mask=None):
38 | # pylint: disable=unused-argument
39 | if mask is None or mask[0] is None:
40 | return None
41 | return self.__repeat_tensor(mask[0], inputs[1])
42 |
43 | @overrides
44 | def compute_output_shape(self, input_shape):
45 | return input_shape[0][:self.axis] + (input_shape[1][self.copy_from_axis],) + input_shape[0][self.axis:]
46 |
47 | @overrides
48 | def call(self, inputs, mask=None):
49 | return self.__repeat_tensor(inputs[0], inputs[1])
50 |
51 | def __repeat_tensor(self, to_repeat, to_copy):
52 | expanded = K.expand_dims(to_repeat, self.axis)
53 | ones = [1] * K.ndim(expanded)
54 | num_repetitions = K.shape(to_copy)[self.copy_from_axis]
55 | tile_shape = K.concatenate([ones[:self.axis], [num_repetitions], ones[self.axis+1:]], 0)
56 | return K.tile(expanded, tile_shape)
57 |
58 | @overrides
59 | def get_config(self):
60 | base_config = super(RepeatLike, self).get_config()
61 | config = {'axis': self.axis, 'copy_from_axis': self.copy_from_axis}
62 | config.update(base_config)
63 | return config
64 |
--------------------------------------------------------------------------------
/deep_qa/layers/backend/replace_masked_values.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from ...tensors.backend import switch
5 | from ..masked_layer import MaskedLayer
6 |
7 |
8 | class ReplaceMaskedValues(MaskedLayer):
9 | """
10 | This ``Layer`` replaces all masked values in a tensor with some value. You might want to do
11 | this before passing the tensor into a layer that does a max or a min, for example, to replace
12 | all masked values with something very large or very negative. We basically just call
13 | ``switch`` on the mask.
14 |
15 | Input:
16 | - tensor: a tensor of arbitrary shape
17 |
18 | Output:
19 | - the same tensor, with masked values replaced by some input value
20 |
21 | Parameters
22 | ----------
23 | replace_with: float
24 | We will replace all masked values in the tensor with this value.
25 | """
26 | def __init__(self, replace_with: float, **kwargs):
27 | self.replace_with = replace_with
28 | super(ReplaceMaskedValues, self).__init__(**kwargs)
29 |
30 | @overrides
31 | def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument
32 | return mask
33 |
34 | @overrides
35 | def compute_output_shape(self, input_shape):
36 | return input_shape
37 |
38 | @overrides
39 | def call(self, inputs, mask=None):
40 | if mask is None:
41 | # It turns out that Keras doesn't like it if you just return inputs, so we need to
42 | # return a different tensor object. Just doing a cast apparently doesn't work, either,
43 | # so we'll add 0.
44 | return inputs + 0.0
45 | return switch(mask, inputs, K.ones_like(inputs) * self.replace_with)
46 |
47 | @overrides
48 | def get_config(self):
49 | config = {'replace_with': self.replace_with}
50 | base_config = super(ReplaceMaskedValues, self).get_config()
51 | config.update(base_config)
52 | return config
53 |
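The ``switch`` call is equivalent to a NumPy ``where``:

    import numpy as np

    inputs = np.array([[0.2, 0.9, 0.5],
                       [0.7, 0.1, 0.3]])
    mask = np.array([[1, 1, 0],
                     [1, 0, 0]])
    replace_with = -1e7   # e.g. something very negative before taking a max
    result = np.where(mask.astype(bool), inputs, np.ones_like(inputs) * replace_with)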
--------------------------------------------------------------------------------
/deep_qa/layers/backend/squeeze.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from ..masked_layer import MaskedLayer
5 |
6 |
7 | class Squeeze(MaskedLayer):
8 | """
9 | This ``Layer`` removes a dimension of length 1 from the tensor at index ``axis``, acting simply
10 | as a layer version of the backend ``squeeze`` function.
11 |
12 | If the mask is not ``None``, it must be the same shape as the input.
13 |
14 | Input:
15 | - A tensor of arbitrary shape (having at least 3 dimensions).
16 |
17 | Output:
18 | - A tensor with the same data as ``inputs`` but reduced dimensions.
19 |
20 | Parameters
21 | ----------
22 | axis: int, optional (default=-1)
23 | The axis that we should squeeze.
24 | """
25 | def __init__(self, axis: int=-1, **kwargs):
26 | self.axis = axis
27 | super(Squeeze, self).__init__(**kwargs)
28 |
29 | @overrides
30 | def compute_mask(self, inputs, mask=None):
31 | # pylint: disable=unused-argument
32 | if mask is None:
33 | return None
34 | return K.squeeze(mask, axis=self.axis)
35 |
36 | @overrides
37 | def compute_output_shape(self, input_shape):
38 | axis = self.axis
39 | if axis < 0:
40 | axis += len(input_shape)
41 | return input_shape[:axis] + input_shape[axis+1:]
42 |
43 | @overrides
44 | def call(self, inputs, mask=None):
45 | return K.squeeze(inputs, axis=self.axis)
46 |
47 | @overrides
48 | def get_config(self):
49 | base_config = super(Squeeze, self).get_config()
50 | config = {'axis': self.axis}
51 | config.update(base_config)
52 | return config
53 |
--------------------------------------------------------------------------------
/deep_qa/layers/encoders/bag_of_words.py:
--------------------------------------------------------------------------------
1 | from overrides import overrides
2 |
3 | from keras import backend as K
4 | from keras.engine import InputSpec
5 |
6 | from ..masked_layer import MaskedLayer
7 |
8 | class BOWEncoder(MaskedLayer):
9 | '''
10 | Bag of Words Encoder takes a matrix of shape (num_words, word_dim) and returns a vector of size (word_dim),
11 | which is an average of the (unmasked) rows in the input matrix. This could have been done using a Lambda
12 | layer, except that the Lambda layer does not support masking (as of Keras 1.0.7).
13 | '''
14 | def __init__(self, **kwargs):
15 | self.input_spec = [InputSpec(ndim=3)]
16 |
17 | # For consistency of handling sentence encoders, we will often get passed this parameter.
18 | # We don't use it, but Layer will complain if it's there, so we get rid of it here.
19 | kwargs.pop('units', None)
20 | super(BOWEncoder, self).__init__(**kwargs)
21 |
22 | @overrides
23 | def compute_output_shape(self, input_shape):
24 | return (input_shape[0], input_shape[2]) # removing second dimension
25 |
26 | @overrides
27 | def call(self, inputs, mask=None):
28 | # pylint: disable=redefined-variable-type
29 | if mask is None:
30 | return K.mean(inputs, axis=1)
31 | else:
32 | # Compute weights such that masked elements have zero weights and the remaining
33 | weight is distributed equally among the unmasked elements.
34 | # Mask (samples, num_words) has 0s for masked elements and 1s everywhere else.
35 | # Mask is of type int8. While theano would automatically make weighted_mask below
36 | # of type float32 even if mask remains int8, tensorflow would complain. Let's cast it
37 | # explicitly to remain compatible with tf.
38 | float_mask = K.cast(mask, 'float32')
39 | # Expanding dims of the denominator to make it the same shape as the numerator, epsilon added to avoid
40 | # division by zero.
41 | # (samples, num_words)
42 | weighted_mask = float_mask / (K.sum(float_mask, axis=1, keepdims=True) + K.epsilon())
43 | if K.ndim(weighted_mask) < K.ndim(inputs):
44 | weighted_mask = K.expand_dims(weighted_mask)
45 | return K.sum(inputs * weighted_mask, axis=1) # (samples, word_dim)
46 |
47 | @overrides
48 | def compute_mask(self, inputs, mask=None):
49 | # We need to override this method because Layer passes the input mask unchanged since this layer
50 | # supports masking. We don't want that. After the input is averaged, we can stop propagating
51 | # the mask.
52 | return None
53 |
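The masked averaging in ``call`` corresponds to this NumPy computation:

    import numpy as np

    embeddings = np.random.rand(2, 4, 3)   # (samples, num_words, word_dim)
    mask = np.array([[1, 1, 1, 0],
                     [1, 1, 0, 0]], dtype='float32')
    epsilon = 1e-7
    weighted_mask = mask / (mask.sum(axis=1, keepdims=True) + epsilon)      # (samples, num_words)
    encoding = (embeddings * weighted_mask[:, :, np.newaxis]).sum(axis=1)   # (samples, word_dim)
    # Each row of ``encoding`` is the mean of that sample's unmasked word vectors.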
--------------------------------------------------------------------------------
/deep_qa/layers/encoders/shareable_gru.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from keras.layers import GRU, InputSpec
3 |
4 |
5 | class ShareableGRU(GRU):
6 | def __init__(self, *args, **kwargs):
7 | super(ShareableGRU, self).__init__(*args, **kwargs)
8 |
9 | def call(self, x, mask=None, **kwargs):
10 | input_shape = K.int_shape(x)
11 | res = super(ShareableGRU, self).call(x, mask, **kwargs)
12 | self.input_spec = [InputSpec(shape=(self.input_spec[0].shape[0],
13 | None,
14 | self.input_spec[0].shape[2]))]
15 | if K.ndim(x) == K.ndim(res):
16 | # A recent change in Keras
17 | # (https://github.com/fchollet/keras/commit/a9b6bef0624c67d6df1618ca63d8e8141b0df4d0)
18 | # made it so that K.rnn with a tensorflow backend does not retain shape information for
19 | # the sequence length, even if it's present in the input. We need to fix that here so
20 | # that our models have the right shape information. A simple K.reshape is good enough
21 | # to fix this.
22 | result_shape = K.int_shape(res)
23 | if input_shape[1] is not None and result_shape[1] is None:
24 | shape = (input_shape[0] if input_shape[0] is not None else -1,
25 | input_shape[1], result_shape[2])
26 | res = K.reshape(res, shape=shape)
27 | return res
28 |
--------------------------------------------------------------------------------
/deep_qa/layers/entailment_models/__init__.py:
--------------------------------------------------------------------------------
1 | from .decomposable_attention import DecomposableAttentionEntailment
2 | from .multiple_choice_tuple_entailment import MultipleChoiceTupleEntailment
3 |
4 | entailment_models = { # pylint: disable=invalid-name
5 | 'decomposable_attention': DecomposableAttentionEntailment,
6 | 'multiple_choice_tuple_attention': MultipleChoiceTupleEntailment,
7 | }
8 |
--------------------------------------------------------------------------------
/deep_qa/layers/highway.py:
--------------------------------------------------------------------------------
1 | from keras.layers import Highway as KerasHighway
2 |
3 | class Highway(KerasHighway):
4 | """
5 | Keras' `Highway` layer does not support masking, but it easily could, just by returning the
6 | mask. This `Layer` adds that support by simply passing the mask through unchanged.
7 | """
8 | def __init__(self, **kwargs):
9 | super(Highway, self).__init__(**kwargs)
10 | self.supports_masking = True
11 |
--------------------------------------------------------------------------------
/deep_qa/layers/l1_normalize.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from .masked_layer import MaskedLayer
5 | from ..tensors.backend import l1_normalize
6 |
7 |
8 | class L1Normalize(MaskedLayer):
9 | """
10 | This Layer normalizes a tensor by its L1 norm. This could just be a
11 | ``Lambda`` layer that calls our ``tensors.l1_normalize`` function,
12 | except that ``Lambda`` layers do not properly handle masked input.
13 |
14 | The expected input to this layer is a tensor of shape
15 | ``(batch_size, x)``, with an optional mask of the same shape.
16 | We also accept as input a tensor of shape ``(batch_size, x, 1)``,
17 | which will be squeezed to shape ``(batch_size, x)``
18 | (though the mask must still be of shape ``(batch_size, x)``).
19 |
20 | We give no output mask, as we expect this to only be used at the end of
21 | the model, to get a final probability distribution over class labels. If
22 | you need this to propagate the mask for your model, it would be pretty
23 | easy to change it to optionally do so - submit a PR.
24 | """
25 |
26 | def __init__(self, **kwargs):
27 | super(L1Normalize, self).__init__(**kwargs)
28 |
29 | @overrides
30 | def compute_mask(self, inputs, mask=None):
31 | # pylint: disable=unused-argument
32 | # We do not need a mask beyond this layer.
33 | return None
34 |
35 | @overrides
36 | def compute_output_shape(self, input_shape):
37 | return (input_shape[0], input_shape[1])
38 |
39 | @overrides
40 | def call(self, inputs, mask=None):
41 | if K.ndim(inputs) == 3:
42 | inputs = K.squeeze(inputs, axis=2)
43 | if K.ndim(inputs) != 2:
44 | raise ValueError("L1Normalize layer only supports inputs of shape "
45 | "(batch_size, x) or (batch_size, x, 1)")
46 | return l1_normalize(inputs, mask)
47 |
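Ignoring masking, the normalization amounts to dividing by the L1 norm; a minimal NumPy sketch (the real ``l1_normalize`` also takes the mask into account):

    import numpy as np

    scores = np.array([[1.0, 3.0, 6.0],
                       [2.0, 2.0, 0.0]])   # (batch_size, x)
    normalized = scores / np.abs(scores).sum(axis=-1, keepdims=True)
    # array([[0.1, 0.3, 0.6],
    #        [0.5, 0.5, 0. ]])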
--------------------------------------------------------------------------------
/deep_qa/layers/masked_layer.py:
--------------------------------------------------------------------------------
1 | from keras.layers import Layer
2 |
3 |
4 | class MaskedLayer(Layer):
5 | """
6 | Keras 2.0 allowed for arbitrary differences in arguments to the ``call`` method of ``Layers``.
7 | As part of this, they removed the default ``mask=None`` argument, which means that if you want
8 | to implement ``call`` with a mask, you need to disable a pylint warning. Instead of disabling
9 | it in every single layer in our codebase, which could lead to uncaught errors, we'll have a
10 | single place where we disable it, and have other layers inherit from this class.
11 | """
12 | def __init__(self, **kwargs):
13 | self.supports_masking = True
14 | super(MaskedLayer, self).__init__(**kwargs)
15 |
16 | def call(self, inputs, mask=None): # pylint: disable=arguments-differ
17 | raise NotImplementedError
18 |
--------------------------------------------------------------------------------
/deep_qa/layers/subtract_minimum.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from deep_qa.layers.masked_layer import MaskedLayer
5 | from deep_qa.tensors.backend import VERY_LARGE_NUMBER
6 |
7 | class SubtractMinimum(MaskedLayer):
8 | '''
9 | This layer is used to normalize across a tensor axis. Normalization is done by finding the
10 | minimum value across the specified axis, and then subtracting that value from all values
11 | (again, across the specified axis). Note that this also works just fine if you want to find the
12 | minimum across more than one axis.
13 |
14 | Inputs:
15 | - A tensor with arbitrary dimension, and a mask of the same shape (currently doesn't
16 | support masks with other shapes).
17 |
18 | Output:
19 | - The same tensor, with the minimum across one (or more) of the dimensions subtracted.
20 |
21 | Parameters
22 | ----------
23 | axis: int
24 | The axis (or axes) across which to find the minimum. Can be a single int, a list of ints,
25 | or None. We just call `K.min` with this parameter, so anything that's valid there works
26 | here too.
27 | '''
28 | def __init__(self, axis: int, **kwargs):
29 | self.axis = axis
30 | super(SubtractMinimum, self).__init__(**kwargs)
31 |
32 | @overrides
33 | def compute_output_shape(self, input_shape): # pylint: disable=no-self-use
34 | return input_shape
35 |
36 | @overrides
37 | def compute_mask(self, inputs, mask=None):
38 | return mask
39 |
40 | @overrides
41 | def call(self, inputs, mask=None):
42 | if mask is not None:
43 | mask_value = False if K.dtype(mask) == 'bool' else 0
44 | # Make sure masked values don't affect the input, by adding a very large number.
45 | mask_flipped_and_scaled = K.cast(K.equal(mask, mask_value), "float32") * VERY_LARGE_NUMBER
46 | minimums = K.min(inputs + mask_flipped_and_scaled, axis=self.axis, keepdims=True)
47 | else:
48 | minimums = K.min(inputs, axis=self.axis, keepdims=True)
49 | normalized = inputs - minimums
50 | return normalized
51 |
52 | @overrides
53 | def get_config(self):
54 | base_config = super(SubtractMinimum, self).get_config()
55 | config = {'axis': self.axis}
56 | config.update(base_config)
57 | return config
58 |
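The masked minimum in ``call``, sketched with NumPy (using an arbitrary stand-in value for ``VERY_LARGE_NUMBER``):

    import numpy as np

    VERY_LARGE_NUMBER = 1e30   # stand-in for the constant from deep_qa.tensors.backend
    inputs = np.array([[3.0, 1.0, 7.0],
                       [5.0, 2.0, 0.0]])
    mask = np.array([[1, 1, 1],
                     [1, 1, 0]])   # the final 0.0 is padding
    flipped_and_scaled = (mask == 0).astype('float32') * VERY_LARGE_NUMBER
    minimums = (inputs + flipped_and_scaled).min(axis=-1, keepdims=True)   # padding ignored
    normalized = inputs - minimums   # [[2., 0., 6.], [3., 0., -2.]]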
--------------------------------------------------------------------------------
/deep_qa/layers/wrappers/__init__.py:
--------------------------------------------------------------------------------
1 | from .add_encoder_mask import AddEncoderMask
2 | from .encoder_wrapper import EncoderWrapper
3 | from .output_mask import OutputMask
4 | from .time_distributed import TimeDistributed
5 |
--------------------------------------------------------------------------------
/deep_qa/layers/wrappers/add_encoder_mask.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from overrides import overrides
3 |
4 | from ..masked_layer import MaskedLayer
5 |
6 |
7 | class AddEncoderMask(MaskedLayer):
8 | """
9 | This ``Layer`` handles masking for ``TimeDistributed`` encoders, like LSTMs, that condense
10 | sequences of vectors into single vectors (not LSTMs that return sequences; masking is already
11 | handled there correctly). Our :class:`~.encoder_wrapper.EncoderWrapper` class does the correct
12 | masking computation, but it inherits from ``TimeDistributed``, which does not work with unknown
13 | dimensions at run-time. If you want to wrap an encoder using
14 | :class:`~..backend.CollapseToBatch` and :class:`~..backend.ExpandFromBatch`, you need a way to
15 | get the mask back into the right form after running your encoder. This is an issue because
16 | Keras' encoders don't return masks when they output single vectors.
17 |
18 | For example, say you have a list of sentences, like [[5, 2, 1, 0], [2, 3, 1, 1], [0, 0, 0, 0]]
19 | (using word indices instead of embeddings for simplicity), which has been padded to be three
20 | sentences, even though only two of them are actually used. After passing it though an encoder,
21 | you'll have something like [[vector], [vector], [vector]], and you want a mask that looks like
22 | [1, 1, 0]. Keras' LSTMs and such won't give this to you. This method adds it back.
23 |
24 | Inputs:
25 | - A tensor with shape ``(batch_size, ..., encoding_dim)`` that is the output of some
26 | encoder that you got with
27 | :func:`~deep_qa.training.text_trainer.TextTrainer._get_encoder()` (not a seq2seq encoder
28 | that returns sequences).
29 | The mask for this tensor must be ``None``.
30 | - A tensor with shape ``(batch_size, ..., num_words, embedding_dim)`` that was the `input`
31 | to that encoder. The mask for this tensor must have shape ``(batch_size, ...,
32 | num_words)``.
33 |
34 | Output:
35 | - The first input tensor, with a mask computed from the second input tensor. The
36 | computation is just ``K.any()`` on the last dimension.
37 | """
38 | @overrides
39 | def compute_output_shape(self, input_shape):
40 | return input_shape[0]
41 |
42 | @overrides
43 | def compute_mask(self, inputs, mask=None):
44 | encoder_mask, embedding_mask = mask
45 | if encoder_mask is not None:
46 | raise RuntimeError("Refusing to add an encoder mask, because the tensor already has one")
47 | return K.any(embedding_mask, axis=-1)
48 |
49 | @overrides
50 | def call(self, inputs, mask=None): # pylint: disable=unused-argument
51 | # It turns out that Keras doesn't like it if you just return inputs, so we need to return a
52 | # different tensor object. Just doing a cast apparently doesn't work, either, so we'll
53 | # add 0.
54 | return inputs[0] + 0.0
55 |
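The mask recovery described above, continuing the docstring's example in NumPy:

    import numpy as np

    # Word-level mask from the embedding, shape (num_sentences, num_words).
    embedding_mask = np.array([[1, 1, 1, 0],
                               [1, 1, 1, 1],
                               [0, 0, 0, 0]])
    sentence_mask = np.any(embedding_mask, axis=-1)   # array([ True,  True, False])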
--------------------------------------------------------------------------------
/deep_qa/layers/wrappers/encoder_wrapper.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from deep_qa.layers.wrappers.time_distributed import TimeDistributed
3 |
4 |
5 | class EncoderWrapper(TimeDistributed):
6 | '''
7 | This class TimeDistributes a sentence encoder, applying the encoder to several word sequences.
8 | The only difference between this and the regular TimeDistributed is in how we handle the mask.
9 | Typically, an encoder will handle masked embedded input, and return None as its mask, as it
10 | just returns a vector and no more masking is necessary. However, if the encoder is
11 | TimeDistributed, we might run into a situation where _all_ of the words in a given sequence are
12 | masked (because we padded the number of sentences, for instance). In this case, we just want
13 | to mask the entire sequence. EncoderWrapper returns a mask with the same dimension as the
14 | input sequences, where sequences are masked if _all_ of their words were masked.
15 |
16 | Notes
17 | -----
18 | For seq2seq encoders, one should use either ``TimeDistributed`` or
19 | ``TimeDistributedWithMask`` since ``EncoderWrapper`` reduces the dimensionality
20 | of the input mask.
21 | '''
22 | def compute_mask(self, x, input_mask=None):
23 | # pylint: disable=unused-argument
24 | # Input mask (coming from Embedding) will be of shape (batch_size, knowledge_length, num_words).
25 | # Output mask should be of shape (batch_size, knowledge_length) with 0s for background sentences that
26 | # are all padding.
27 | if input_mask is None:
28 | return None
29 | else:
30 | # An output bit is 0 only if the bits corresponding to all input words are 0.
31 | return K.any(input_mask, axis=-1)
32 |
--------------------------------------------------------------------------------
/deep_qa/layers/wrappers/output_mask.py:
--------------------------------------------------------------------------------
1 | from overrides import overrides
2 |
3 | from ..masked_layer import MaskedLayer
4 |
5 |
6 | class OutputMask(MaskedLayer):
7 | """
8 | This Layer is purely for debugging. You can wrap this on a layer's output to get the mask
9 | output by that layer as a model output, for easier visualization of what the model is actually
10 | doing.
11 |
12 | Don't try to use this in an actual model.
13 | """
14 | @overrides
15 | def compute_mask(self, inputs, mask=None):
16 | return None
17 |
18 | @overrides
19 | def call(self, inputs, mask=None): # pylint: disable=unused-argument
20 | return mask
21 |
--------------------------------------------------------------------------------
/deep_qa/models/README.md:
--------------------------------------------------------------------------------
1 | # Models
2 |
3 | In this module we define a number of concrete models. The models are grouped by task, where each
4 | task has a roughly coherent input/output specification. See the README in each submodule for a
5 | description of the task models in that submodule are designed to solve.
6 |
7 | You should think of these models as more of "model families" than actual models, though, as there
8 | are typically options left unspecified in the models themselves. For example, models in this
9 | module might have a layer that encodes word sequences into vectors; they just call a method on
10 | `TextTrainer` to get an encoder, and the decision for which actual encoder is used (an LSTM, a
11 | CNN, or something else) happens in the parameters passed to `TextTrainer`. If you really want to,
12 | you can hard-code specific decisions for these things, but most models we have here use the
13 | `TextTrainer` API to abstract away these decisions, giving implementations of a class of similar
14 | models, instead of a single model.
15 |
16 | We also define a few general `Pretrainers` in a submodule here. The `Pretrainers` in this
17 | top-level submodule are suitable for pre-training a large class of models (e.g., any model that
18 | encodes sentences), while more task-specific `Pretrainers` are found in that task's submodule.
19 |
--------------------------------------------------------------------------------
/deep_qa/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .entailment import concrete_models as entailment_models
2 | from .sequence_tagging import concrete_models as sequence_tagging_models
3 | from .reading_comprehension import concrete_models as reading_comprehension_models
4 | from .text_classification import concrete_models as text_classification_models
5 |
6 | concrete_models = {} # pylint: disable=invalid-name
7 | __concrete_task_models = [ # pylint: disable=invalid-name
8 | entailment_models,
9 | sequence_tagging_models,
10 | reading_comprehension_models,
11 | text_classification_models,
12 | ]
13 | for models_for_task in __concrete_task_models:
14 | for model_name, model_class in models_for_task.items():
15 | if model_name in concrete_models:
16 | raise RuntimeError("Duplicate model name found: " + model_name)
17 | concrete_models[model_name] = model_class
18 |
--------------------------------------------------------------------------------
/deep_qa/models/entailment/README.md:
--------------------------------------------------------------------------------
1 | Entailment models take two sequences of text as input and make a classification decision on the
2 | pair. Typically that decision represents whether one sentence entails the other, but we'll use
3 | this family of models to represent any kind of classification decision over pairs of text.
4 |
5 | *Inputs:* Two text sequences
6 |
7 | *Output:* Some classification decision (typically "entails/not entails",
8 | "entails/neutral/contradicts", or similar)
9 |
--------------------------------------------------------------------------------
/deep_qa/models/entailment/__init__.py:
--------------------------------------------------------------------------------
1 | from .decomposable_attention import DecomposableAttention
2 |
3 | concrete_models = { # pylint: disable=invalid-name
4 | 'DecomposableAttention': DecomposableAttention,
5 | }
6 |
--------------------------------------------------------------------------------
/deep_qa/models/reading_comprehension/__init__.py:
--------------------------------------------------------------------------------
1 | from .attention_sum_reader import AttentionSumReader
2 | from .bidirectional_attention import BidirectionalAttentionFlow
3 | from .gated_attention_reader import GatedAttentionReader
4 |
5 | concrete_models = { # pylint: disable=invalid-name
6 | 'AttentionSumReader': AttentionSumReader,
7 | 'BidirectionalAttentionFlow': BidirectionalAttentionFlow,
8 | 'GatedAttentionReader': GatedAttentionReader,
9 | }
10 |
--------------------------------------------------------------------------------
/deep_qa/models/sequence_tagging/README.md:
--------------------------------------------------------------------------------
1 | Sequence tagging models take a sequence of text as input and produce as output a label for each
2 | token in the sequence. These models could do named entity recognition with BIO tags, or part of
3 | speech tagging, or other similar tasks.
4 |
--------------------------------------------------------------------------------
/deep_qa/models/sequence_tagging/__init__.py:
--------------------------------------------------------------------------------
1 | from .simple_tagger import SimpleTagger
2 |
3 | concrete_models = { # pylint: disable=invalid-name
4 | 'SimpleTagger': SimpleTagger,
5 | }
6 |
--------------------------------------------------------------------------------
/deep_qa/models/sequence_tagging/simple_tagger.py:
--------------------------------------------------------------------------------
1 | from keras.layers import Dense, Input, TimeDistributed
2 | from overrides import overrides
3 |
4 | from ...common.params import Params
5 | from ...data.instances.sequence_tagging import concrete_instances
6 | from ...training.text_trainer import TextTrainer
7 | from ...training.models import DeepQaModel
8 |
9 |
10 | class SimpleTagger(TextTrainer):
11 | """
12 | This ``SimpleTagger`` simply encodes a sequence of text with some number of stacked
13 | ``seq2seq_encoders``, then predicts a tag at each index.
14 |
15 | Parameters
16 | ----------
17 | num_stacked_rnns : int, optional (default: ``1``)
18 | The number of ``seq2seq_encoders`` that we should stack on top of each other before
19 | predicting tags.
20 | instance_type : str
21 | Specifies the particular subclass of ``TaggedSequenceInstance`` to use for loading data,
22 | which in turn defines things like how the input data is formatted and tokenized.
23 | """
24 | def __init__(self, params: Params):
25 | self.num_stacked_rnns = params.pop('num_stacked_rnns', 1)
26 | instance_type_choice = params.pop_choice("instance_type", concrete_instances.keys())
27 | self.instance_type = concrete_instances[instance_type_choice]
28 | super(SimpleTagger, self).__init__(params)
29 |
30 | @overrides
31 | def _instance_type(self): # pylint: disable=no-self-use
32 | return self.instance_type
33 |
34 | @overrides
35 | def _build_model(self):
36 | # shape: (batch_size, text_length)
37 | text_input = Input(shape=self._get_sentence_shape(), dtype='int32', name='text_input')
38 | # shape: (batch_size, text_length, embedding_dim)
39 | text_embedding = self._embed_input(text_input)
40 | for i in range(self.num_stacked_rnns):
41 | encoder = self._get_seq2seq_encoder(name="encoder_{}".format(i),
42 | fallback_behavior="use default params")
43 | # shape still (batch_size, text_length, embedding_dim)
44 | text_embedding = encoder(text_embedding)
45 | # The -2 below is because we are ignoring the padding and unknown tokens that the
46 | # DataIndexer has by default.
47 | predicted_tags = TimeDistributed(Dense(self.data_indexer.get_vocab_size('tags') - 2,
48 | activation='softmax'))(text_embedding)
49 | return DeepQaModel(input=text_input, output=predicted_tags)
50 |
51 | @overrides
52 | def _set_padding_lengths_from_model(self):
53 | self._set_text_lengths_from_model_input(self.model.get_input_shape_at(0)[1:])
54 |
--------------------------------------------------------------------------------
/deep_qa/models/text_classification/README.md:
--------------------------------------------------------------------------------
1 | Text classification models take a sequence of text as input and classify it into one of several
2 | classes.
3 |
4 | *Input:* Text sequence
5 |
6 | *Output:* Class label
7 |
--------------------------------------------------------------------------------
/deep_qa/models/text_classification/__init__.py:
--------------------------------------------------------------------------------
1 | from .classification_model import ClassificationModel
2 |
3 | concrete_models = { # pylint: disable=invalid-name
4 | 'ClassificationModel': ClassificationModel,
5 | }
6 |
--------------------------------------------------------------------------------
/deep_qa/tensors/README.md:
--------------------------------------------------------------------------------
1 | This module contains convenience functions for working with Keras tensors. Typically these
2 | functions will be called inside some `Layer` class.
3 |
--------------------------------------------------------------------------------
/deep_qa/tensors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/tensors/__init__.py
--------------------------------------------------------------------------------
/deep_qa/tensors/similarity_functions/README.md:
--------------------------------------------------------------------------------
1 | Similarity functions take a pair of tensors with the same shape, and compute a similarity function
2 | on the vectors in the last dimension. For example, the tensors might both have shape
3 | `(batch_size, sentence_length, embedding_dim)`, and we will compute some function of the two
4 | vectors of length `embedding_dim` for each position `(batch_size, sentence_length)`, returning a
5 | tensor of shape `(batch_size, sentence_length)`.
6 |
7 | The similarity function could be as simple as a dot product, or it could be a more complex,
8 | parameterized function. The SimilarityFunction class exposes an API for a Layer that wants to
9 | allow for multiple similarity functions, such as for initializing and returning weights.
10 |
11 | If you want to compute a similarity between tensors of different sizes, you need to first tile them
12 | in the appropriate dimensions to make them the same before you can use these functions. The
13 | Attention and MatrixAttention layers do this.
14 |
--------------------------------------------------------------------------------
/deep_qa/tensors/similarity_functions/__init__.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | from .bilinear import Bilinear
4 | from .dot_product import DotProduct
5 | from .linear import Linear
6 | from .cosine_similarity import CosineSimilarity
7 |
8 | # The first item added here will be used as the default in some cases.
9 | similarity_functions = OrderedDict() # pylint: disable=invalid-name
10 | similarity_functions['dot_product'] = DotProduct
11 | similarity_functions['bilinear'] = Bilinear
12 | similarity_functions['linear'] = Linear
13 | similarity_functions['cosine_similarity'] = CosineSimilarity
14 |
--------------------------------------------------------------------------------
/deep_qa/tensors/similarity_functions/bilinear.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from keras import backend as K
4 | from overrides import overrides
5 |
6 | from .similarity_function import SimilarityFunction
7 |
8 |
9 | class Bilinear(SimilarityFunction):
10 | """
11 | This similarity function performs a bilinear transformation of the two input vectors. This
12 | function has a matrix of weights W and a bias b, and the similarity between two vectors x and y
13 | is computed as `x^T W y + b`.
14 | """
15 | def __init__(self, **kwargs):
16 | super(Bilinear, self).__init__(**kwargs)
17 | self.weight_matrix = None
18 | self.bias = None
19 |
20 | @overrides
21 | def initialize_weights(self, tensor_1_dim: int, tensor_2_dim: int) -> List['K.variable']:
22 | self.weight_matrix = K.variable(self.init((tensor_1_dim, tensor_2_dim)),
23 | name=self.name + "_weights")
24 | self.bias = K.variable(self.init((1,)), name=self.name + "_bias")
25 | return [self.weight_matrix, self.bias]
26 |
27 | @overrides
28 | def compute_similarity(self, tensor_1, tensor_2):
29 | dot_product = K.sum(K.dot(tensor_1, self.weight_matrix) * tensor_2, axis=-1)
30 | return self.activation(dot_product + self.bias)
31 |
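The ``x^T W y + b`` similarity for a single pair of vectors, in NumPy:

    import numpy as np

    embedding_dim = 4
    x = np.random.rand(embedding_dim)
    y = np.random.rand(embedding_dim)
    W = np.random.rand(embedding_dim, embedding_dim)   # the layer's weight_matrix
    b = 0.1                                            # the layer's bias
    similarity = x.dot(W).dot(y) + b                   # a scalar
    # compute_similarity does the same thing batched over the leading dimensions,
    # then applies the configured activation (linear by default).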
--------------------------------------------------------------------------------
/deep_qa/tensors/similarity_functions/cosine_similarity.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from keras import backend as K
4 | from overrides import overrides
5 |
6 | from ...common.checks import ConfigurationError
7 | from .similarity_function import SimilarityFunction
8 |
9 |
10 | class CosineSimilarity(SimilarityFunction):
11 | """
12 | This similarity function simply computes the cosine similarity between each pair of vectors. It has
13 | no parameters.
14 | """
15 | def __init__(self, **kwargs):
16 | super(CosineSimilarity, self).__init__(**kwargs)
17 |
18 | @overrides
19 | def initialize_weights(self, tensor_1_dim: int, tensor_2_dim: int) -> List['K.variable']:
20 | if tensor_1_dim != tensor_2_dim:
21 | raise ConfigurationError("Tensor dims must match for cosine similarity, but "
22 | "were {} and {}".format(tensor_1_dim, tensor_2_dim))
23 | return []
24 |
25 | @overrides
26 | def compute_similarity(self, tensor_1, tensor_2):
27 | return K.sum(K.l2_normalize(tensor_1, axis=-1) * K.l2_normalize(tensor_2, axis=-1),
28 | axis=-1)
29 |
--------------------------------------------------------------------------------
/deep_qa/tensors/similarity_functions/dot_product.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from keras import backend as K
4 | from overrides import overrides
5 |
6 | from ...common.checks import ConfigurationError
7 | from .similarity_function import SimilarityFunction
8 |
9 |
10 | class DotProduct(SimilarityFunction):
11 | """
12 | This similarity function simply computes the dot product between each pair of vectors. It has
13 | no parameters.
14 | """
15 | def __init__(self, **kwargs):
16 | super(DotProduct, self).__init__(**kwargs)
17 |
18 | @overrides
19 | def initialize_weights(self, tensor_1_dim: int, tensor_2_dim: int) -> List['K.variable']:
20 | if tensor_1_dim != tensor_2_dim:
21 | raise ConfigurationError("Tensor dims must match for dot product similarity, but "
22 | "were {} and {}".format(tensor_1_dim, tensor_2_dim))
23 | return []
24 |
25 | @overrides
26 | def compute_similarity(self, tensor_1, tensor_2):
27 | return K.sum(tensor_1 * tensor_2, axis=-1)
28 |
--------------------------------------------------------------------------------
/deep_qa/tensors/similarity_functions/similarity_function.py:
--------------------------------------------------------------------------------
1 | """
2 | Similarity functions take a pair of tensors with the same shape, and compute a similarity function
3 | on the vectors in the last dimension. For example, the tensors might both have shape
4 | `(batch_size, sentence_length, embedding_dim)`, and we will compute some function of the two
5 | vectors of length `embedding_dim` for each position `(batch_size, sentence_length)`, returning a
6 | tensor of shape `(batch_size, sentence_length)`.
7 |
8 | The similarity function could be as simple as a dot product, or it could be a more complex,
9 | parameterized function. The SimilarityFunction class exposes an API for a Layer that wants to
10 | allow for multiple similarity functions, such as for initializing and returning weights.
11 |
12 | If you want to compute a similarity between tensors of different sizes, you need to first tile them
13 | in the appropriate dimensions to make them the same before you can use these functions. The
14 | Attention and MatrixAttention layers do this.
15 | """
16 | from typing import List
17 |
18 | from keras import activations, initializers
19 |
20 | class SimilarityFunction:
21 | def __init__(self, name: str, initialization: str='glorot_uniform', activation: str='linear'):
22 | self.name = name
23 | self.init = initializers.get(initialization)
24 | self.activation = activations.get(activation)
25 |
26 | def initialize_weights(self, tensor_1_dim: int, tensor_2_dim: int) -> List['K.variable']:
27 | """
28 | Called in a `Layer.build()` method that uses this SimilarityFunction, here we both
29 | initialize whatever weights are necessary for this similarity function, and return them so
30 | they can be included in `Layer.trainable_weights`.
31 |
32 |
33 | Parameters
34 | ----------
35 | tensor_1_dim : int
36 | The last dimension (typically ``embedding_dim``) of the first input tensor. We need
37 | this so we can initialize weights appropriately.
38 | tensor_2_dim : int
39 | The last dimension (typically ``embedding_dim``) of the second input tensor. We need
40 | this so we can initialize weights appropriately.
41 | """
42 | raise NotImplementedError
43 |
44 | def compute_similarity(self, tensor_1, tensor_2):
45 | """
46 | Takes two tensors of the same shape, such as (batch_size, length_1, length_2,
47 | embedding_dim). Computes a (possibly parameterized) similarity on the final dimension and
48 | returns a tensor with one less dimension, such as (batch_size, length_1, length_2).
49 | """
50 | raise NotImplementedError
51 |
--------------------------------------------------------------------------------
/deep_qa/testing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/deep_qa/testing/__init__.py
--------------------------------------------------------------------------------
/deep_qa/training/README.md:
--------------------------------------------------------------------------------
1 | # Trainers
2 |
3 | `Trainers` specify data, a model, and a way to train the model with the data. This module groups
4 | all of the common code related to these things, making only minimal assumptions about what kind of
5 | data you're using or what the structure of your model is. Really, a `Trainer` is just a nicer
6 | interface to a Keras `Model`, we just call it something else to not create too much naming
7 | confusion, and because the `Trainer` class provides a lot of functionality around training the
8 | model that a Keras `Model` doesn't.
9 |
10 | We also deal with the notion of _pre-training_ in this module. A `Pretrainer` is a `Trainer` that
11 | depends on another `Trainer`, building its model using pieces of the enclosed `Trainer`, so that
12 | training the `Pretrainer` updates the weights in the enclosed `Trainer` object.
13 |
--------------------------------------------------------------------------------
/deep_qa/training/__init__.py:
--------------------------------------------------------------------------------
1 | from .text_trainer import TextTrainer
2 | from .trainer import Trainer
3 |
--------------------------------------------------------------------------------
/deep_qa/training/optimizers.py:
--------------------------------------------------------------------------------
1 | r"""
2 | It turns out that Keras' design is somewhat crazy\*, and there is no list of
3 | optimizers that you can just import from Keras. So, this module specifies a
4 | list, and a helper function or two for dealing with optimizer parameters.
5 | Unfortunately, this means that we have a list that must be kept in sync with
6 | Keras. Oh well.
7 |
8 | \* Have you seen their get_from_module() method? See here:
9 | https://github.com/fchollet/keras/blob/6e42b0e4a77fb171295b541a6ae9a3a4a79f9c87/keras/utils/generic_utils.py#L10.
10 | That method means I could pass in 'clip_norm' as an optimizer, and it would try
11 | to use that function as an optimizer. It also means there is no simple list of
12 | implemented optimizers I can grab.
13 |
14 | \* I should also note that Keras is an incredibly useful library that does a lot
15 | of things really well. It just has a few quirks...
16 | """
17 | import logging
18 | from typing import Union
19 |
20 | # pylint: disable=no-name-in-module
21 | from tensorflow.python.training.gradient_descent import GradientDescentOptimizer
22 | from tensorflow.python.training.rmsprop import RMSPropOptimizer
23 | from tensorflow.python.training.adadelta import AdadeltaOptimizer
24 | from tensorflow.python.training.adagrad import AdagradOptimizer
25 | from tensorflow.python.training.adam import AdamOptimizer
26 | # pylint: enable=no-name-in-module
27 | from ..common.params import Params
28 |
29 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name
30 |
31 |
32 | optimizers = { # pylint: disable=invalid-name
33 | 'sgd': GradientDescentOptimizer,
34 | 'rmsprop': RMSPropOptimizer,
35 | 'adagrad': AdagradOptimizer,
36 | 'adadelta': AdadeltaOptimizer,
37 | 'adam': AdamOptimizer
38 | }
39 |
40 |
41 | def optimizer_from_params(params: Union[Params, str]):
42 | """
43 | This method converts from a parameter object like we use in our Trainer
44 | code into an optimizer object suitable for use with Keras. The simplest
45 | case for both of these is a string that shows up in `optimizers` above - if
46 |     `params` is just one of those strings, we instantiate that optimizer with its default arguments.
47 | If not, we assume `params` is a Dict[str, Any], with a "type" key, where
48 | the value for "type" must be one of those strings above. We take the rest
49 | of the parameters and pass them to the optimizer's constructor.
50 |
51 | """
52 | if isinstance(params, str):
53 | optimizer = params
54 | params = {}
55 | else:
56 | optimizer = params.pop_choice("type", optimizers.keys())
57 | return optimizers[optimizer](**params)
58 |
--------------------------------------------------------------------------------
/doc/_static/custom.css:
--------------------------------------------------------------------------------
1 | .toggle .header {
2 | display: block;
3 | clear: both;
4 | }
5 |
6 | .toggle .header:after {
7 | content: " ▼";
8 | }
9 |
10 | .toggle .header.open:after {
11 | content: " ▲";
12 | }
13 |
14 | .wy-nav-content a.internal code span.pre {
15 | color: blue;
16 | text-decoration: underline;
17 | }
18 |
--------------------------------------------------------------------------------
/doc/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 |
3 | {% set css_files = css_files + ["_static/custom.css"] %}
4 |
5 | {% block footer %}
6 |
16 | {% endblock %}
17 |
--------------------------------------------------------------------------------
/doc/common/about_common.rst:
--------------------------------------------------------------------------------
1 | Common Utils
2 | ============
3 |
4 | Here are some general utilities that we've written to help in other parts of the
5 | code base.
6 |
--------------------------------------------------------------------------------
/doc/common/checks.rst:
--------------------------------------------------------------------------------
1 | Checks
2 | ======
3 |
4 | .. automodule:: deep_qa.common.checks
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/doc/common/params.rst:
--------------------------------------------------------------------------------
1 | Parameter Utils
2 | ===============
3 |
4 | .. automodule:: deep_qa.common.params
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/doc/data/about_data.rst:
--------------------------------------------------------------------------------
1 | About Data
2 | ==========
3 |
4 | This module contains code for processing data. There's a DataIndexer, whose job
5 | it is to convert from strings to word (or character) indices suitable for use
6 | with an embedding matrix. There's code to load pre-trained embeddings from a
7 | file, to tokenize sentences, and, most importantly, to convert training and
8 | testing examples into numpy arrays that can be used with Keras.
9 |
10 | The most important thing to understand about the data processing code is the
11 | Dataset object. A Dataset is a collection of Instances, which are the individual
12 | examples used for training and testing. Dataset has two subclasses: TextDataset,
13 | which contains Instances with raw strings and can be read directly from a file,
14 | and IndexedDataset, which contains Instances whose raw strings have been
15 | converted to word (or character) indices. The IndexedDataset has methods for
16 | padding sequences to a consistent length, so that models can be compiled, and
17 | for converting the Instances to numpy arrays. The file formats read by
18 | TextDataset, and the format of the numpy arrays produced by IndexedDataset, are
19 | determined by the underlying Instance type used by the Dataset. See the
20 | instances module for more detail on this.
21 |
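22 | For orientation, here is a small sketch of reading a dataset of labeled sentences. The file names
23 | are illustrative; the calls mirror how the library's own tests use these classes.
24 |
25 | .. code-block:: python
26 |
27 |     from deep_qa.data.datasets.dataset import TextDataset
28 |     from deep_qa.data.instances.text_classification.text_classification_instance import (
29 |             TextClassificationInstance)
30 |
31 |     # Each line of the file becomes one TextClassificationInstance.
32 |     dataset = TextDataset.read_from_file('train.tsv', TextClassificationInstance)
33 |     print(len(dataset.instances))
34 |
35 |     # Datasets can be merged, e.g. to combine several training files.
36 |     more_data = TextDataset.read_from_file('more_train.tsv', TextClassificationInstance)
37 |     combined = dataset.merge(more_data)
38 |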
--------------------------------------------------------------------------------
/doc/data/data_generator.rst:
--------------------------------------------------------------------------------
1 | Data Generators
2 | ***************
3 |
4 | .. automodule:: deep_qa.data.data_generator
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
--------------------------------------------------------------------------------
/doc/data/datasets.rst:
--------------------------------------------------------------------------------
1 | Datasets
2 | ========
3 |
4 |
5 | deep_qa.data.datasets.dataset
6 | ------------------------------
7 |
8 | .. automodule:: deep_qa.data.datasets.dataset
9 | :members:
10 | :undoc-members:
11 | :show-inheritance:
12 |
13 | Entailment
14 | ----------
15 |
16 | .. automodule:: deep_qa.data.datasets.entailment.snli_dataset
17 | :members:
18 | :undoc-members:
19 | :show-inheritance:
20 |
21 | Language Modeling
22 | -----------------
23 |
24 | .. automodule:: deep_qa.data.datasets.language_modeling.language_modeling_dataset
25 | :members:
26 | :undoc-members:
27 | :show-inheritance:
--------------------------------------------------------------------------------
/doc/data/entailment.rst:
--------------------------------------------------------------------------------
1 | Entailment Instances
2 | ====================
3 |
4 | These ``Instances`` are designed for an entailment task, where the input is a pair of sentences
5 | (or larger text sequences) and the output is a classification decision.
6 |
7 | SentencePairInstances
8 | ---------------------
9 |
10 | .. automodule:: deep_qa.data.instances.entailment.sentence_pair_instance
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | SnliInstances
16 | -------------
17 |
18 | .. automodule:: deep_qa.data.instances.entailment.snli_instance
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
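23 | For orientation, an ``SnliInstance`` can be constructed directly as a (premise, hypothesis, label)
24 | triple; in practice these are read from a file via ``SnliDataset.read_from_file``. A small sketch:
25 |
26 | .. code-block:: python
27 |
28 |     from deep_qa.data.instances.entailment.snli_instance import SnliInstance
29 |
30 |     instance = SnliInstance("A person on a horse jumps over a broken down airplane.",
31 |                             "A person is outdoors, on a horse.",
32 |                             "entails")
33 |     print(instance.first_sentence, instance.second_sentence, instance.label)
34 |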
--------------------------------------------------------------------------------
/doc/data/general_data_utils.rst:
--------------------------------------------------------------------------------
1 | General Data Utils
2 | ==================
3 |
4 | deep_qa.data.data_indexer
5 | -------------------------
6 |
7 | .. automodule:: deep_qa.data.data_indexer
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | deep_qa.data.embeddings
13 | -----------------------
14 |
15 | .. automodule:: deep_qa.data.embeddings
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
--------------------------------------------------------------------------------
/doc/data/instances.rst:
--------------------------------------------------------------------------------
1 | Base Instances
2 | ==============
3 |
4 | An :class:`~deep_qa.data.instances.instance.Instance` is a single training or testing example for a Keras model. The base classes for
5 | working with ``Instances`` are found in instance.py. There are two subclasses: (1)
6 | :class:`~deep_qa.data.instances.instance.TextInstance`, which is a raw instance that contains
7 | actual strings, and can be used to determine a vocabulary for a model, or read directly from a
8 | file; and (2) :class:`~deep_qa.data.instances.instance.IndexedInstance`, which has had its raw
9 | strings converted to word (or character) indices, and can be padded to a consistent length and
10 | converted to numpy arrays for use with Keras.
11 |
12 | Concrete ``Instance`` classes are organized in the code by the task they are designed for (e.g.,
13 | text classification, reading comprehension, sequence tagging, etc.).
14 |
15 | A lot of the magic of how the DeepQA library works happens here, in the concrete Instance classes
16 | in this module. Most of the code can be totally agnostic to how exactly the input is structured,
17 | because the conversion to numpy arrays happens here, not in the Trainer or TextTrainer classes,
18 | with only the specific ``_build_model()`` methods needing to know about the format of their input
19 | and output (and even some of the details there are transparent to the model class).
20 |
21 | .. automodule:: deep_qa.data.instances.instance
22 | :members:
23 | :undoc-members:
24 | :show-inheritance:
25 |
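26 | To make the ``IndexedInstance`` half of this concrete, here is a small sketch using one of the
27 | sequence tagging instances (the indices are made up):
28 |
29 | .. code-block:: python
30 |
31 |     from deep_qa.data.instances.sequence_tagging.tagging_instance import IndexedTaggingInstance
32 |
33 |     instance = IndexedTaggingInstance([1, 2, 3, 4], [4, 5, 6])
34 |     # Each instance reports how much padding it needs; the maximum over a
35 |     # dataset is then used to pad every instance to the same shape.
36 |     print(instance.get_padding_lengths())    # {'num_sentence_words': 4}
37 |     instance.pad({'num_sentence_words': 6})
38 |     # as_training_data() produces the numpy arrays that get fed to Keras.
39 |     text_array, label_array = instance.as_training_data()
40 |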
--------------------------------------------------------------------------------
/doc/data/reading_comprehension.rst:
--------------------------------------------------------------------------------
1 | Reading Comprehension Instances
2 | ===============================
3 |
4 | These ``Instances`` are designed for the set of tasks known today as "reading comprehension", where
5 | the input is a natural language question, a passage, and (optionally) some number of answer
6 | options, and the output is either a (span begin index, span end index) decision over the passage,
7 | or a classification decision over the answer options (if provided).
8 |
9 |
10 | QuestionPassageInstances
11 | ------------------------
12 |
13 | .. automodule:: deep_qa.data.instances.reading_comprehension.question_passage_instance
14 | :members:
15 | :undoc-members:
16 | :show-inheritance:
17 |
18 | McQuestionPassageInstances
19 | --------------------------
20 |
21 | .. automodule:: deep_qa.data.instances.reading_comprehension.mc_question_passage_instance
22 | :members:
23 | :undoc-members:
24 | :show-inheritance:
25 |
26 | CharacterSpanInstances
27 | ----------------------
28 |
29 | .. automodule:: deep_qa.data.instances.reading_comprehension.character_span_instance
30 | :members:
31 | :undoc-members:
32 | :show-inheritance:
33 |
--------------------------------------------------------------------------------
/doc/data/sequence_tagging.rst:
--------------------------------------------------------------------------------
1 | Sequence Tagging Instances
2 | ==========================
3 |
4 | These ``Instances`` are designed for a sequence tagging task, where the input is a passage of
5 | natural language (e.g., a sentence), and the output is some classification decision for each token
6 | in that passage (e.g., part-of-speech tags, any kind of BIO tagging like NER or chunking, etc.).
7 |
8 | TaggingInstances
9 | ----------------
10 |
11 | .. automodule:: deep_qa.data.instances.sequence_tagging.tagging_instance
12 | :members:
13 | :undoc-members:
14 | :show-inheritance:
15 |
16 | PretokenizedTaggingInstances
17 | ----------------------------
18 |
19 | .. automodule:: deep_qa.data.instances.sequence_tagging.pretokenized_tagging_instance
20 | :members:
21 | :undoc-members:
22 | :show-inheritance:
23 |
--------------------------------------------------------------------------------
/doc/data/text_classification.rst:
--------------------------------------------------------------------------------
1 | Text Classification Instances
2 | *****************************
3 |
4 | These ``Instances`` are designed for any classification task over a single passage of text. The
5 | input is the passage (e.g., a sentence, a document, etc.), and the output is a single label (e.g.,
6 | positive / negative sentiment, spam / not spam, essay grade, etc.).
7 |
8 | TextClassificationInstances
9 | ---------------------------
10 |
11 | .. automodule:: deep_qa.data.instances.text_classification.text_classification_instance
12 | :members:
13 | :undoc-members:
14 | :show-inheritance:
15 |
--------------------------------------------------------------------------------
/doc/data/tokenizers.rst:
--------------------------------------------------------------------------------
1 | Tokenizers
2 | ==========
3 |
4 |
5 | character_tokenizer
6 | -------------------
7 |
8 | .. automodule:: deep_qa.data.tokenizers.character_tokenizer
9 | :members:
10 | :undoc-members:
11 | :show-inheritance:
12 |
13 | tokenizer
14 | ---------
15 |
16 | .. automodule:: deep_qa.data.tokenizers.tokenizer
17 | :members:
18 | :undoc-members:
19 | :show-inheritance:
20 |
21 | word_and_character_tokenizer
22 | ----------------------------
23 |
24 | .. automodule:: deep_qa.data.tokenizers.word_and_character_tokenizer
25 | :members:
26 | :undoc-members:
27 | :show-inheritance:
28 |
29 | word_splitter
30 | -------------
31 |
32 | .. automodule:: deep_qa.data.tokenizers.word_splitter
33 | :members:
34 | :undoc-members:
35 | :show-inheritance:
36 |
37 | word_tokenizer
38 | --------------
39 |
40 | .. automodule:: deep_qa.data.tokenizers.word_tokenizer
41 | :members:
42 | :undoc-members:
43 | :show-inheritance:
44 |
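45 | For orientation, here is a small usage sketch of the word-level tokenizer, mirroring the library's
46 | tests (the passage is illustrative):
47 |
48 | .. code-block:: python
49 |
50 |     from deep_qa.common.params import Params
51 |     from deep_qa.data.tokenizers.word_tokenizer import WordTokenizer
52 |
53 |     tokenizer = WordTokenizer(Params({}))
54 |     passage = "On January 7, 2012, Beyoncé gave birth to her first child."
55 |     # Map a character span in the passage to the corresponding token span.
56 |     token_span = tokenizer.char_span_to_token_span(passage, (3, 18))  # "January 7, 2012"
57 |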
--------------------------------------------------------------------------------
/doc/img/module_breakdown.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/doc/img/module_breakdown.png
--------------------------------------------------------------------------------
/doc/layers/about_layers.rst:
--------------------------------------------------------------------------------
1 | About Layers
2 | ============
3 |
4 | Custom layers that we have implemented belong here. These include things like
5 | knowledge encoders (which encode the memory component of a memory network),
6 | knowledge selectors (which perform an attention over the memory), and entailment
7 | models. There's also an encoders submodule, containing sentence encoders that
8 | convert an embedded word (or character) sequence into a vector.
9 |
--------------------------------------------------------------------------------
/doc/layers/attention.rst:
--------------------------------------------------------------------------------
1 | Attention
2 | =========
3 |
4 | Attention
5 | ---------
6 |
7 | .. automodule:: deep_qa.layers.attention.attention
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | GatedAttention
13 | --------------
14 |
15 | .. automodule:: deep_qa.layers.attention.gated_attention
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | MaskedSoftmax
21 | -------------
22 |
23 | .. automodule:: deep_qa.layers.attention.masked_softmax
24 | :members:
25 | :undoc-members:
26 | :show-inheritance:
27 |
28 | MatrixAttention
29 | ---------------
30 |
31 | .. automodule:: deep_qa.layers.attention.matrix_attention
32 | :members:
33 | :undoc-members:
34 | :show-inheritance:
35 |
36 | MaxSimilaritySoftmax
37 | --------------------
38 |
39 | .. automodule:: deep_qa.layers.attention.max_similarity_softmax
40 | :members:
41 | :undoc-members:
42 | :show-inheritance:
43 |
44 | WeightedSum
45 | -----------
46 |
47 | .. automodule:: deep_qa.layers.attention.weighted_sum
48 | :members:
49 | :undoc-members:
50 | :show-inheritance:
51 |
--------------------------------------------------------------------------------
/doc/layers/backend.rst:
--------------------------------------------------------------------------------
1 | Backend Layers
2 | ==============
3 |
4 | Layers in this module generally just implement some simple operation from the Keras backend as a
5 | Layer. The reason we have these as Layers is largely so that we can properly handle masking.
6 |
7 | AddMask
8 | -------
9 |
10 | .. automodule:: deep_qa.layers.backend.add_mask
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | BatchDot
16 | --------
17 |
18 | .. automodule:: deep_qa.layers.backend.batch_dot
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 | CollapseToBatch
24 | ---------------
25 |
26 | .. automodule:: deep_qa.layers.backend.collapse_to_batch
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 |
31 | ExpandFromBatch
32 | ---------------
33 |
34 | .. automodule:: deep_qa.layers.backend.expand_from_batch
35 | :members:
36 | :undoc-members:
37 | :show-inheritance:
38 |
39 | Envelope
40 | --------
41 |
42 | .. automodule:: deep_qa.layers.backend.envelope
43 | :members:
44 | :undoc-members:
45 | :show-inheritance:
46 |
47 | Max
48 | ---
49 |
50 | .. automodule:: deep_qa.layers.backend.max
51 | :members:
52 | :undoc-members:
53 | :show-inheritance:
54 |
55 | Permute
56 | -------
57 |
58 | .. automodule:: deep_qa.layers.backend.permute
59 | :members:
60 | :undoc-members:
61 | :show-inheritance:
62 |
63 | Repeat
64 | ------
65 |
66 | .. automodule:: deep_qa.layers.backend.repeat
67 | :members:
68 | :undoc-members:
69 | :show-inheritance:
70 |
71 | RepeatLike
72 | ----------
73 |
74 | .. automodule:: deep_qa.layers.backend.repeat_like
75 | :members:
76 | :undoc-members:
77 | :show-inheritance:
78 |
--------------------------------------------------------------------------------
/doc/layers/core_layers.rst:
--------------------------------------------------------------------------------
1 | Core Layers
2 | ===========
3 |
4 | Additive
5 | --------
6 |
7 | .. automodule:: deep_qa.layers.additive
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | BiGRUIndexSelector
13 | ------------------
14 |
15 | .. automodule:: deep_qa.layers.bigru_index_selector
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | ComplexConcat
21 | -------------
22 |
23 | .. automodule:: deep_qa.layers.complex_concat
24 | :members:
25 | :undoc-members:
26 | :show-inheritance:
27 |
28 | Highway
29 | -------
30 |
31 | .. automodule:: deep_qa.layers.highway
32 | :members:
33 | :undoc-members:
34 | :show-inheritance:
35 |
36 | L1Normalize
37 | -----------
38 |
39 | .. automodule:: deep_qa.layers.l1_normalize
40 | :members:
41 | :undoc-members:
42 | :show-inheritance:
43 |
44 | NoisyOr
45 | -------
46 |
47 | .. automodule:: deep_qa.layers.noisy_or
48 | :members:
49 | :undoc-members:
50 | :show-inheritance:
51 |
52 | OptionAttentionSum
53 | ------------------
54 |
55 | .. automodule:: deep_qa.layers.option_attention_sum
56 | :members:
57 | :undoc-members:
58 | :show-inheritance:
59 |
60 | Overlap
61 | -------
62 |
63 | .. automodule:: deep_qa.layers.overlap
64 | :members:
65 | :undoc-members:
66 | :show-inheritance:
67 |
68 | SubtractMinimum
69 | ---------------
70 |
71 | .. automodule:: deep_qa.layers.subtract_minimum
72 | :members:
73 | :undoc-members:
74 | :show-inheritance:
75 |
76 | VectorMatrixMerge
77 | -----------------
78 |
79 | .. automodule:: deep_qa.layers.vector_matrix_merge
80 | :members:
81 | :undoc-members:
82 | :show-inheritance:
83 |
84 | VectorMatrixSplit
85 | -----------------
86 |
87 | .. automodule:: deep_qa.layers.vector_matrix_split
88 | :members:
89 | :undoc-members:
90 | :show-inheritance:
91 |
--------------------------------------------------------------------------------
/doc/layers/encoders.rst:
--------------------------------------------------------------------------------
1 | Encoders
2 | ========
3 |
4 | BagOfWords
5 | ----------
6 |
7 | .. automodule:: deep_qa.layers.encoders.bag_of_words
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | ConvolutionalEncoder
13 | --------------------
14 |
15 | .. automodule:: deep_qa.layers.encoders.convolutional_encoder
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | PositionalEncoder
21 | -----------------
22 |
23 | .. automodule:: deep_qa.layers.encoders.positional_encoder
24 | :members:
25 | :undoc-members:
26 | :show-inheritance:
27 |
28 | AttentiveGRU
29 | ------------
30 |
31 | .. automodule:: deep_qa.layers.encoders.attentive_gru
32 | :members:
33 | :undoc-members:
34 | :show-inheritance:
35 |
36 |
--------------------------------------------------------------------------------
/doc/layers/entailment_models.rst:
--------------------------------------------------------------------------------
1 | Entailment Model Layers
2 | =======================
3 |
4 | DecomposableAttention
5 | ---------------------
6 |
7 | .. automodule:: deep_qa.layers.entailment_models.decomposable_attention
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | MultipleChoiceTupleEntailment
13 | -----------------------------
14 |
15 | .. automodule:: deep_qa.layers.entailment_models.multiple_choice_tuple_entailment
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | WordAlignment
21 | -------------
22 |
23 | .. automodule:: deep_qa.layers.entailment_models.word_alignment
24 | :members:
25 | :undoc-members:
26 | :show-inheritance:
27 |
--------------------------------------------------------------------------------
/doc/layers/wrappers.rst:
--------------------------------------------------------------------------------
1 | Wrappers
2 | ========
3 |
4 | EncoderWrapper
5 | --------------
6 |
7 | .. automodule:: deep_qa.layers.wrappers.encoder_wrapper
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | OutputMask
13 | ----------
14 |
15 | .. automodule:: deep_qa.layers.wrappers.output_mask
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | TimeDistributed
21 | ---------------
22 |
23 | .. automodule:: deep_qa.layers.wrappers.time_distributed
24 | :members:
25 | :undoc-members:
26 | :show-inheritance:
27 |
--------------------------------------------------------------------------------
/doc/models/entailment.rst:
--------------------------------------------------------------------------------
1 | Entailment Models
2 | =================
3 |
4 | Entailment models take two sequences of text as input and make a classification
5 | decision on the pair. Typically that decision represents whether one sentence
6 | entails the other, but we'll use this family of models to represent any kind of
7 | classification decision over pairs of text.
8 |
9 | Inputs: Two text sequences
10 |
11 | Output: Some classification decision (typically "entails/not entails", "entails/neutral/contradicts", or similar)
12 |
13 |
14 | DecomposableAttention
15 | ---------------------
16 |
17 | .. automodule:: deep_qa.models.entailment.decomposable_attention
18 | :members:
19 | :undoc-members:
20 | :show-inheritance:
21 |
--------------------------------------------------------------------------------
/doc/models/reading_comprehension.rst:
--------------------------------------------------------------------------------
1 | Reading Comprehension
2 | =====================
3 |
4 | AttentionSumReader
5 | ------------------
6 |
7 | .. automodule:: deep_qa.models.reading_comprehension.attention_sum_reader
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | BidirectionalAttentionFlow
13 | --------------------------
14 |
15 | .. automodule:: deep_qa.models.reading_comprehension.bidirectional_attention
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | GatedAttentionReader
21 | --------------------
22 |
23 | .. automodule:: deep_qa.models.reading_comprehension.gated_attention_reader
24 | :members:
25 | :undoc-members:
26 | :show-inheritance:
27 |
--------------------------------------------------------------------------------
/doc/models/text_classification.rst:
--------------------------------------------------------------------------------
1 | Text Classification
2 | ===================
3 |
4 | Text classification models take a sequence of text as input and classify it into
5 | one of several classes.
6 |
7 | Input: Text sequence
8 |
9 | Output: Class label
10 |
11 |
12 | ClassificationModel
13 | -------------------
14 |
15 | .. automodule:: deep_qa.models.text_classification.classification_model
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
--------------------------------------------------------------------------------
/doc/run.rst:
--------------------------------------------------------------------------------
1 | Running Models
2 | ==============
3 |
4 | .. automodule:: deep_qa.run
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
--------------------------------------------------------------------------------
/doc/tensors/about_tensors.rst:
--------------------------------------------------------------------------------
1 | Tensor Utils
2 | ============
3 |
4 | Here are some general tensor manipulation utilities that we've written to help
5 | in other parts of the code base.
6 |
--------------------------------------------------------------------------------
/doc/tensors/core_tensors.rst:
--------------------------------------------------------------------------------
1 | Core Tensor Utils
2 | =================
3 |
4 | backend
5 | -------
6 |
7 | .. automodule:: deep_qa.tensors.backend
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | masked_operations
13 | -----------------
14 |
15 | .. automodule:: deep_qa.tensors.masked_operations
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
--------------------------------------------------------------------------------
/doc/tensors/similarity_functions.rst:
--------------------------------------------------------------------------------
1 | Similarity Functions
2 | ====================
3 |
4 | .. automodule:: deep_qa.tensors.similarity_functions
5 |
6 | bilinear
7 | --------
8 |
9 | .. automodule:: deep_qa.tensors.similarity_functions.bilinear
10 | :members:
11 | :undoc-members:
12 | :show-inheritance:
13 |
14 | cosine_similarity
15 | -----------------
16 |
17 | .. automodule:: deep_qa.tensors.similarity_functions.cosine_similarity
18 | :members:
19 | :undoc-members:
20 | :show-inheritance:
21 |
22 | dot_product
23 | -----------
24 |
25 | .. automodule:: deep_qa.tensors.similarity_functions.dot_product
26 | :members:
27 | :undoc-members:
28 | :show-inheritance:
29 |
30 | linear
31 | ------
32 |
33 | .. automodule:: deep_qa.tensors.similarity_functions.linear
34 | :members:
35 | :undoc-members:
36 | :show-inheritance:
37 |
38 | similarity_function
39 | -------------------
40 |
41 | .. automodule:: deep_qa.tensors.similarity_functions.similarity_function
42 | :members:
43 | :undoc-members:
44 | :show-inheritance:
45 |
--------------------------------------------------------------------------------
/doc/training/about_trainers.rst:
--------------------------------------------------------------------------------
1 | About Trainers
2 | ==============
3 |
4 | A :class:`~deep_qa.training.trainer.Trainer` is the core interface to the DeepQA code. Trainers
5 | specify data, a model, and a way to train the model with the data. This module groups all of the
6 | common code related to these things, making only minimal assumptions about what kind of data you're
7 | using or what the structure of your model is. Really, a ``Trainer`` is just a nicer interface to a
8 | Keras ``Model``; we call it something else to avoid naming confusion, and because the ``Trainer``
9 | class provides a lot of functionality around training the model that a Keras ``Model``
10 | doesn't.
11 |
12 | On top of ``Trainer``, which is a nicer interface to a Keras ``Model``, this module provides a
13 | ``TextTrainer``, which adds a lot of functionality for building Keras ``Models`` that work with
14 | text. We provide APIs around word embeddings, sentence encoding, reading and padding datasets, and
15 | similar things. All of the concrete models that we have so far in DeepQA inherit from
16 | ``TextTrainer``, so understanding how to use this class is pretty important to understanding
17 | DeepQA.
18 |
19 | We also deal with the notion of pre-training in this module. A Pretrainer is a Trainer that depends
20 | on another Trainer, building its model using pieces of the enclosed Trainer, so that training the
21 | Pretrainer updates the weights in the enclosed Trainer object.
22 |
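23 | Concretely, a ``Trainer`` is specified with a parameter file. A trimmed-down sketch, adapted from
24 | the files in ``example_experiments/`` (the paths and values are illustrative), looks like this:
25 |
26 | .. code-block:: json
27 |
28 |     {
29 |         "model_class": "SimpleTagger",
30 |         "model_serialization_prefix": "/tmp/models/simple_tagger",
31 |         "num_epochs": 20,
32 |         "patience": 3,
33 |         "optimizer": {"type": "adadelta", "learning_rate": 0.5},
34 |         "embeddings": {"words": {"dimension": 100, "dropout": 0.2}},
35 |         "train_files": ["/path/to/train.tsv"],
36 |         "validation_files": ["/path/to/dev.tsv"]
37 |     }
38 |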
--------------------------------------------------------------------------------
/doc/training/misc.rst:
--------------------------------------------------------------------------------
1 | Misc
2 | ====
3 |
4 | Models
5 | ------
6 |
7 | .. automodule:: deep_qa.training.models
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | Optimizers
13 | ----------
14 |
15 | .. automodule:: deep_qa.training.optimizers
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
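20 | For orientation, here is a small sketch of ``optimizer_from_params`` (the values are illustrative):
21 |
22 | .. code-block:: python
23 |
24 |     from deep_qa.common.params import Params
25 |     from deep_qa.training.optimizers import optimizer_from_params
26 |
27 |     # A bare string picks an optimizer with its default arguments.
28 |     adam = optimizer_from_params("adam")
29 |
30 |     # A Params object needs a "type" key; the remaining keys are passed to
31 |     # the optimizer's constructor.
32 |     sgd = optimizer_from_params(Params({"type": "sgd", "learning_rate": 0.01}))
33 |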
--------------------------------------------------------------------------------
/doc/training/multi_gpu.rst:
--------------------------------------------------------------------------------
1 | Multi GPU Training
2 | ==================
3 |
4 | .. automodule:: deep_qa.training.multi_gpu
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
--------------------------------------------------------------------------------
/doc/training/text_trainer.rst:
--------------------------------------------------------------------------------
1 | TextTrainer
2 | ===========
3 |
4 | .. module:: deep_qa.training.text_trainer
5 |
6 | .. autoclass:: TextTrainer
7 |
8 | Utility methods
9 | ~~~~~~~~~~~~~~~
10 |
11 | These methods are intended for use by subclasses, mostly in your ``_build_model`` implementation.
12 |
13 | .. automethod:: TextTrainer._get_sentence_shape
14 | .. automethod:: TextTrainer._embed_input
15 | .. automethod:: TextTrainer._get_encoder
16 | .. automethod:: TextTrainer._get_seq2seq_encoder
17 | .. automethod:: TextTrainer._set_text_lengths_from_model_input
18 |
19 | Abstract methods
20 | ~~~~~~~~~~~~~~~~
21 |
22 | You `must` implement these methods in your model (along with
23 | :meth:`~deep_qa.training.trainer.Trainer._build_model`). The simplest concrete ``TextTrainer``
24 | implementations only have four methods: ``__init__``, ``_instance_type`` (typically one line),
25 | ``_set_padding_lengths_from_model`` (also typically one line, for simple models), and
26 | ``_build_model``. See
27 | :class:`~deep_qa.models.text_classification.classification_model.ClassificationModel` and
28 | :class:`~deep_qa.models.sequence_tagging.simple_tagger.SimpleTagger` for examples.
29 |
30 | .. automethod:: TextTrainer._instance_type
31 | .. automethod:: TextTrainer._set_padding_lengths_from_model
32 |
33 | Semi-abstract methods
34 | ~~~~~~~~~~~~~~~~~~~~~
35 |
36 | You'll likely need to override these methods if you have anything more complex than a single
37 | sentence as input.
38 |
39 | .. automethod:: TextTrainer.get_padding_lengths
40 | .. automethod:: TextTrainer.get_instance_sorting_keys
41 | .. automethod:: TextTrainer.get_padding_memory_scaling
42 | .. automethod:: TextTrainer._set_padding_lengths
43 |
44 | Overridden ``Trainer`` methods
45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
46 |
47 | You probably don't need to override these, except perhaps ``_get_custom_objects``. The rest
48 | you shouldn't need to worry about at all (except to call them, if they are part of the
49 | external ``Trainer`` API), but we document them here for completeness.
50 |
51 | .. automethod:: TextTrainer.create_data_arrays
52 | .. automethod:: TextTrainer.load_dataset_from_files
53 | .. automethod:: TextTrainer.score_dataset
54 | .. automethod:: TextTrainer.set_model_state_from_dataset
55 | .. automethod:: TextTrainer.set_model_state_from_indexed_dataset
56 | .. automethod:: TextTrainer._get_custom_objects
57 | .. automethod:: TextTrainer._dataset_indexing_kwargs
58 | .. automethod:: TextTrainer._load_auxiliary_files
59 | .. automethod:: TextTrainer._overall_debug_output
60 | .. automethod:: TextTrainer._save_auxiliary_files
61 | .. automethod:: TextTrainer._set_params_from_model
62 | .. automethod:: TextTrainer._uses_data_generators
63 |
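64 | Putting the pieces above together, the skeleton of a concrete ``TextTrainer`` subclass looks
65 | roughly like the sketch below. The method bodies and exact signatures are illustrative only; see
66 | ``SimpleTagger`` for a real implementation.
67 |
68 | .. code-block:: python
69 |
70 |     from deep_qa.training.text_trainer import TextTrainer
71 |
72 |     class MyModel(TextTrainer):
73 |         def __init__(self, params):
74 |             # Pop any model-specific parameters here, then let TextTrainer
75 |             # handle the rest (embeddings, encoders, data generators, ...).
76 |             super(MyModel, self).__init__(params)
77 |
78 |         def _instance_type(self):
79 |             # Return the Instance subclass this model reads (typically one line).
80 |             ...
81 |
82 |         def _build_model(self):
83 |             # Define Keras Input layers, embed them with self._embed_input(),
84 |             # encode them with self._get_encoder(), and return the model.
85 |             ...
86 |
87 |         def _set_padding_lengths_from_model(self):
88 |             # Set padding lengths from the shapes of a loaded model's inputs.
89 |             ...
90 |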
--------------------------------------------------------------------------------
/doc/training/trainer.rst:
--------------------------------------------------------------------------------
1 | Trainer
2 | =======
3 |
4 | .. module:: deep_qa.training.trainer
5 |
6 | .. autoclass:: Trainer
7 |
8 | Public methods
9 | ~~~~~~~~~~~~~~
10 |
11 | .. automethod:: Trainer.can_train
12 | .. automethod:: Trainer.evaluate_model
13 | .. automethod:: Trainer.load_data_arrays
14 | .. automethod:: Trainer.load_model
15 | .. automethod:: Trainer.train
16 |
17 | Abstract methods
18 | ~~~~~~~~~~~~~~~~
19 |
20 | If you're doing NLP, :class:`~deep_qa.training.text_trainer.TextTrainer` implements most of these,
21 | so you shouldn't have to worry about them. The only one it doesn't implement is ``_build_model``
22 | (though it adds some other abstract methods that you `might` have to worry about).
23 |
24 | .. automethod:: Trainer.create_data_arrays
25 | .. automethod:: Trainer.load_dataset_from_files
26 | .. automethod:: Trainer.score_dataset
27 | .. automethod:: Trainer.set_model_state_from_dataset
28 | .. automethod:: Trainer.set_model_state_from_indexed_dataset
29 | .. automethod:: Trainer._build_model
30 | .. automethod:: Trainer._set_params_from_model
31 | .. automethod:: Trainer._dataset_indexing_kwargs
32 |
33 | Protected methods
34 | ~~~~~~~~~~~~~~~~~
35 |
36 | .. automethod:: Trainer._get_callbacks
37 | .. automethod:: Trainer._get_custom_objects
38 | .. automethod:: Trainer._instance_debug_output
39 | .. automethod:: Trainer._load_auxiliary_files
40 | .. automethod:: Trainer._output_debug_info
41 | .. automethod:: Trainer._overall_debug_output
42 | .. automethod:: Trainer._post_epoch_hook
43 | .. automethod:: Trainer._pre_epoch_hook
44 | .. automethod:: Trainer._save_auxiliary_files
45 | .. automethod:: Trainer._uses_data_generators
46 |
--------------------------------------------------------------------------------
/example_experiments/entailment/snli_decomposable_attention.json:
--------------------------------------------------------------------------------
1 | {
2 | "model_class": "DecomposableAttention",
3 | "model_serialization_prefix": "/net/efs/aristo/dlfa/models/decomposable_attention/",
4 | "seq2seq_encoder": {
5 | "default": {
6 | "type": "bi_gru",
7 | "encoder_params": {
8 | "units": 100
9 | },
10 | "wrapper_params": {}
11 | }
12 | },
13 | "num_seq2seq_layers": 0,
14 | "decomposable_attention_params": {
15 | "num_hidden_layers": 2,
16 | "hidden_layer_width": 200,
17 | "hidden_layer_activation": "relu",
18 | "initializer": "random_normal"
19 | },
20 | "data_generator": {
21 | "dynamic_padding": true
22 | },
23 | "batch_size": 60,
24 | "patience": 3,
25 | "embeddings": {
26 | "words": {
27 | "dimension": 200,
28 | "pretrained_file": "/net/efs/aristo/dlfa/glove/glove.840B.300d.txt.gz",
29 | "project": true,
30 | "fine_tune": false,
31 | "dropout": 0.2
32 | }
33 | },
34 | "num_epochs": 20,
35 | "optimizer": {
36 | "type": "adadelta",
37 | "learning_rate": 0.5
38 | },
39 | "validation_files": ["/net/efs/aristo/dlfa/snli/processed/dev.tsv"],
40 | "train_files": ["/net/efs/aristo/dlfa/snli/processed/train.tsv"]
41 | }
42 |
--------------------------------------------------------------------------------
/example_experiments/reading_comprehension/asreader_who_did_what.json:
--------------------------------------------------------------------------------
1 | {
2 | "model_class": "AttentionSumReader",
3 | "model_serialization_prefix": "models/multiple_choice_qa/asreader",
4 | "encoder": {
5 | "default": {
6 | "type": "bi_gru",
7 | "units": 384
8 | }
9 | },
10 | "seq2seq_encoder": {
11 | "default": {
12 | "type": "bi_gru",
13 | "encoder_params": {
14 | "units": 384
15 | },
16 | "wrapper_params": {}
17 | }
18 | },
19 | "optimizer": {"type": "adam"},
20 | "gradient_clipping": {
21 | "type": "clip_by_norm",
22 | "value": 10
23 | },
24 | "patience": 1,
25 | "embeddings": {"words": {"dimension": 256, "dropout": 0.0}},
26 | "num_epochs": 5,
27 | "validation_files": ["/net/efs/data/dlfa/who_did_what/processed/strict/val.tsv"],
28 | "train_files": ["/net/efs/data/dlfa/who_did_what/processed/strict/train.tsv"]
29 | }
30 |
--------------------------------------------------------------------------------
/example_experiments/reading_comprehension/bidaf_squad.json:
--------------------------------------------------------------------------------
1 | {
2 | "model_class": "BidirectionalAttentionFlow",
3 | "model_serialization_prefix": "/net/efs/aristo/dlfa/models/bidaf",
4 | "encoder": {
5 | "word": {
6 | "type": "cnn",
7 | "ngram_filter_sizes": [5],
8 | "num_filters": 100
9 | }
10 | },
11 | "seq2seq_encoder": {
12 | "default": {
13 | "type": "bi_gru",
14 | "encoder_params": {
15 | "units": 100
16 | },
17 | "wrapper_params": {}
18 | }
19 | },
20 | "data_generator": {
21 | "dynamic_padding": true,
22 | "adaptive_batch_sizes": true,
23 | "adaptive_memory_usage_constant": 440000,
24 | "maximum_batch_size": 60
25 | },
26 | // This is not quite the same as Min's paper; we don't have encoder dropout yet.
27 | "patience": 3,
28 | "embeddings": {
29 | "words": {
30 | "dimension": 100,
31 | "pretrained_file": "/net/efs/aristo/dlfa/glove/glove.6B.100d.txt.gz",
32 | "project": true,
33 | "fine_tune": false,
34 | "dropout": 0.2
35 | },
36 | "characters": {
37 | "dimension": 8,
38 | "dropout": 0.2
39 | }
40 | },
41 | "num_epochs": 20,
42 | "optimizer": {
43 | "type": "adadelta",
44 | "learning_rate": 0.5
45 | },
46 | "validation_files": ["/net/efs/aristo/dlfa/squad/processed/dev.tsv"],
47 | "train_files": ["/net/efs/aristo/dlfa/squad/processed/train.tsv"]
48 | }
49 |
--------------------------------------------------------------------------------
/example_experiments/reading_comprehension/gareader_who_did_what.json:
--------------------------------------------------------------------------------
1 | {
2 | "embeddings": {
3 | "words": {
4 | "dimension": 200,
5 | "pretrained_file": "/net/efs/data/dlfa/glove/glove.6B.100d.txt.gz",
6 | "fine_tune": false,
7 | "project": true,
8 | "dropout": 0.0
9 | },
10 | "characters": {
11 | "dimension": 16,
12 | "dropout": 0.0
13 | }
14 | },
15 | "model_class": "GatedAttentionReader",
16 | "cloze_token": "xxxxx",
17 | "num_word_characters": 10,
18 | "model_serialization_prefix": "models/multiple_choice_qa/gareader_wdw",
19 | "num_gated_attention_layers": 3,
20 | "tokenizer": {
21 | "type": "words and characters"
22 | },
23 | "encoder": {
24 | "word": {
25 | "type": "bi_gru",
26 | "units": 25
27 | }
28 | },
29 | "seq2seq_encoder": {
30 | "question_0": {
31 | "type": "bi_gru",
32 | "encoder_params": {
33 | "units": 128
34 | },
35 | "wrapper_params": {}
36 | },
37 | "document_0": {
38 | "type": "bi_gru",
39 | "encoder_params": {
40 | "units": 128
41 | },
42 | "wrapper_params": {}
43 | },
44 | "question_1": {
45 | "type": "bi_gru",
46 | "encoder_params": {
47 | "units": 128
48 | },
49 | "wrapper_params": {}
50 | },
51 | "document_1": {
52 | "type": "bi_gru",
53 | "encoder_params": {
54 | "units": 128
55 | },
56 | "wrapper_params": {}
57 | },
58 | "document_final": {
59 | "type": "bi_gru",
60 | "encoder_params": {
61 | "units": 128
62 | },
63 | "wrapper_params": {}
64 | },
65 | "question_final":{
66 | "type": "bi_gru",
67 | "encoder_params": {
68 | "units": 128
69 | },
70 | "wrapper_params": {
71 | "merge_mode": "None"
72 | }
73 | }
74 | },
75 | "optimizer": {
76 | "type": "adam",
77 | "learning_rate": 0.0005
78 | },
79 | "gradient_clipping": {
80 | "type": "clip_by_norm",
81 | "value": 10
82 | },
83 | "patience": 5,
84 | "num_epochs": 10,
85 | "validation_files": ["/net/efs/data/dlfa/who_did_what/processed/strict/val.tsv"],
86 | "train_files": ["/net/efs/data/dlfa/who_did_what/processed/strict/train.tsv"]
87 | }
88 |
--------------------------------------------------------------------------------
/example_experiments/sequence_tagging/simple_tagger.json:
--------------------------------------------------------------------------------
1 | {
2 | "model_class": "SimpleTagger",
3 | "model_serialization_prefix": "/net/efs/aristo/dlfa/models/simple_tagger_test/",
4 | "encoder": {
5 | "word": {
6 | "type": "cnn",
7 | "ngram_filter_sizes": [2, 3, 4, 5],
8 | "num_filters": 100
9 | }
10 | },
11 | "seq2seq_encoder": {
12 | "default": {
13 | "type": "bi_gru",
14 | "encoder_params": {
15 | "units": 100
16 | },
17 | "wrapper_params": {}
18 | }
19 | },
20 | "num_stacked_rnns": 2,
21 | "instance_type": "PreTokenizedTaggingInstance",
22 | "tokenizer": {
23 | "type": "words and characters",
24 | "processor": {"word_splitter": "no_op"}
25 | },
26 | "data_generator": {
27 | "dynamic_padding": true
28 | },
29 | "patience": 3,
30 | "embeddings": {
31 | "words": {
32 | "pretrained_file": "/net/efs/aristo/dlfa/glove/glove.6B.100d.txt.gz",
33 | "project": false,
34 | "fine_tune": false,
35 | "dropout": 0.2
36 | },
37 | "characters": {
38 | "dimension": 8
39 | }
40 | },
41 | "num_epochs": 20,
42 | "optimizer": {
43 | "type": "adadelta",
44 | "learning_rate": 0.5
45 | },
46 | "validation_files": ["/net/efs/aristo/dlfa/squad/processed/tagging_dev.tsv"],
47 | "train_files": ["/net/efs/aristo/dlfa/squad/processed/tagging_train.tsv"]
48 | }
49 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = --disable-warnings
3 | testpaths = tests/
4 | python_paths = ./
5 |
--------------------------------------------------------------------------------
/scripts/clean_raw_omnibus.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | This script takes as input raw TSV files from the Omnibus dataset and
4 | preprocesses them to be compatible with the deep_qa pipeline.
5 | """
6 | import logging
7 | import os
8 | import csv
9 |
10 | from argparse import ArgumentParser
11 | import pandas
12 |
13 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name
14 |
15 |
16 | def main():
17 | log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
18 | logging.basicConfig(level=logging.INFO, format=log_format)
19 | parser = ArgumentParser(description=("Transform a raw Omnibus TSV "
20 | "to the format that the pipeline "
21 | "expects."))
22 | parser.add_argument('input_csv', nargs='+',
23 | metavar="", type=str,
24 | help=("Path of TSV files to clean up. Pass in "
25 | "as many as you want, and the output "
26 | "will be a concatenation of them "
27 | "written to .clean"))
28 |
29 | arguments = parser.parse_args()
30 | all_clean_file_rows = []
31 | for omnibus_file in arguments.input_csv:
32 | all_clean_file_rows.extend(clean_omnibus_csv(omnibus_file))
33 | # turn the list of rows into a dataframe, and write to TSV
34 | dataframe = pandas.DataFrame(all_clean_file_rows)
35 | folder, filename = os.path.split(arguments.input_csv[-1])
36 | outdirectory = folder + "/cleaned/"
37 | os.makedirs(outdirectory, exist_ok=True)
38 | outpath = outdirectory + filename + ".clean"
39 | logger.info("Saving cleaned file to %s", outpath)
40 | dataframe.to_csv(outpath, encoding="utf-8", index=False,
41 | sep="\t", header=False,
42 | quoting=csv.QUOTE_NONE)
43 |
44 |
45 | def clean_omnibus_csv(omnibus_file_path):
46 | logger.info("cleaning up %s", omnibus_file_path)
47 | # open the file as a csv
48 | dataframe = pandas.read_csv(omnibus_file_path, sep="\t",
49 | encoding='utf-8', header=None,
50 | quoting=csv.QUOTE_NONE)
51 | dataframe_trimmed = dataframe[[3, 9]]
52 | clean_rows = dataframe_trimmed.values.tolist()
53 | return clean_rows
54 |
55 | if __name__ == '__main__':
56 | main()
57 |
--------------------------------------------------------------------------------
/scripts/install_requirements.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | pip install -r requirements.txt
4 | python -m nltk.downloader punkt
5 | python -m spacy.en.download all
6 |
--------------------------------------------------------------------------------
/scripts/pylint.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | echo 'Starting pylint checks'
3 | pylint -d locally-disabled,locally-enabled -f colorized deep_qa tests scripts/*.py
4 | echo -e "pylint checks passed\n"
5 |
--------------------------------------------------------------------------------
/scripts/run_ensemble.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import sys
4 |
5 | # pylint: disable=wrong-import-position
6 | sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
7 | from deep_qa import score_dataset_with_ensemble, compute_accuracy
8 | from deep_qa.common.checks import ensure_pythonhashseed_set
9 |
10 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name
11 |
12 |
13 | def main():
14 | usage = 'USAGE: run_ensemble.py [param_file]+ -- [data_file]+'
15 | try:
16 | separator_index = sys.argv.index('--')
17 | except ValueError:
18 | print(usage)
19 | sys.exit(-1)
20 | param_files = sys.argv[1:separator_index]
21 | dataset_files = sys.argv[separator_index + 1:]
22 | predictions, labels = score_dataset_with_ensemble(param_files, dataset_files)
23 | compute_accuracy(predictions, labels)
24 |
25 |
26 | if __name__ == "__main__":
27 | ensure_pythonhashseed_set()
28 | logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
29 | level=logging.INFO)
30 | main()
31 |
--------------------------------------------------------------------------------
/scripts/run_model.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import sys
4 |
5 | # pylint: disable=wrong-import-position
6 | sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
7 | from deep_qa import run_model_from_file, evaluate_model
8 | from deep_qa.common.checks import ensure_pythonhashseed_set
9 |
10 | logger = logging.getLogger(__name__) # pylint: disable=invalid-name
11 |
12 |
13 | def main():
14 | usage = 'USAGE: run_model.py [param_file] [train|test]'
15 | if len(sys.argv) == 2:
16 | run_model_from_file(sys.argv[1])
17 | elif len(sys.argv) == 3:
18 | mode = sys.argv[2]
19 | if mode == 'train':
20 | run_model_from_file(sys.argv[1])
21 | elif mode == 'test':
22 | evaluate_model(sys.argv[1])
23 | else:
24 | print(usage)
25 | sys.exit(-1)
26 | else:
27 | print(usage)
28 | sys.exit(-1)
29 |
30 |
31 | if __name__ == "__main__":
32 | ensure_pythonhashseed_set()
33 | logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
34 | level=logging.INFO)
35 | main()
36 |
--------------------------------------------------------------------------------
/scripts/set_processor.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | USAGE="usage: ./set_processor.sh [gpu|cpu]"
4 |
5 | if [ $# != 1 ]; then
6 | echo "$USAGE"
7 | exit 1
8 | fi
9 |
10 | PROCESSOR=$1
11 | if [ "$PROCESSOR" == "gpu" ]; then
12 | echo "Setting the processor to '$PROCESSOR'."
13 | sed -ie 's/^tensorflow-gpu/tensorflow/g' requirements.txt
14 | sed -ie 's/^tensorflow/tensorflow-gpu/g' requirements.txt
15 | elif [ "$PROCESSOR" == "cpu" ]; then
16 | echo "Setting the processor to '$PROCESSOR'."
17 | sed -ie 's/^tensorflow-gpu/tensorflow/g' requirements.txt
18 | else
19 | echo "Unknown argument: $PROCESSOR"
20 | echo "$USAGE"
21 | exit 1
22 | fi
23 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test=pytest
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/__init__.py
--------------------------------------------------------------------------------
/tests/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/common/__init__.py
--------------------------------------------------------------------------------
/tests/common/pythonhashseed_test.py:
--------------------------------------------------------------------------------
1 | from deep_qa.common.checks import ensure_pythonhashseed_set
2 |
3 | def test_pythonhashseed():
4 | ensure_pythonhashseed_set()
5 |
--------------------------------------------------------------------------------
/tests/common/test_util.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | from deep_qa.common import util
3 | from deep_qa.testing.test_case import DeepQaTestCase
4 |
5 |
6 | class TestCommonUtils(DeepQaTestCase):
7 | def test_group_by_count(self):
8 | assert util.group_by_count([1, 2, 3, 4, 5, 6, 7], 3, 20) == [[1, 2, 3], [4, 5, 6], [7, 20, 20]]
9 |
--------------------------------------------------------------------------------
/tests/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/__init__.py
--------------------------------------------------------------------------------
/tests/data/dataset_readers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/dataset_readers/__init__.py
--------------------------------------------------------------------------------
/tests/data/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/datasets/__init__.py
--------------------------------------------------------------------------------
/tests/data/datasets/dataset_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | from deep_qa.data.datasets.dataset import Dataset, TextDataset
3 | from deep_qa.data.instances.text_classification.text_classification_instance import TextClassificationInstance
4 |
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 |
7 |
8 | class TestDataset:
9 | def test_merge(self):
10 | instances = [TextClassificationInstance("testing", None, None),
11 | TextClassificationInstance("testing1", None, None)]
12 | dataset1 = Dataset(instances[:1])
13 | dataset2 = Dataset(instances[1:])
14 | merged = dataset1.merge(dataset2)
15 | assert merged.instances == instances
16 |
17 |
18 | class TestTextDataset(DeepQaTestCase):
19 | def test_read_from_file_with_no_default_label(self):
20 | filename = self.TEST_DIR + 'test_dataset_file'
21 | with open(filename, 'w') as datafile:
22 | datafile.write("1\tinstance1\t0\n")
23 | datafile.write("2\tinstance2\t1\n")
24 | datafile.write("3\tinstance3\n")
25 | dataset = TextDataset.read_from_file(filename, TextClassificationInstance)
26 | assert len(dataset.instances) == 3
27 | instance = dataset.instances[0]
28 | assert instance.index == 1
29 | assert instance.text == "instance1"
30 | assert instance.label is False
31 | instance = dataset.instances[1]
32 | assert instance.index == 2
33 | assert instance.text == "instance2"
34 | assert instance.label is True
35 | instance = dataset.instances[2]
36 | assert instance.index == 3
37 | assert instance.text == "instance3"
38 | assert instance.label is None
39 |
--------------------------------------------------------------------------------
/tests/data/datasets/language_modeling_dataset_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | from deep_qa.common.params import Params
3 | from deep_qa.data.datasets import LanguageModelingDataset
4 | from deep_qa.data.instances.language_modeling.sentence_instance import SentenceInstance
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 |
7 |
8 | class TestLanguageModellingDataset(DeepQaTestCase):
9 |
10 | def setUp(self):
11 | super(TestLanguageModellingDataset, self).setUp()
12 | self.write_sentence_data()
13 |
14 | def test_read_from_file(self):
15 | args = Params({"sequence_length": 4})
16 | dataset = LanguageModelingDataset.read_from_file(self.TRAIN_FILE, SentenceInstance, args)
17 |
18 | instances = dataset.instances
19 | assert instances[0].text == "This is a sentence"
20 | assert instances[1].text == "for language modelling. Here's"
21 | assert instances[2].text == "another one for language"
22 |
--------------------------------------------------------------------------------
/tests/data/datasets/snli_dataset_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | from deep_qa.data.datasets import SnliDataset
3 | from deep_qa.data.instances.entailment.snli_instance import SnliInstance
4 |
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 |
7 |
8 | class TestSnliDataset(DeepQaTestCase):
9 |
10 | def setUp(self):
11 | super(TestSnliDataset, self).setUp()
12 | self.write_original_snli_data()
13 |
14 | def test_read_from_file(self):
15 | dataset = SnliDataset.read_from_file(self.TRAIN_FILE, SnliInstance)
16 |
17 | instance1 = SnliInstance("A person on a horse jumps over a broken down airplane.",
18 | "A person is training his horse for a competition.",
19 | "neutral")
20 | instance2 = SnliInstance("A person on a horse jumps over a broken down airplane.",
21 | "A person is at a diner, ordering an omelette.",
22 | "contradicts")
23 | instance3 = SnliInstance("A person on a horse jumps over a broken down airplane.",
24 | "A person is outdoors, on a horse.",
25 | "entails")
26 |
27 | assert len(dataset.instances) == 3
28 | instance = dataset.instances[0]
29 | assert instance.index == instance1.index
30 | assert instance.first_sentence == instance1.first_sentence
31 | assert instance.second_sentence == instance1.second_sentence
32 | assert instance.label == instance1.label
33 | instance = dataset.instances[1]
34 | assert instance.index == instance2.index
35 | assert instance.first_sentence == instance2.first_sentence
36 | assert instance.second_sentence == instance2.second_sentence
37 | assert instance.label == instance2.label
38 | instance = dataset.instances[2]
39 | assert instance.index == instance3.index
40 | assert instance.first_sentence == instance3.first_sentence
41 | assert instance.second_sentence == instance3.second_sentence
42 | assert instance.label == instance3.label
43 |
--------------------------------------------------------------------------------
/tests/data/instances/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/__init__.py
--------------------------------------------------------------------------------
/tests/data/instances/entailment/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/entailment/__init__.py
--------------------------------------------------------------------------------
/tests/data/instances/entailment/sentence_pair_instance_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | import numpy
3 |
4 | from deep_qa.data.instances.entailment.sentence_pair_instance import IndexedSentencePairInstance
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 |
7 |
8 | class TestIndexedSentencePairInstance(DeepQaTestCase):
9 | def test_get_padding_lengths_returns_max_of_both_sentences(self):
10 | instance = IndexedSentencePairInstance([1, 2, 3], [1], True)
11 | assert instance.get_padding_lengths() == {'num_sentence_words': 3}
12 | instance = IndexedSentencePairInstance([1, 2, 3], [1, 2, 3, 4], True)
13 | assert instance.get_padding_lengths() == {'num_sentence_words': 4}
14 |
15 | def test_pad_pads_both_sentences(self):
16 | instance = IndexedSentencePairInstance([1, 2], [3, 4], True)
17 | instance.pad({'num_sentence_words': 3})
18 | assert instance.first_sentence_indices == [0, 1, 2]
19 | assert instance.second_sentence_indices == [0, 3, 4]
20 |
21 | def test_as_training_data_produces_correct_numpy_arrays(self):
22 | # pylint: disable=redefined-variable-type
23 | instance = IndexedSentencePairInstance([1, 2], [3, 4], [0, 1, 0])
24 | inputs, label = instance.as_training_data()
25 | assert isinstance(inputs, tuple)
26 | assert len(inputs) == 2
27 | assert numpy.all(inputs[0] == numpy.asarray([1, 2]))
28 | assert numpy.all(inputs[1] == numpy.asarray([3, 4]))
29 | assert numpy.all(label == numpy.asarray([0, 1, 0]))
30 |
--------------------------------------------------------------------------------
/tests/data/instances/language_modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/language_modeling/__init__.py
--------------------------------------------------------------------------------
/tests/data/instances/reading_comprehension/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/reading_comprehension/__init__.py
--------------------------------------------------------------------------------
/tests/data/instances/sequence_tagging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/sequence_tagging/__init__.py
--------------------------------------------------------------------------------
/tests/data/instances/sequence_tagging/test_tagging_instance.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | from deep_qa.data.instances.sequence_tagging.tagging_instance import IndexedTaggingInstance
3 | from deep_qa.testing.test_case import DeepQaTestCase
4 | from numpy.testing import assert_array_almost_equal
5 |
6 |
7 | class TestIndexedTaggingInstance(DeepQaTestCase):
8 | def setUp(self):
9 | super(TestIndexedTaggingInstance, self).setUp()
10 | self.instance = IndexedTaggingInstance([1, 2, 3, 4], [4, 5, 6])
11 |
12 | def test_get_padding_lengths_returns_correct_lengths(self):
13 | assert self.instance.get_padding_lengths() == {'num_sentence_words': 4}
14 |
15 | def test_pad_truncates_correctly(self):
16 | self.instance.pad({'num_sentence_words': 2})
17 | assert self.instance.text_indices == [1, 2]
18 |
19 | def test_pad_adds_padding_correctly(self):
20 | self.instance.pad({'num_sentence_words': 6})
21 | assert self.instance.text_indices == [1, 2, 3, 4, 0, 0]
22 |
23 | def test_as_training_data_produces_correct_arrays(self):
24 | text_array, label_array = self.instance.as_training_data()
25 | assert_array_almost_equal(text_array, [1, 2, 3, 4])
26 | assert_array_almost_equal(label_array, [4, 5, 6])
27 |
--------------------------------------------------------------------------------
/tests/data/instances/text_classification/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/data/instances/text_classification/__init__.py
--------------------------------------------------------------------------------
/tests/data/tokenizers/tokenizer_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | from deep_qa.data.tokenizers.word_tokenizer import WordTokenizer
4 | from deep_qa.common.params import Params
5 |
6 | class TestTokenizer:
7 | tokenizer = WordTokenizer(Params({}))
8 | passage = "On January 7, 2012, Beyoncé gave birth to her first child, a daughter, Blue Ivy " +\
9 | "Carter, at Lenox Hill Hospital in New York. Five months later, she performed for four " +\
10 | "nights at Revel Atlantic City's Ovation Hall to celebrate the resort's opening, her " +\
11 | "first performances since giving birth to Blue Ivy."
12 |
13 | def test_char_span_to_token_span_handles_easy_cases(self):
14 | # "January 7, 2012"
15 | token_span = self.tokenizer.char_span_to_token_span(self.passage, (3, 18))
16 | assert token_span == (1, 5)
17 | # "Lenox Hill Hospital"
18 | token_span = self.tokenizer.char_span_to_token_span(self.passage, (91, 110))
19 | assert token_span == (22, 25)
20 | # "Lenox Hill Hospital in New York."
21 | token_span = self.tokenizer.char_span_to_token_span(self.passage, (91, 123))
22 | assert token_span == (22, 29)
23 |
--------------------------------------------------------------------------------
/tests/data/tokenizers/word_processor_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | from deep_qa.data.tokenizers.word_processor import WordProcessor
4 | from deep_qa.common.params import Params
5 |
6 | class TestWordProcessor:
7 | def test_passes_through_correctly(self):
8 | word_processor = WordProcessor(Params({}))
9 | sentence = "this (sentence) has 'crazy' \"punctuation\"."
10 | tokens = word_processor.get_tokens(sentence)
11 | expected_tokens = ["this", "(", "sentence", ")", "has", "'", "crazy", "'", "\"",
12 | "punctuation", "\"", "."]
13 | assert tokens == expected_tokens
14 |
15 | def test_stems_and_filters_correctly(self):
16 | word_processor = WordProcessor(Params({'word_stemmer': 'porter', 'word_filter': 'stopwords'}))
17 | sentence = "this (sentence) has 'crazy' \"punctuation\"."
18 | expected_tokens = ["sentenc", "ha", "crazi", "punctuat"]
19 | tokens = word_processor.get_tokens(sentence)
20 | assert tokens == expected_tokens
21 |
--------------------------------------------------------------------------------
/tests/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/layers/__init__.py
--------------------------------------------------------------------------------
/tests/layers/attention/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/layers/attention/__init__.py
--------------------------------------------------------------------------------
/tests/layers/attention/masked_softmax_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from keras import backend as K
5 | from keras.layers import Input
6 | from keras.models import Model
7 |
8 | from deep_qa.layers.attention.masked_softmax import MaskedSoftmax
9 |
10 | class TestMaskedSoftmaxLayer:
11 | def test_call_works_with_no_mask(self):
12 | batch_size = 1
13 | num_options = 4
14 | options_input = Input(shape=(num_options,), dtype='float32')
15 | softmax_result = MaskedSoftmax()(options_input)
16 | model = Model(inputs=[options_input], outputs=[softmax_result])
17 | options_tensor = numpy.asarray([[2, 4, 0, 1]])
18 | softmax_tensor = model.predict([options_tensor])
19 | assert softmax_tensor.shape == (batch_size, num_options)
20 | numpy.testing.assert_almost_equal(softmax_tensor,
21 | [[0.112457, 0.830953, 0.015219, 0.041371]],
22 | decimal=5)
23 |
24 | def test_call_handles_higher_order_input(self):
25 | batch_size = 1
26 | length_1 = 5
27 | length_2 = 3
28 | num_options = 4
29 | options_input = Input(shape=(length_1, length_2, num_options,), dtype='float32')
30 | softmax_result = MaskedSoftmax()(options_input)
31 | model = Model(inputs=[options_input], outputs=[softmax_result])
32 | options_tensor = numpy.zeros((batch_size, length_1, length_2, num_options))
33 | for i in range(length_1):
34 | for j in range(length_2):
35 | options_tensor[0, i, j] = [2, 4, 0, 1]
36 | softmax_tensor = model.predict([options_tensor])
37 | assert softmax_tensor.shape == (batch_size, length_1, length_2, num_options)
38 | for i in range(length_1):
39 | for j in range(length_2):
40 | numpy.testing.assert_almost_equal(softmax_tensor[0, i, j],
41 | [0.112457, 0.830953, 0.015219, 0.041371],
42 | decimal=5)
43 |
44 | def test_call_handles_masking_properly(self):
45 | options = K.variable(numpy.asarray([[2, 4, 0, 1]]))
46 | mask = K.variable(numpy.asarray([[1, 0, 1, 1]]))
47 | softmax = K.eval(MaskedSoftmax().call(options, mask=mask))
48 | assert softmax.shape == (1, 4)
49 | numpy.testing.assert_almost_equal(softmax, [[0.66524096, 0, 0.09003057, 0.24472847]])
50 |
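For reference, the expected values asserted above can be reproduced with plain numpy. The sketch below assumes MaskedSoftmax behaves like a softmax in which masked entries contribute nothing to the normalization; that formula is inferred from the asserted outputs, not taken from the layer's implementation:

    import numpy

    def masked_softmax_reference(logits, mask=None):
        """Numpy reference: softmax over the last axis, zeroing masked positions."""
        if mask is None:
            mask = numpy.ones_like(logits)
        exponentiated = numpy.exp(logits) * mask
        return exponentiated / exponentiated.sum(axis=-1, keepdims=True)

    # Unmasked case from the first test above.
    print(masked_softmax_reference(numpy.asarray([[2., 4., 0., 1.]])))
    # -> approximately [[0.112457, 0.830953, 0.015219, 0.041371]]

    # Masked case from the last test above.
    print(masked_softmax_reference(numpy.asarray([[2., 4., 0., 1.]]),
                                   numpy.asarray([[1., 0., 1., 1.]])))
    # -> approximately [[0.665241, 0.0, 0.090031, 0.244728]]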
--------------------------------------------------------------------------------
/tests/layers/attentive_gru_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use
2 | import numpy
3 |
4 | from keras.layers import Input, Embedding, merge
5 | from keras.models import Model
6 | import keras.backend as K
7 |
8 | from deep_qa.layers.encoders import AttentiveGru
9 |
10 |
11 | class TestAttentiveGRU:
12 | def test_on_unmasked_input(self):
13 |
14 | sentence_length = 5
15 | embedding_dim = 10
16 | vocabulary_size = 15
17 | input_layer = Input(shape=(sentence_length,), dtype='int32')
18 | attention = Input(shape=(sentence_length,), dtype='float32')
19 | # Embedding does not mask zeros
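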
20 | embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim)
21 | attentive_gru = AttentiveGru(output_dim=embedding_dim,
22 | input_length=sentence_length,
23 | return_sequences=True,
24 | name='attentive_gru_test')
25 | embedded_input = embedding(input_layer)
26 | concat_mode = lambda layer_outs: K.concatenate([K.expand_dims(layer_outs[0], axis=2),
27 | layer_outs[1]],
28 | axis=2)
29 |
30 | combined_sentence_with_attention = merge([attention, embedded_input],
31 | mode=concat_mode,
32 | output_shape=(5, 11))
33 |
34 | sequence_of_outputs = attentive_gru(combined_sentence_with_attention)
35 | model = Model(inputs=[input_layer, attention], outputs=sequence_of_outputs)
36 | model.compile(loss="mse", optimizer="sgd") # Will not train this model
37 | test_input = numpy.asarray([[0, 3, 1, 7, 10]], dtype='int32')
38 | attention_input = numpy.asarray([[1., 0., 0., 0., 0.]], dtype='float32')
39 |
40 |         # To check this model, we pass in an attention vector which is all zeros apart from
41 |         # the first element, which is one. Every output should then equal the first output,
42 |         # because the state never changes over time - none of the memory is ever added in.
43 |         # This is not the intended use of this class, but if this works, the intended use
44 |         # will be correct.
45 | actual_sequence_of_outputs = numpy.squeeze(model.predict([test_input, attention_input]))
46 | for i in range(sentence_length - 1):
47 | assert numpy.array_equal(actual_sequence_of_outputs[i, :], actual_sequence_of_outputs[i+1, :])
48 |
--------------------------------------------------------------------------------
/tests/layers/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/layers/backend/__init__.py
--------------------------------------------------------------------------------
/tests/layers/backend/collapse_and_expand_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from numpy.testing import assert_allclose
5 | from keras.layers import Input, Dense
6 | from keras.models import Model
7 |
8 | from deep_qa.layers.backend import CollapseToBatch, ExpandFromBatch, AddMask
9 |
10 |
11 | class TestCollapseAndExpand:
12 | # We need to test CollapseToBatch and ExpandFromBatch together, because Keras doesn't like it
13 | # if you change the batch size between inputs and outputs. It makes sense to test them
14 | # together, anyway.
15 | def test_collapse_and_expand_works_with_dynamic_shape(self):
16 | batch_size = 3
17 | length1 = 5
18 | length2 = 7
19 | length3 = 2
20 | dense_units = 6
21 | input_layer = Input(shape=(length1, None, length3), dtype='float32')
22 | masked_input = AddMask(mask_value=1)(input_layer)
23 | collapsed_1 = CollapseToBatch(num_to_collapse=1)(masked_input)
24 | collapsed_2 = CollapseToBatch(num_to_collapse=2)(masked_input)
25 | dense = Dense(dense_units)(collapsed_2)
26 | expanded_1 = ExpandFromBatch(num_to_expand=1)([collapsed_1, masked_input])
27 | expanded_2 = ExpandFromBatch(num_to_expand=2)([collapsed_2, masked_input])
28 | expanded_dense = ExpandFromBatch(num_to_expand=2)([dense, masked_input])
29 | model = Model(inputs=input_layer, outputs=[expanded_1, expanded_2, expanded_dense])
30 |
31 | input_tensor = numpy.random.randint(0, 3, (batch_size, length1, length2, length3))
32 | expanded_1_tensor, expanded_2_tensor, expanded_dense_tensor = model.predict(input_tensor)
33 | assert expanded_1_tensor.shape == input_tensor.shape
34 | assert expanded_2_tensor.shape == input_tensor.shape
35 | assert expanded_dense_tensor.shape == input_tensor.shape[:-1] + (dense_units,)
36 | assert_allclose(expanded_1_tensor, input_tensor)
37 | assert_allclose(expanded_2_tensor, input_tensor)
38 |
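In numpy terms, the shape bookkeeping that this test exercises amounts to folding leading dimensions into the batch dimension and unfolding them again. A minimal sketch of that reshaping, covering the shapes only and not the layers' mask handling (the reshape calls here are an illustration of the intent, not the layers' code):

    import numpy

    batch_size, length1, length2, length3 = 3, 5, 7, 2
    inputs = numpy.random.rand(batch_size, length1, length2, length3)

    # "Collapse" the first two non-batch dimensions into the batch dimension,
    # which is what CollapseToBatch(num_to_collapse=2) is expected to do.
    collapsed = inputs.reshape((-1,) + inputs.shape[3:])
    assert collapsed.shape == (batch_size * length1 * length2, length3)

    # "Expand" back to the original leading dimensions, as ExpandFromBatch(num_to_expand=2)
    # is expected to do, using the original tensor to recover those dimensions.
    expanded = collapsed.reshape(inputs.shape[:3] + collapsed.shape[1:])
    assert expanded.shape == inputs.shape
    numpy.testing.assert_allclose(expanded, inputs)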
--------------------------------------------------------------------------------
/tests/layers/backend/envelope_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from keras.layers import Input
5 | from keras.models import Model
6 |
7 | from deep_qa.layers.backend import Envelope
8 |
9 | class TestEnvelopeLayer:
10 | def test_call_works_on_simple_input(self):
11 | batch_size = 1
12 | sequence_length = 5
13 | span_begin_input = Input(shape=(sequence_length,), dtype='float32')
14 | span_end_input = Input(shape=(sequence_length,), dtype='float32')
15 | envelope = Envelope()([span_begin_input, span_end_input])
16 | model = Model(inputs=[span_begin_input, span_end_input], outputs=[envelope])
17 | span_begin_tensor = numpy.asarray([[0.01, 0.1, 0.8, 0.05, 0.04]])
18 | span_end_tensor = numpy.asarray([[0.01, 0.04, 0.05, 0.2, 0.7]])
19 | envelope_tensor = model.predict([span_begin_tensor, span_end_tensor])
20 | assert envelope_tensor.shape == (batch_size, sequence_length)
21 | expected_envelope = [[0.01 * 0.99, 0.11 * 0.95, 0.91 * 0.9, 0.96 * 0.7, 1.0 * 0.0]]
22 | numpy.testing.assert_almost_equal(envelope_tensor, expected_envelope)
23 |
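The expected_envelope values above follow a simple pattern: each position's value is the probability that the span has begun at or before that position times the probability that it has not yet ended there. A numpy sketch of that reading (inferred from the asserted numbers, not quoted from the Envelope layer):

    import numpy

    span_begin = numpy.asarray([0.01, 0.1, 0.8, 0.05, 0.04])
    span_end = numpy.asarray([0.01, 0.04, 0.05, 0.2, 0.7])

    # P(span has started by position i) * P(span has not ended by position i).
    envelope = numpy.cumsum(span_begin) * (1.0 - numpy.cumsum(span_end))
    print(envelope)
    # -> [0.01 * 0.99, 0.11 * 0.95, 0.91 * 0.90, 0.96 * 0.70, 1.00 * 0.00]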
--------------------------------------------------------------------------------
/tests/layers/backend/max_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from keras.layers import Input
5 | from keras.models import Model
6 |
7 | from deep_qa.layers.backend import Max
8 |
9 | class TestMaxLayer:
10 | def test_call_works_on_simple_input(self):
11 | batch_size = 2
12 | input_length = 5
13 | input_layer = Input(shape=(input_length,), dtype='float32')
14 | max_output = Max()(input_layer)
15 | model = Model(inputs=[input_layer], outputs=[max_output])
16 | input_tensor = numpy.asarray([[2, 5, 3, 1, -4], [-1, -4, -2, -10, -4]])
17 | max_tensor = model.predict([input_tensor])
18 | assert max_tensor.shape == (batch_size,)
19 | numpy.testing.assert_almost_equal(max_tensor, [5, -1])
20 |
--------------------------------------------------------------------------------
/tests/layers/backend/permute_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from keras.layers import Input
5 | from keras.models import Model
6 |
7 | from deep_qa.layers.backend import Permute
8 |
9 | class TestPermuteLayer:
10 | def test_call_works_on_simple_input(self):
11 | batch_size = 2
12 | input_length_1 = 2
13 | input_length_2 = 1
14 | input_layer = Input(shape=(input_length_1, input_length_2), dtype='float32')
15 | permute_output = Permute(pattern=[0, 2, 1])(input_layer)
16 | model = Model(inputs=[input_layer], outputs=[permute_output])
17 | input_tensor = numpy.asarray([[[2], [5]], [[-1], [-4]]])
18 | permute_tensor = model.predict([input_tensor])
19 | assert permute_tensor.shape == (batch_size, input_length_2, input_length_1)
20 | numpy.testing.assert_almost_equal(permute_tensor, [[[2, 5]], [[-1, -4]]])
21 |
--------------------------------------------------------------------------------
/tests/layers/backend/repeat_like_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from keras.layers import Input
5 | from keras.models import Model
6 |
7 | from deep_qa.layers.backend import RepeatLike
8 |
9 | class TestRepeatLikeLayer:
10 | def test_call_works_on_simple_input(self):
11 | batch_size = 2
12 | input_length = 3
13 | repetitions = 4
14 | input_layer = Input(shape=(input_length,), dtype='float32')
15 | input_layer_2 = Input(shape=(None,), dtype='float32')
16 | repeat_output = RepeatLike(axis=1, copy_from_axis=1)([input_layer, input_layer_2])
17 | model = Model(inputs=[input_layer, input_layer_2], outputs=[repeat_output])
18 | input_tensor = numpy.asarray([[2, 5, 3], [-1, -4, -2]])
19 | input_tensor_2 = numpy.ones((batch_size, repetitions))
20 | repeat_tensor = model.predict([input_tensor, input_tensor_2])
21 | assert repeat_tensor.shape == (batch_size, repetitions, input_length)
22 | for i in range(repetitions):
23 | numpy.testing.assert_almost_equal(repeat_tensor[:, i, :], [[2, 5, 3], [-1, -4, -2]])
24 |
--------------------------------------------------------------------------------
/tests/layers/backend/repeat_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from keras.layers import Input
5 | from keras.models import Model
6 |
7 | from deep_qa.layers.backend import Repeat
8 |
9 | class TestRepeatLayer:
10 | def test_call_works_on_simple_input(self):
11 | batch_size = 2
12 | input_length = 3
13 | repetitions = 4
14 | input_layer = Input(shape=(input_length,), dtype='float32')
15 | repeat_output = Repeat(axis=1, repetitions=repetitions)(input_layer)
16 | model = Model(inputs=[input_layer], outputs=[repeat_output])
17 | input_tensor = numpy.asarray([[2, 5, 3], [-1, -4, -2]])
18 | repeat_tensor = model.predict([input_tensor])
19 | assert repeat_tensor.shape == (batch_size, repetitions, input_length)
20 | for i in range(repetitions):
21 | numpy.testing.assert_almost_equal(repeat_tensor[:, i, :], [[2, 5, 3], [-1, -4, -2]])
22 |
--------------------------------------------------------------------------------
/tests/layers/backend/replace_masked_values_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from numpy.testing import assert_almost_equal
5 | from keras.layers import Input
6 | from keras.models import Model
7 |
8 | from deep_qa.layers.backend import AddMask, ReplaceMaskedValues
9 |
10 | class TestReplaceMaskedValues:
11 | def test_call_works_on_simple_input(self):
12 | input_length = 3
13 | input_layer = Input(shape=(input_length,), dtype='float32')
14 | masked = AddMask(2)(input_layer)
15 | replaced = ReplaceMaskedValues(4)(masked)
16 | model = Model(inputs=[input_layer], outputs=[replaced])
17 | input_tensor = numpy.asarray([[2, 5, 2], [2, -4, -2]])
18 | replaced_tensor = model.predict([input_tensor])
19 | assert_almost_equal(replaced_tensor, numpy.asarray([[4, 5, 4], [4, -4, -2]]))
20 |
--------------------------------------------------------------------------------
/tests/layers/bigru_index_selector_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use
2 | import numpy
3 | from numpy.testing import assert_almost_equal
4 |
5 | from keras.layers import Input
6 | from keras.models import Model
7 |
8 | from deep_qa.layers import BiGRUIndexSelector
9 |
10 |
11 | class TestBiGRUIndexSelector():
12 | def test_batched_case(self):
13 | document_length = 5
14 | gru_hidden_dim = 2
15 | target = 8
16 |
17 | word_indices_input = Input(shape=(document_length,),
18 | dtype='int32',
19 | name="word_indices_input")
20 | gru_f_input = Input(shape=(document_length, gru_hidden_dim),
21 | dtype='float32',
22 | name="gru_f_input")
23 | gru_b_input = Input(shape=(document_length, gru_hidden_dim),
24 | dtype='float32',
25 | name="gru_b_input")
26 | index_bigru_output = BiGRUIndexSelector(target)([word_indices_input,
27 | gru_f_input,
28 | gru_b_input])
29 | model = Model([word_indices_input,
30 | gru_f_input,
31 | gru_b_input],
32 | index_bigru_output)
33 |
34 | document_indices = numpy.array([[1, 3, 4, 8, 2], [2, 8, 1, 2, 3]])
35 | gru_f_input = numpy.array([[[0.1, 0.5], [0.3, 0.4], [0.4, 0.1], [0.9, 0.2], [0.1, 0.3]],
36 | [[0.4, 0.6], [0.7, 0.1], [0.3, 0.1], [0.9, 0.5], [0.4, 0.7]]])
37 | gru_b_input = numpy.array([[[0.7, 0.2], [0.9, 0.1], [0.3, 0.8], [0.2, 0.6], [0.7, 0.2]],
38 | [[0.2, 0.1], [0.3, 0.6], [0.2, 0.8], [0.3, 0.6], [0.4, 0.4]]])
39 | expected_output = numpy.array([[0.9, 0.2, 0.2, 0.6], [0.7, 0.1, 0.3, 0.6]])
40 |
41 |         # Testing the general batched case.
42 | result = model.predict([document_indices, gru_f_input, gru_b_input])
43 | assert_almost_equal(result, expected_output)
44 |
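The expected_output above is just the forward and backward GRU outputs at the position of the target word index (8), concatenated. A numpy sketch that reproduces it, assuming that is what BiGRUIndexSelector selects (variable names here are illustrative):

    import numpy

    target = 8
    document_indices = numpy.array([[1, 3, 4, 8, 2], [2, 8, 1, 2, 3]])
    gru_f = numpy.array([[[0.1, 0.5], [0.3, 0.4], [0.4, 0.1], [0.9, 0.2], [0.1, 0.3]],
                         [[0.4, 0.6], [0.7, 0.1], [0.3, 0.1], [0.9, 0.5], [0.4, 0.7]]])
    gru_b = numpy.array([[[0.7, 0.2], [0.9, 0.1], [0.3, 0.8], [0.2, 0.6], [0.7, 0.2]],
                         [[0.2, 0.1], [0.3, 0.6], [0.2, 0.8], [0.3, 0.6], [0.4, 0.4]]])

    rows = []
    for row in range(document_indices.shape[0]):
        position = numpy.where(document_indices[row] == target)[0][0]
        rows.append(numpy.concatenate([gru_f[row, position], gru_b[row, position]]))
    print(numpy.stack(rows))
    # -> [[0.9, 0.2, 0.2, 0.6], [0.7, 0.1, 0.3, 0.6]]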
--------------------------------------------------------------------------------
/tests/layers/complex_concat_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from flaky import flaky
5 | from keras.layers import Input
6 | from keras.models import Model
7 |
8 | from deep_qa.layers import ComplexConcat
9 |
10 | class TestComplexConcatLayer:
11 | def test_call_works_on_simple_input(self):
12 | input_shape = (3, 4, 5, 7)
13 | input_1 = Input(shape=input_shape[1:], dtype='float32')
14 | input_2 = Input(shape=input_shape[1:], dtype='float32')
15 | input_3 = Input(shape=input_shape[1:], dtype='float32')
16 | input_4 = Input(shape=input_shape[1:], dtype='float32')
17 | inputs = [input_1, input_2, input_3, input_4]
18 | concatenated = ComplexConcat(combination='1,2,3,4')(inputs)
19 | model = Model(inputs=inputs, outputs=[concatenated])
20 | input_1_tensor = numpy.random.rand(*input_shape)
21 | input_2_tensor = numpy.random.rand(*input_shape)
22 | input_3_tensor = numpy.random.rand(*input_shape)
23 | input_4_tensor = numpy.random.rand(*input_shape)
24 | input_tensors = [input_1_tensor, input_2_tensor, input_3_tensor, input_4_tensor]
25 | concat_tensor = model.predict(input_tensors)
26 | assert concat_tensor.shape == (3, 4, 5, 7*4)
27 | numpy.testing.assert_almost_equal(concat_tensor, numpy.concatenate(input_tensors, axis=-1))
28 |
29 | @flaky
30 | def test_call_handles_complex_combinations(self):
31 | input_shape = (3, 4, 5, 7)
32 | input_1 = Input(shape=input_shape[1:], dtype='float32')
33 | input_2 = Input(shape=input_shape[1:], dtype='float32')
34 | input_3 = Input(shape=input_shape[1:], dtype='float32')
35 | input_4 = Input(shape=input_shape[1:], dtype='float32')
36 | inputs = [input_1, input_2, input_3, input_4]
37 | concatenated = ComplexConcat(combination='1-2,2*4,3/1,4+3,3', axis=1)(inputs)
38 | model = Model(inputs=inputs, outputs=[concatenated])
39 | input_1_tensor = numpy.random.rand(*input_shape)
40 | input_2_tensor = numpy.random.rand(*input_shape)
41 | input_3_tensor = numpy.random.rand(*input_shape)
42 | input_4_tensor = numpy.random.rand(*input_shape)
43 | input_tensors = [input_1_tensor, input_2_tensor, input_3_tensor, input_4_tensor]
44 | concat_tensor = model.predict(input_tensors)
45 | assert concat_tensor.shape == (3, 4*5, 5, 7)
46 | expected_tensor = numpy.concatenate([
47 | input_1_tensor - input_2_tensor,
48 | input_2_tensor * input_4_tensor,
49 | input_3_tensor / input_1_tensor,
50 | input_4_tensor + input_3_tensor,
51 | input_3_tensor
52 | ], axis=1)
53 | numpy.testing.assert_almost_equal(concat_tensor, expected_tensor, decimal=3)
54 |
--------------------------------------------------------------------------------
/tests/layers/decomposable_attention_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from keras.layers import Input, Embedding
5 | from keras.models import Model
6 |
7 | from deep_qa.layers.entailment_models import DecomposableAttentionEntailment
8 |
9 | class TestDecomposableAttention:
10 | def test_decomposable_attention_does_not_crash(self):
11 | sentence_length = 5
12 | embedding_dim = 10
13 | vocabulary_size = 15
14 | num_sentences = 7
15 | premise_input_layer = Input(shape=(sentence_length,), dtype='int32')
16 | hypothesis_input_layer = Input(shape=(sentence_length,), dtype='int32')
17 | embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, mask_zero=True)
18 | embedded_premise = embedding(premise_input_layer)
19 | embedded_hypothesis = embedding(hypothesis_input_layer)
20 | entailment_layer = DecomposableAttentionEntailment()
21 | entailment_scores = entailment_layer([embedded_premise, embedded_hypothesis])
22 | model = Model(inputs=[premise_input_layer, hypothesis_input_layer], outputs=entailment_scores)
23 | premise_input = numpy.random.randint(0, vocabulary_size, (num_sentences, sentence_length))
24 | hypothesis_input = numpy.random.randint(0, vocabulary_size, (num_sentences, sentence_length))
25 | model.predict([premise_input, hypothesis_input])
26 |
--------------------------------------------------------------------------------
/tests/layers/encoders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/layers/encoders/__init__.py
--------------------------------------------------------------------------------
/tests/layers/noisy_or_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use, invalid-name
2 | import numpy as np
3 | from numpy.testing import assert_array_almost_equal
4 | from deep_qa.layers import NoisyOr, BetweenZeroAndOne
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 | from keras import backend as K
7 | from keras.layers import Input
8 | from keras.models import Model
9 |
10 |
11 | class TestNoisyOr(DeepQaTestCase):
12 | def test_general_case(self):
13 |
14 | input_layer = Input(shape=(3, 2,), dtype='float32', name="input")
15 | axis = 2
16 | noisy_or_layer = NoisyOr(axis=axis)
17 | output = noisy_or_layer(input_layer)
18 | model = Model([input_layer], output)
19 |
20 | # Testing general unmasked batched case.
21 | q = K.eval(noisy_or_layer.noise_parameter)
22 | batch_original_data = np.array([[[0.2, 0.1],
23 | [0.5, 0.3],
24 | [0.3, 0.7]],
25 | [[0.4, 0.55],
26 | [0.65, 0.8],
27 | [0.9, 0.15]]])
28 | batch_result = model.predict([batch_original_data])
29 | batch_desired_result = 1.0 - np.prod(1.0 - (q * batch_original_data), axis=axis)
30 | assert_array_almost_equal(batch_result, batch_desired_result)
31 |
32 | # Testing the masked case.
33 | # Here's a modified version of the batch_original_data, with extra probabilities.
34 | batch_data_with_masks = K.variable(np.array([[[0.2, 0.1, 0.7], [0.5, 0.3, 0.3], [0.3, 0.7, 0.2]],
35 | [[0.4, 0.55, 0.3], [0.65, 0.8, 0.1], [0.9, 0.15, 0.0]]]),
36 | dtype="float32")
37 | # Now here the added 3rd element is masked out, so the noisy_or probabilities resulting from the
38 | # masked version should be the same as the unmasked one (above).
39 | masks = K.variable(np.array([[[1, 1, 0], [1, 1, 0], [1, 1, 0]],
40 | [[1, 1, 0], [1, 1, 0], [1, 1, 0]]]), dtype="float32")
41 |
42 | masking_results = K.eval(noisy_or_layer.call(inputs=batch_data_with_masks, mask=masks))
43 | assert_array_almost_equal(batch_result, masking_results)
44 |
45 | def test_between_zero_and_one_constraint(self):
46 | p = K.variable(np.asarray([0.35, -0.4, 1.0, 1.2]), dtype='float32')
47 | desired_result = np.asarray([0.35, K.epsilon(), 1.0, 1.0])
48 | result = K.eval(BetweenZeroAndOne()(p))
49 | assert_array_almost_equal(result, desired_result)
50 |
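The masked assertion above holds because masked probabilities are expected to drop out of the noisy-or product entirely. A numpy sketch of that expectation; the noise parameter q is learned by the layer, so the value used here is only an illustrative placeholder:

    import numpy as np

    q = 0.8  # placeholder for the learned noise parameter read from the layer above
    data = np.array([[[0.2, 0.1, 0.7], [0.5, 0.3, 0.3], [0.3, 0.7, 0.2]],
                     [[0.4, 0.55, 0.3], [0.65, 0.8, 0.1], [0.9, 0.15, 0.0]]])
    masks = np.array([[[1, 1, 0], [1, 1, 0], [1, 1, 0]],
                      [[1, 1, 0], [1, 1, 0], [1, 1, 0]]])

    # Masked entries contribute a factor of 1.0 to the product, i.e. they are skipped.
    masked_noisy_or = 1.0 - np.prod(np.where(masks > 0, 1.0 - q * data, 1.0), axis=2)

    # This should equal the noisy-or over only the first two probabilities of each row.
    unmasked_noisy_or = 1.0 - np.prod(1.0 - q * data[:, :, :2], axis=2)
    np.testing.assert_array_almost_equal(masked_noisy_or, unmasked_noisy_or)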
--------------------------------------------------------------------------------
/tests/layers/overlap_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use
2 | import numpy
3 | from numpy.testing import assert_almost_equal
4 | import keras.backend as K
5 | from keras.layers import Input
6 | from keras.models import Model
7 |
8 | from deep_qa.layers import Overlap
9 |
10 |
11 | class TestOverlap:
12 | def test_batched_case(self):
13 | tensor_a_len = 5
14 | tensor_b_len = 4
15 |
16 | tensor_a_input = Input(shape=(tensor_a_len,),
17 | dtype='int32',
18 | name="tensor_a")
19 | tensor_b_input = Input(shape=(tensor_b_len,),
20 | dtype='int32',
21 | name="tensor_b")
22 | overlap_output = Overlap()([tensor_a_input,
23 | tensor_b_input])
24 | model = Model([tensor_a_input,
25 | tensor_b_input],
26 | overlap_output)
27 |
28 | tensor_a = numpy.array([[1, 3, 4, 8, 2], [2, 8, 1, 2, 3]])
29 | tensor_b = numpy.array([[9, 4, 2, 5], [6, 1, 2, 2]])
30 | expected_output = numpy.array([[[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0]],
31 | [[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]])
32 |
33 | # Testing the general batched case
34 | result = model.predict([tensor_a, tensor_b])
35 | assert_almost_equal(result, expected_output)
36 |
37 | def test_masked_batched_case(self):
38 | tensor_a = K.variable(numpy.array([[1, 3, 4, 8, 2], [2, 8, 1, 2, 3]]),
39 | dtype="int32")
40 | tensor_b = K.variable(numpy.array([[9, 4, 2, 5], [6, 1, 2, 2]]),
41 | dtype="int32")
42 | mask_a = K.variable(numpy.array([[1, 1, 1, 0, 0], [1, 1, 1, 1, 0]]))
43 | mask_b = K.variable(numpy.array([[1, 1, 0, 0], [1, 1, 0, 0]]))
44 | expected_output = numpy.array([[[1.0, 0.0], [1.0, 0.0],
45 | [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]],
46 | [[1.0, 0.0], [1.0, 0.0],
47 | [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]]])
48 |
49 | # Testing the masked general batched case
50 | result = K.eval(Overlap()([tensor_a, tensor_b], mask=[mask_a, mask_b]))
51 | assert_almost_equal(result, expected_output)
52 |
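The expected_output above one-hot encodes, for each token in tensor_a, whether that token also appears in tensor_b: [1, 0] for no overlap and [0, 1] for overlap, with masked positions counting as non-overlapping. A numpy sketch of the unmasked case (an inference from the asserted values, not the layer's code):

    import numpy

    tensor_a = numpy.array([[1, 3, 4, 8, 2], [2, 8, 1, 2, 3]])
    tensor_b = numpy.array([[9, 4, 2, 5], [6, 1, 2, 2]])

    expected = numpy.zeros(tensor_a.shape + (2,))
    for row in range(tensor_a.shape[0]):
        for i, token in enumerate(tensor_a[row]):
            overlaps = token in tensor_b[row]
            expected[row, i] = [0.0, 1.0] if overlaps else [1.0, 0.0]
    print(expected)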
--------------------------------------------------------------------------------
/tests/layers/test_subtract_minimum.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use
2 | import numpy as np
3 | from numpy.testing import assert_array_almost_equal
4 | from keras.layers import Input
5 | from keras.models import Model
6 | from deep_qa.layers.backend.add_mask import AddMask
7 | from deep_qa.layers.subtract_minimum import SubtractMinimum
8 | from deep_qa.testing.test_case import DeepQaTestCase
9 |
10 |
11 | class TestSubtractMinimum(DeepQaTestCase):
12 | def test_general_case(self):
13 |
14 | input_layer = Input(shape=(4, 3,), dtype='float32', name="input")
15 | subtract_minimum_layer = SubtractMinimum(axis=1)
16 | normalized_input = subtract_minimum_layer(input_layer)
17 |
18 | model = Model([input_layer], normalized_input)
19 |         # Testing the general unmasked case (minimum taken along a single axis).
20 | unnormalized_tensor = np.array([[[0.1, 0.1, 0.1],
21 | [0.2, 0.3, 0.4],
22 | [0.5, 0.4, 0.6],
23 | [0.5, 0.4, 0.6]]])
24 | result = model.predict([unnormalized_tensor])
25 |
26 | assert_array_almost_equal(result, np.array([[[0.0, 0.0, 0.0],
27 | [0.1, 0.2, 0.3],
28 | [0.4, 0.3, 0.5],
29 | [0.4, 0.3, 0.5]]]))
30 |
31 | # Testing masked batched case.
32 |         # By setting the mask value to 0.1, the layer should ignore this value when deciding the minimum.
33 | mask_layer = AddMask(mask_value=0.1)
34 | masked_input = mask_layer(input_layer)
35 | normalized_masked_input = subtract_minimum_layer(masked_input)
36 | masking_model = Model([input_layer], normalized_masked_input)
37 |
38 | masked_result = masking_model.predict([unnormalized_tensor])
39 |
40 | assert_array_almost_equal(masked_result, np.array([[[-0.1, -0.2, -0.3],
41 | [0.0, 0.0, 0.0],
42 | [0.3, 0.1, 0.2],
43 | [0.3, 0.1, 0.2]]]))
44 |
--------------------------------------------------------------------------------
/tests/layers/tuple_alignment_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | import numpy
3 | from keras.layers import Embedding, Input
4 | from keras.models import Model
5 |
6 | from deep_qa.layers.entailment_models import MultipleChoiceTupleEntailment
7 |
8 | class TestTupleAlignment:
9 | def test_tuple_alignment_does_not_crash(self):
10 | question_length = 5
11 | num_options = 4
12 | tuple_size = 3
13 | num_tuples = 7
14 | embedding_dim = 10
15 | vocabulary_size = 15
16 | batch_size = 32
17 | question_input_layer = Input(shape=(question_length,), dtype='int32')
18 | answer_input_layer = Input(shape=(num_options,), dtype='int32')
19 | knowledge_input_layer = Input(shape=(num_tuples, tuple_size), dtype='int32')
20 |         # Embedding masks zeros
21 | embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim,
22 | mask_zero=True)
23 | embedded_question = embedding(question_input_layer)
24 | embedded_answer = embedding(answer_input_layer)
25 | embedded_knowledge = embedding(knowledge_input_layer)
26 | entailment_layer = MultipleChoiceTupleEntailment()
27 | entailment_scores = entailment_layer([embedded_knowledge, embedded_question, embedded_answer])
28 | model = Model(inputs=[knowledge_input_layer, question_input_layer, answer_input_layer],
29 | outputs=entailment_scores)
30 | model.compile(loss="mse", optimizer="sgd") # Will not train this model
31 | knowledge_input = numpy.random.randint(0, vocabulary_size, (batch_size, num_tuples, tuple_size))
32 | question_input = numpy.random.randint(0, vocabulary_size, (batch_size, question_length))
33 | answer_input = numpy.random.randint(0, vocabulary_size, (batch_size, num_options))
34 | model.predict([knowledge_input, question_input, answer_input])
35 |
--------------------------------------------------------------------------------
/tests/layers/wrappers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/layers/wrappers/__init__.py
--------------------------------------------------------------------------------
/tests/layers/wrappers/add_encoder_mask_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | import numpy
3 | from deep_qa.layers.encoders import BOWEncoder
4 | from deep_qa.layers.wrappers import AddEncoderMask, OutputMask
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 | from deep_qa.training.models import DeepQaModel
7 | from keras.layers import Embedding, Input
8 |
9 |
10 | class TestAddEncoderMask(DeepQaTestCase):
11 | def test_mask_is_computed_correctly(self):
12 | background_input = Input(shape=(None, 3), dtype='int32')
13 | embedding = Embedding(input_dim=3, output_dim=2, mask_zero=True)
14 | embedded_background = embedding(background_input)
15 | encoded_background = BOWEncoder(units=2)(embedded_background)
16 | encoded_background_with_mask = AddEncoderMask()([encoded_background, embedded_background])
17 |
18 | mask_output = OutputMask()(encoded_background_with_mask)
19 | model = DeepQaModel(inputs=[background_input], outputs=mask_output)
20 |
21 | test_background = numpy.asarray([
22 | [
23 | [0, 0, 0],
24 | [2, 2, 2],
25 | [0, 0, 0],
26 | [0, 1, 2],
27 | [1, 0, 0],
28 | [0, 0, 0],
29 | [0, 1, 0],
30 | [1, 1, 1],
31 | ]
32 | ])
33 | expected_mask = numpy.asarray([[0, 1, 0, 1, 1, 0, 1, 1]])
34 | actual_mask = model.predict([test_background])
35 | numpy.testing.assert_array_equal(expected_mask, actual_mask)
36 |
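The expected_mask above (and the [0, 1, 0] mask in the EncoderWrapper test that follows) encodes one simple rule: a background sentence is masked out exactly when all of its word indices are zero. In numpy terms this is just a restatement of the expected values, not of the layers' mask code:

    import numpy

    test_background = numpy.asarray([[[0, 0, 0], [2, 2, 2], [0, 0, 0], [0, 1, 2],
                                      [1, 0, 0], [0, 0, 0], [0, 1, 0], [1, 1, 1]]])

    # A sentence is kept (mask 1) if any of its word indices is non-zero.
    expected_mask = (test_background != 0).any(axis=-1).astype(int)
    print(expected_mask)  # -> [[0, 1, 0, 1, 1, 0, 1, 1]]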
--------------------------------------------------------------------------------
/tests/layers/wrappers/encoder_wrapper_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | import numpy
3 | from deep_qa.layers.encoders import BOWEncoder
4 | from deep_qa.layers.wrappers import EncoderWrapper, OutputMask
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 | from deep_qa.training.models import DeepQaModel
7 | from keras.layers import Embedding, Input
8 |
9 |
10 | class TestEncoderWrapper(DeepQaTestCase):
11 | def test_mask_is_computed_correctly(self):
12 | background_input = Input(shape=(3, 3), dtype='int32')
13 | embedding = Embedding(input_dim=3, output_dim=2, mask_zero=True)
14 | embedded_background = embedding(background_input)
15 | encoded_background = EncoderWrapper(BOWEncoder(units=2))(embedded_background)
16 |
17 | mask_output = OutputMask()(encoded_background)
18 | model = DeepQaModel(inputs=[background_input], outputs=mask_output)
19 |
20 | test_background = numpy.asarray([
21 | [
22 | [0, 0, 0],
23 | [2, 2, 2],
24 | [0, 0, 0],
25 | ]
26 | ])
27 | expected_mask = numpy.asarray([[0, 1, 0]])
28 | actual_mask = model.predict([test_background])
29 | numpy.testing.assert_array_almost_equal(expected_mask, actual_mask)
30 |
--------------------------------------------------------------------------------
/tests/layers/wrappers/time_distributed_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | import numpy
3 | from numpy.testing import assert_array_almost_equal
4 | from keras.layers import Input, Lambda
5 | from keras.models import Model
6 | from deep_qa.layers.wrappers import TimeDistributed
7 | from deep_qa.testing.test_case import DeepQaTestCase
8 |
9 |
10 | class TestTimeDistributed(DeepQaTestCase):
11 | def test_handles_multiple_inputs(self):
12 | input_layer_1 = Input(shape=(3, 1), dtype='int32')
13 | input_layer_2 = Input(shape=(3, 1), dtype='int32')
14 | combine_layer = Lambda(lambda x: x[0] ** x[1] + 1,
15 | output_shape=lambda x: (x[0][0], 1),
16 | name="a^b + 1 Layer")
17 | td_combine = TimeDistributed(combine_layer)
18 | output = td_combine([input_layer_1, input_layer_2])
19 | model = Model([input_layer_1, input_layer_2], output)
20 |
21 | batch_input_1 = numpy.array([[[4], [5], [6]],
22 | [[3], [3], [3]],
23 | [[0], [1], [2]]], dtype='float32')
24 | batch_input_2 = numpy.array([[[3], [2], [1]],
25 | [[1], [2], [3]],
26 | [[1], [0], [2]]], dtype='float32')
27 |
28 | expected_result = (batch_input_1 ** batch_input_2 + 1)
29 | # In TimeDistributed, we reshape tensors whose final dimension is 1, so we need to do that here.
30 | if numpy.shape(expected_result)[-1] == 1:
31 | expected_result = numpy.reshape(expected_result, numpy.shape(expected_result)[:-1])
32 | result = model.predict([batch_input_1, batch_input_2])
33 | assert_array_almost_equal(result, expected_result)
34 |
--------------------------------------------------------------------------------
/tests/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/models/__init__.py
--------------------------------------------------------------------------------
/tests/models/entailment/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/models/entailment/__init__.py
--------------------------------------------------------------------------------
/tests/models/entailment/decomposable_attention_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | from deep_qa.common.params import Params
4 | from deep_qa.models.entailment import DecomposableAttention
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 |
7 |
8 | class TestDecomposableAttentionModel(DeepQaTestCase):
9 | def test_trains_and_loads_correctly(self):
10 | self.write_snli_files()
11 | args = Params({
12 | 'num_seq2seq_layers': 1,
13 | })
14 | self.ensure_model_trains_and_loads(DecomposableAttention, args)
15 |
--------------------------------------------------------------------------------
/tests/models/reading_comprehension/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/models/reading_comprehension/__init__.py
--------------------------------------------------------------------------------
/tests/models/reading_comprehension/attention_sum_reader_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | from deep_qa.common.params import Params
4 | from deep_qa.models.reading_comprehension import AttentionSumReader
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 |
7 |
8 | class TestAttentionSumReader(DeepQaTestCase):
9 | def test_train_does_not_crash_and_load_works(self):
10 | self.write_who_did_what_files()
11 | args = Params({
12 | "encoder": {
13 | "default": {
14 | "type": "bi_gru",
15 | "units": 7
16 | }
17 | },
18 | "seq2seq_encoder": {
19 | "default": {
20 | "type": "bi_gru",
21 | "encoder_params": {
22 | "units": 7
23 | },
24 | "wrapper_params": {}
25 | }
26 | },
27 | })
28 | self.ensure_model_trains_and_loads(AttentionSumReader, args)
29 |
--------------------------------------------------------------------------------
/tests/models/reading_comprehension/bidirectional_attention_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | import numpy
3 | from deep_qa.common.params import Params
4 | from deep_qa.models.reading_comprehension import BidirectionalAttentionFlow
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 | from flaky import flaky
7 |
8 |
9 | class TestBidirectionalAttentionFlow(DeepQaTestCase):
10 | @flaky
11 | def test_trains_and_loads_correctly(self):
12 | self.write_span_prediction_files()
13 | args = Params({
14 | 'embeddings': {'words': {'dimension': 8}, 'characters': {'dimension': 4}},
15 | 'save_models': True,
16 | 'tokenizer': {'type': 'words and characters'},
17 | 'show_summary_with_masking_info': True,
18 | })
19 | model, _ = self.ensure_model_trains_and_loads(BidirectionalAttentionFlow, args)
20 | for layer in model.model.layers:
21 | if layer.name == 'characters_embedding':
22 | assert layer.get_output_shape_at(0)[-1] == 4
23 | break
24 | else:
25 | assert False, "couldn't find character embedding layer"
26 |
27 | def test_get_best_span(self):
28 | # Note that the best span cannot be (1, 0) since even though 0.3 * 0.5 is the greatest
29 | # value, the end span index is constrained to occur after the begin span index.
30 | span_begin_probs = numpy.array([0.1, 0.3, 0.05, 0.3, 0.25])
31 | span_end_probs = numpy.array([0.5, 0.1, 0.2, 0.05, 0.15])
32 | begin_end_idxs = BidirectionalAttentionFlow.get_best_span(span_begin_probs,
33 | span_end_probs)
34 | assert begin_end_idxs == (1, 2)
35 |
36 |         # Testing an edge case of the dynamic program here: the order in which we update the
37 |         # best previous span position. We should not get (1, 1), because that's an empty span.
38 | span_begin_probs = numpy.array([0.4, 0.5, 0.1])
39 | span_end_probs = numpy.array([0.3, 0.6, 0.1])
40 | begin_end_idxs = BidirectionalAttentionFlow.get_best_span(span_begin_probs,
41 | span_end_probs)
42 | assert begin_end_idxs == (0, 1)
43 |
44 | # test higher-order input
45 | # Note that the best span cannot be (1, 1) since even though 0.3 * 0.5 is the greatest
46 | # value, the end span index is constrained to occur after the begin span index.
47 | span_begin_probs = numpy.array([[0.1, 0.3, 0.05, 0.3, 0.25]])
48 | span_end_probs = numpy.array([[0.1, 0.5, 0.2, 0.05, 0.15]])
49 | begin_end_idxs = BidirectionalAttentionFlow.get_best_span(span_begin_probs,
50 | span_end_probs)
51 | assert begin_end_idxs == (1, 2)
52 |
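The spans asserted above can be checked against a brute-force search: take the (begin, end) pair maximizing begin_prob * end_prob with end strictly after begin, which is how the comments describe the constraint. A short numpy sketch of that check (the brute force is an editorial reference, not the model's dynamic program):

    import numpy

    def brute_force_best_span(span_begin_probs, span_end_probs):
        """Exhaustively search for the highest-scoring span with end > begin."""
        span_begin_probs = span_begin_probs.flatten()
        span_end_probs = span_end_probs.flatten()
        best_span, best_score = None, -1.0
        for begin in range(len(span_begin_probs)):
            for end in range(begin + 1, len(span_end_probs)):
                score = span_begin_probs[begin] * span_end_probs[end]
                if score > best_score:
                    best_span, best_score = (begin, end), score
        return best_span

    assert brute_force_best_span(numpy.array([0.1, 0.3, 0.05, 0.3, 0.25]),
                                 numpy.array([0.5, 0.1, 0.2, 0.05, 0.15])) == (1, 2)
    assert brute_force_best_span(numpy.array([0.4, 0.5, 0.1]),
                                 numpy.array([0.3, 0.6, 0.1])) == (0, 1)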
--------------------------------------------------------------------------------
/tests/models/sequence_tagging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/models/sequence_tagging/__init__.py
--------------------------------------------------------------------------------
/tests/models/sequence_tagging/simple_tagger_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | import numpy
3 | from deep_qa.common.params import Params
4 | from deep_qa.models.sequence_tagging import SimpleTagger
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 |
7 |
8 | class TestSimpleTagger(DeepQaTestCase):
9 | def test_trains_and_loads_correctly(self):
10 | self.write_sequence_tagging_files()
11 | args = Params({
12 | 'save_models': True,
13 | 'show_summary_with_masking_info': True,
14 | 'instance_type': 'PreTokenizedTaggingInstance',
15 | 'tokenizer': {'processor': {'word_splitter': 'no_op'}},
16 | })
17 | self.ensure_model_trains_and_loads(SimpleTagger, args)
18 |
19 | def test_loss_function_uses_mask(self):
20 | # We're going to make sure that the loss and accuracy computations are the same for any
21 | # permutation of labels on padded tokens. If not, the loss/accuracy function is paying
22 | # attention to the labels when it shouldn't be. We're not going to test for any particular
23 | # accuracy value, just that all of them are the same - I ran this a few times by hand to be
24 | # sure that we're getting different accuracy values, depending on the initialization.
25 | self.write_sequence_tagging_files()
26 | args = Params({
27 | 'show_summary_with_masking_info': True,
28 | 'instance_type': 'PreTokenizedTaggingInstance',
29 | 'tokenizer': {'processor': {'word_splitter': 'no_op'}},
30 | })
31 | model = self.get_model(SimpleTagger, args)
32 | model.train()
33 |
34 | input_indices = [3, 2, 0, 0]
35 | labels = [[[0, 1], [1, 0], [1, 0], [1, 0]],
36 | [[0, 1], [1, 0], [1, 0], [0, 1]],
37 | [[0, 1], [1, 0], [0, 1], [1, 0]],
38 | [[0, 1], [1, 0], [0, 1], [0, 1]]]
39 | results = [model.model.evaluate(numpy.asarray([input_indices]), numpy.asarray([label]))
40 | for label in labels]
41 | loss, accuracy = zip(*results)
42 | assert len(set(loss)) == 1
43 | assert len(set(accuracy)) == 1
44 |
--------------------------------------------------------------------------------
/tests/run_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=invalid-name,no-self-use
2 | import json
3 | import os
4 |
5 | import numpy
6 | from numpy.testing import assert_almost_equal
7 | from deep_qa.run import compute_accuracy
8 | from deep_qa.run import run_model_from_file, load_model, evaluate_model
9 | from deep_qa.run import score_dataset, score_dataset_with_ensemble
10 | from deep_qa.testing.test_case import DeepQaTestCase
11 |
12 |
13 | class TestRun(DeepQaTestCase):
14 | # Our point here is mostly just to make sure the scripts don't crash.
15 | def setUp(self):
16 | super(TestRun, self).setUp()
17 | self.write_true_false_model_files()
18 | model_params = self.get_model_params({"model_class": "ClassificationModel",
19 | 'save_models': True})
20 | self.param_path = os.path.join(self.TEST_DIR, "params.json")
21 | with open(self.param_path, "w") as file_path:
22 | json.dump(model_params.as_dict(), file_path)
23 |
24 | def test_run_model_does_not_crash(self):
25 | run_model_from_file(self.param_path)
26 |
27 | def test_load_model_does_not_crash(self):
28 | run_model_from_file(self.param_path)
29 | loaded_model = load_model(self.param_path)
30 | assert loaded_model.can_train()
31 |
32 | def test_score_dataset_does_not_crash(self):
33 | run_model_from_file(self.param_path)
34 | score_dataset(self.param_path, [self.TEST_FILE])
35 |
36 |     def test_evaluate_model_does_not_crash(self):
37 | run_model_from_file(self.param_path)
38 | evaluate_model(self.param_path, [self.TEST_FILE])
39 |
40 | def test_score_dataset_with_ensemble_gives_same_predictions_as_score_dataset(self):
41 | # We're just going to test something simple here: that the methods don't crash, and that we
42 |         # get the same result with an ensemble of one model as we do with `score_dataset`.
43 | run_model_from_file(self.param_path)
44 | predictions, _ = score_dataset(self.param_path, [self.TEST_FILE])
45 | ensembled_predictions, _ = score_dataset_with_ensemble([self.param_path], [self.TEST_FILE])
46 | assert_almost_equal(predictions, ensembled_predictions)
47 |
48 | def test_compute_accuracy_computes_a_correct_metric(self):
49 | predictions = numpy.asarray([[.5, .5, .6], [.1, .4, .0]])
50 | labels = numpy.asarray([[1, 0, 0], [0, 1, 0]])
51 | assert compute_accuracy(predictions, labels) == .5
52 |
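The asserted 0.5 is consistent with accuracy meaning "fraction of rows whose argmax prediction matches the argmax label"; a quick numpy check of that reading (an inference from the test data, not compute_accuracy's source):

    import numpy

    predictions = numpy.asarray([[.5, .5, .6], [.1, .4, .0]])
    labels = numpy.asarray([[1, 0, 0], [0, 1, 0]])

    # Row 0 predicts index 2 but the label is index 0; row 1 predicts index 1 correctly.
    accuracy = numpy.mean(numpy.argmax(predictions, axis=1) == numpy.argmax(labels, axis=1))
    print(accuracy)  # -> 0.5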
--------------------------------------------------------------------------------
/tests/tensors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/tensors/__init__.py
--------------------------------------------------------------------------------
/tests/tensors/backend_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | import numpy
3 | from deep_qa.tensors.backend import hardmax
4 | from deep_qa.testing.test_case import DeepQaTestCase
5 | from keras import backend as K
6 |
7 |
8 | class TestBackendTensorFunctions(DeepQaTestCase):
9 | def test_hardmax(self):
10 | batch_size = 3
11 | knowledge_length = 5
12 | unnormalized_attention = K.variable(numpy.random.rand(batch_size, knowledge_length))
13 | hardmax_output = hardmax(unnormalized_attention, knowledge_length)
14 | input_value = K.eval(unnormalized_attention)
15 | output_value = K.eval(hardmax_output)
16 | assert output_value.shape == (batch_size, knowledge_length) # pylint: disable=no-member
17 | # Assert all elements other than the ones are zeros
18 | assert numpy.count_nonzero(output_value) == batch_size
19 | # Assert the max values in all rows are ones
20 | assert numpy.all(numpy.equal(numpy.max(output_value, axis=1),
21 | numpy.ones((batch_size,))))
22 | # Assert ones are in the right places
23 | assert numpy.all(numpy.equal(numpy.argmax(output_value, axis=1),
24 | numpy.argmax(input_value, axis=1)))
25 |
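Spelled out in numpy, the behaviour the three assertions above check for is "a one-hot vector at each row's argmax". A minimal reference sketch of that behaviour (not the TensorFlow implementation in deep_qa.tensors.backend):

    import numpy

    def hardmax_reference(unnormalized_attention):
        """Return a one-hot vector at the argmax of each row."""
        one_hot = numpy.zeros_like(unnormalized_attention)
        rows = numpy.arange(unnormalized_attention.shape[0])
        one_hot[rows, numpy.argmax(unnormalized_attention, axis=1)] = 1.0
        return one_hot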
--------------------------------------------------------------------------------
/tests/tensors/similarity_functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/tensors/similarity_functions/__init__.py
--------------------------------------------------------------------------------
/tests/tensors/similarity_functions/bilinear_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from numpy.testing import assert_almost_equal
5 | import keras.backend as K
6 |
7 | from deep_qa.tensors.similarity_functions.bilinear import Bilinear
8 |
9 | class TestBilinearSimilarityFunction:
10 | def test_initialize_weights_returns_correct_weight_sizes(self):
11 | bilinear = Bilinear(name='bilinear')
12 | weights = bilinear.initialize_weights(3, 3)
13 | assert isinstance(weights, list) and len(weights) == 2
14 | weight_vector, bias = weights
15 | assert K.int_shape(weight_vector) == (3, 3)
16 | assert K.int_shape(bias) == (1,)
17 |
18 | weights = bilinear.initialize_weights(2, 5)
19 | assert isinstance(weights, list) and len(weights) == 2
20 | weight_vector, bias = weights
21 | assert K.int_shape(weight_vector) == (2, 5)
22 | assert K.int_shape(bias) == (1,)
23 |
24 | def test_compute_similarity_does_a_bilinear_product(self):
25 | bilinear = Bilinear(name='bilinear')
26 | weights = numpy.asarray([[-.3, .5], [2.0, -1.0]])
27 | bilinear.weight_matrix = K.variable(weights)
28 | bilinear.bias = K.variable(numpy.asarray([.1]))
29 | a_vectors = numpy.asarray([[1, 1], [-1, -1]])
30 | b_vectors = numpy.asarray([[1, 0], [0, 1]])
31 | result = K.eval(bilinear.compute_similarity(K.variable(a_vectors), K.variable(b_vectors)))
32 | assert result.shape == (2,)
33 | assert_almost_equal(result, [1.8, .6])
34 |
35 | def test_compute_similarity_works_with_higher_order_tensors(self):
36 | bilinear = Bilinear(name='bilinear')
37 | weights = numpy.random.rand(4, 7)
38 | bilinear.weight_matrix = K.variable(weights)
39 | bilinear.bias = K.variable(numpy.asarray([0]))
40 | a_vectors = numpy.random.rand(5, 4, 3, 6, 4)
41 | b_vectors = numpy.random.rand(5, 4, 3, 6, 7)
42 | result = K.eval(bilinear.compute_similarity(K.variable(a_vectors), K.variable(b_vectors)))
43 | assert result.shape == (5, 4, 3, 6)
44 | expected_result = numpy.dot(numpy.dot(numpy.transpose(a_vectors[3, 2, 1, 3]), weights),
45 | b_vectors[3, 2, 1, 3])
46 | assert_almost_equal(result[3, 2, 1, 3], expected_result, decimal=5)
47 |
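The values 1.8 and 0.6 asserted in test_compute_similarity_does_a_bilinear_product follow from the bilinear form a^T W b + bias, which is the same formula the higher-order test spells out in its expected_result. A numpy check of the first test's numbers (the formula is inferred from the tests' own expectations, not quoted from the Bilinear class):

    import numpy

    weights = numpy.asarray([[-0.3, 0.5], [2.0, -1.0]])
    bias = 0.1
    a_vectors = numpy.asarray([[1, 1], [-1, -1]])
    b_vectors = numpy.asarray([[1, 0], [0, 1]])

    # a^T W b + bias, computed row by row.
    similarities = numpy.array([a.dot(weights).dot(b) + bias
                                for a, b in zip(a_vectors, b_vectors)])
    print(similarities)  # -> [1.8, 0.6]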
--------------------------------------------------------------------------------
/tests/tensors/similarity_functions/cosine_similarity_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from numpy.testing import assert_almost_equal
5 | import keras.backend as K
6 |
7 | from deep_qa.tensors.similarity_functions.cosine_similarity import CosineSimilarity
8 | from deep_qa.tensors.similarity_functions.dot_product import DotProduct
9 |
10 | class TestCosineSimilarityFunction:
11 | cosine_similarity = CosineSimilarity(name='cosine_similarity')
12 | dot_product = DotProduct(name="dot_product")
13 |
14 | def test_initialize_weights_returns_empty(self):
15 | weights = self.cosine_similarity.initialize_weights(3, 3)
16 | assert isinstance(weights, list) and len(weights) == 0
17 |
18 | def test_compute_similarity_does_a_cosine_similarity(self):
19 | a_vectors = numpy.asarray([[numpy.random.random(3) for _ in range(2)]], dtype="float32")
20 | b_vectors = numpy.asarray([[numpy.random.random(3) for _ in range(2)]], dtype="float32")
21 | normed_a = K.l2_normalize(K.variable(a_vectors), axis=-1)
22 | normed_b = K.l2_normalize(K.variable(b_vectors), axis=-1)
23 | desired_result = K.eval(self.dot_product.compute_similarity(normed_a, normed_b))
24 | result = K.eval(self.cosine_similarity.compute_similarity(K.variable(a_vectors), K.variable(b_vectors)))
25 | assert result.shape == (1, 2) # batch_size = 1
26 | assert numpy.all(result == desired_result)
27 |
28 | def test_compute_similarity_works_with_higher_order_tensors(self):
29 | a_vectors = numpy.random.rand(5, 4, 3, 6, 7)
30 | b_vectors = numpy.random.rand(5, 4, 3, 6, 7)
31 | normed_a = K.eval(K.l2_normalize(K.variable(a_vectors), axis=-1))
32 | normed_b = K.eval(K.l2_normalize(K.variable(b_vectors), axis=-1))
33 | result = K.eval(self.cosine_similarity.compute_similarity(K.variable(a_vectors), K.variable(b_vectors)))
34 | assert result.shape == (5, 4, 3, 6)
35 | assert_almost_equal(result[3, 2, 1, 3],
36 | numpy.dot(normed_a[3, 2, 1, 3], normed_b[3, 2, 1, 3]),
37 | decimal=6)
38 |
--------------------------------------------------------------------------------
/tests/tensors/similarity_functions/dot_product_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 |
3 | import numpy
4 | from numpy.testing import assert_almost_equal
5 | import keras.backend as K
6 |
7 | from deep_qa.tensors.similarity_functions.dot_product import DotProduct
8 |
9 | class TestDotProductSimilarityFunction:
10 | dot_product = DotProduct(name='dot_product')
11 | def test_initialize_weights_returns_empty(self):
12 | weights = self.dot_product.initialize_weights(3, 3)
13 | assert isinstance(weights, list) and len(weights) == 0
14 |
15 | def test_compute_similarity_does_a_dot_product(self):
16 | a_vectors = numpy.asarray([[1, 1, 1], [-1, -1, -1]])
17 | b_vectors = numpy.asarray([[1, 0, 1], [1, 0, 0]])
18 | result = K.eval(self.dot_product.compute_similarity(K.variable(a_vectors), K.variable(b_vectors)))
19 | assert result.shape == (2,)
20 | assert numpy.all(result == [2, -1])
21 |
22 | def test_compute_similarity_works_with_higher_order_tensors(self):
23 | a_vectors = numpy.random.rand(5, 4, 3, 6, 7)
24 | b_vectors = numpy.random.rand(5, 4, 3, 6, 7)
25 | result = K.eval(self.dot_product.compute_similarity(K.variable(a_vectors), K.variable(b_vectors)))
26 | assert result.shape == (5, 4, 3, 6)
27 | assert_almost_equal(result[3, 2, 1, 3],
28 | numpy.dot(a_vectors[3, 2, 1, 3], b_vectors[3, 2, 1, 3]),
29 | decimal=6)
30 |
--------------------------------------------------------------------------------
/tests/training/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allenai/deep_qa/48b4340650ec70b801ec93adfdf651bde9c0546e/tests/training/__init__.py
--------------------------------------------------------------------------------
/tests/training/losses_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=invalid-name,no-self-use
2 | import numpy
3 | from numpy.testing import assert_almost_equal
4 | from keras import backend as K
5 | from deep_qa.testing.test_case import DeepQaTestCase
6 | from deep_qa.training.losses import ranking_loss, ranking_loss_with_margin
7 |
8 |
9 | class TestLosses(DeepQaTestCase):
10 | def test_ranking_loss_is_computed_correctly(self):
11 | predictions = numpy.asarray([[.1, .4, .8], [-.1, -.2, .1]])
12 | labels = numpy.asarray([[0, 0, 1], [1, 0, 0]])
13 | sigmoid = lambda x: 1.0 / (1.0 + numpy.exp(-x))
14 | expected_result = numpy.mean(-sigmoid(numpy.asarray([.8 - .4, -.1 - .1])))
15 | result = K.eval(ranking_loss(K.variable(predictions), K.variable(labels)))
16 | assert_almost_equal(expected_result, result)
17 |
18 | def test_ranking_loss_with_margin_is_computed_correctly(self):
19 | predictions = numpy.asarray([[.1, .4, .8], [-.1, -.2, .1]])
20 | labels = numpy.asarray([[0, 0, 1], [1, 0, 0]])
21 | expected_result = numpy.mean(numpy.maximum(0, numpy.asarray([1 + .4 - .8, 1 + .1 - -.1])))
22 | result = K.eval(ranking_loss_with_margin(K.variable(predictions), K.variable(labels)))
23 | assert_almost_equal(expected_result, result)
24 |
--------------------------------------------------------------------------------
/tests/training/multi_gpu_test.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=no-self-use,invalid-name
2 | from copy import deepcopy
3 |
4 | import keras.backend as K
5 | from deep_qa.common.params import Params
6 | from deep_qa.models.text_classification import ClassificationModel
7 | from deep_qa.testing.test_case import DeepQaTestCase
8 |
9 |
10 | class TestMultiGpu(DeepQaTestCase):
11 |
12 | def setUp(self):
13 | super(TestMultiGpu, self).setUp()
14 | self.write_true_false_model_files()
15 | self.args = Params({
16 | 'num_gpus': 2,
17 | })
18 |
19 | def test_model_can_train_and_load(self):
20 | self.ensure_model_trains_and_loads(ClassificationModel, self.args)
21 |
22 | def test_model_can_train_and_load_with_generator(self):
23 | args = self.args
24 |
25 | args["data_generator"] = {"dynamic_batching": True, "padding_noise": 0.4}
26 | self.ensure_model_trains_and_loads(ClassificationModel, args)
27 |
28 | def test_variables_live_on_cpu(self):
29 | model = self.get_model(ClassificationModel, self.args)
30 | model.train()
31 |
32 | trainable_variables = model.model.trainable_weights
33 | for variable in trainable_variables:
34 |             # This is an odd quirk of tensorflow - the devices are actually named
35 |             # slightly differently from their scopes, so the device may be reported as "" rather than "/cpu:0".
36 | assert variable.device == "/cpu:0" or variable.device == ""
37 |
38 | def test_multi_gpu_shares_variables(self):
39 | multi_gpu_model = self.get_model(ClassificationModel, self.args)
40 |
41 | single_gpu_args = deepcopy(self.args)
42 | single_gpu_args["num_gpus"] = 1
43 | single_gpu_model = self.get_model(ClassificationModel, single_gpu_args)
44 |
45 | multi_gpu_model.train()
46 | multi_gpu_variables = [x.name for x in multi_gpu_model.model.trainable_weights]
47 |
48 | K.clear_session()
49 | single_gpu_model.train()
50 | single_gpu_variables = ["tower_0/" + x.name for x in single_gpu_model.model.trainable_weights]
51 |
52 | assert single_gpu_variables == multi_gpu_variables
53 |
--------------------------------------------------------------------------------