├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── api_docs ├── APIDocGeneration │ ├── Makefile │ ├── conf.py │ ├── index.rst │ ├── jtr.format.convert.rst │ ├── jtr.format.rst │ ├── jtr.load.embeddings.rst │ ├── jtr.load.rst │ ├── jtr.nn.rst │ ├── jtr.rst │ ├── jtr.util.rst │ ├── modules.rst │ └── setup.rst ├── How_to_apidocstrings2htmlmd ├── How_to_contribute.md ├── genindex.html ├── index.html ├── jtr.format.convert.html ├── jtr.format.html ├── jtr.html ├── jtr.load.embeddings.html ├── jtr.load.html ├── jtr.nn.html ├── jtr.util.html ├── modules.html ├── objects.inv ├── py-modindex.html ├── search.html ├── searchindex.js ├── setup.html ├── sources │ ├── index.rst.txt │ ├── jtr.format.convert.rst.txt │ ├── jtr.format.rst.txt │ ├── jtr.load.embeddings.rst.txt │ ├── jtr.load.rst.txt │ ├── jtr.nn.rst.txt │ ├── jtr.rst.txt │ ├── jtr.util.rst.txt │ ├── modules.rst.txt │ └── setup.rst.txt └── static │ ├── ajax-loader.gif │ ├── alabaster.css │ ├── basic.css │ ├── comment-bright.png │ ├── comment-close.png │ ├── comment.png │ ├── custom.css │ ├── doctools.js │ ├── down-pressed.png │ ├── down.png │ ├── file.png │ ├── jquery-3.1.0.js │ ├── jquery.js │ ├── minus.png │ ├── plus.png │ ├── pygments.css │ ├── searchtools.js │ ├── underscore-1.3.1.js │ ├── underscore.js │ ├── up-pressed.png │ ├── up.png │ └── websupport.js ├── bin ├── create-squad-predictions.py ├── jack-eval.py ├── jack-train.py ├── mmap-cli.py └── squad_evaluate-v1.1.py ├── conf ├── jack.yaml ├── lp │ ├── complex.yaml │ ├── complex_fb.yaml │ ├── complex_wn18rr.yaml │ ├── distmult.yaml │ ├── distmult_fb.yaml │ ├── distmult_test.yaml │ ├── distmult_wn18rr.yaml │ ├── transe.yaml │ ├── transe_fb.yaml │ └── transe_wn18rr.yaml ├── nli │ ├── cbilstm.yaml │ ├── dam.yaml │ ├── esim.yaml │ ├── modular_nli.yaml │ ├── multinli │ │ ├── abstract_multinli.yaml │ │ ├── cbilstm.yaml │ │ ├── dam.yaml │ │ └── esim.yaml │ └── snli │ │ ├── abstract_snli.yaml │ │ ├── cbilstm.yaml │ │ ├── dam.yaml │ │ └── esim.yaml └── qa │ ├── bidaf.yaml │ ├── fastqa.yaml │ ├── jackqa.yaml │ ├── modular_qa.yaml │ ├── squad │ ├── abstract_squad.yaml │ ├── bidaf.yaml │ ├── fastqa.yaml │ └── jackqa.yaml │ └── triviaqa │ ├── web │ ├── abstract_triviaqa.yaml │ ├── bidaf.yaml │ ├── fastqa.yaml │ └── jackqa.yaml │ └── wiki │ ├── abstract_triviaqa.yaml │ ├── bidaf.yaml │ ├── fastqa.yaml │ └── jackqa.yaml ├── data ├── CBT │ ├── .gitignore │ ├── download.sh │ └── snippet.jtr.json ├── FB15k-237 │ ├── .gitignore │ ├── download.sh │ └── snippet.jtr.json ├── GloVe │ ├── download.sh │ ├── download_small.sh │ └── glove.the.50d.txt ├── LS │ ├── download.sh │ └── snippet.jtr.json ├── MCTest │ ├── .gitignore │ ├── download.sh │ └── snippet.jtr.json ├── MultiNLI │ ├── download.sh │ └── snippet.jtr.json ├── NYT │ ├── download.sh │ └── naacl2013_snippet.jtr.json ├── NewsQA │ └── download.sh ├── PTB │ └── download.sh ├── QAngaroo │ ├── instructions.md │ └── qangaroo2squad.py ├── SNLI │ ├── .gitignore │ ├── README.md │ ├── download.sh │ ├── snippet.json │ ├── snippet.jtr_v1.json │ └── snippet.jtr_v2.json ├── SQuAD │ ├── .gitignore │ ├── download.sh │ ├── snippet.json │ └── snippet.jtr.json ├── TBD │ ├── SemEval2017Task10 │ │ ├── S0022311514005480.ann │ │ └── S0022311514005480.txt │ ├── StoryCloze │ │ ├── debug_shuffled.tsv │ │ ├── dev_shuffled.tsv │ │ ├── test_shuffled.tsv │ │ └── train_shuffled.tsv │ ├── TACKBP │ │ └── tackbp_snippet.json │ └── scienceQA │ │ ├── scienceQA_cloze_snippet.json │ │ └── scienceQA_cloze_snippet.txt ├── WN18 │ ├── download.sh │ ├── snippet.jtr │ └── snippet.txt 
├── WN18RR │ └── download.sh ├── emoji2vec │ ├── download.sh │ └── visualize.py ├── rc-data │ ├── .gitignore │ ├── README.md │ ├── cnn_snippet.jtr.json │ └── post_download.sh ├── sentihood │ ├── download.sh │ ├── sentihood-dev.json │ ├── sentihood-test.json │ ├── sentihood-train.json │ └── single_jtr.json ├── simpleQuestions │ ├── README │ ├── download.sh │ ├── snippet.jtr.json │ └── snippet.txt ├── triviaqa │ ├── README │ ├── config.py │ ├── convert2jack.py │ └── download.sh └── word2vec │ └── download.sh ├── docs ├── CLI.md ├── Encoder_Modules.md ├── Formats_for_Embeddings.md ├── How_to_test.md ├── TensorPorts.md └── Understanding_Jack_the_Reader.md ├── jack ├── __init__.py ├── core │ ├── __init__.py │ ├── data_structures.py │ ├── input_module.py │ ├── model_module.py │ ├── output_module.py │ ├── reader.py │ ├── shared_resources.py │ ├── tensorflow.py │ ├── tensorport.py │ └── torch.py ├── eval │ ├── __init__.py │ ├── base.py │ ├── classification.py │ ├── extractive_qa.py │ ├── link_prediction.py │ └── output_schema.json ├── io │ ├── CBT2jtr.py │ ├── FB15K2jtr.py │ ├── MCTest2jtr.py │ ├── NYT2jtr.py │ ├── SNLI2jtr.py │ ├── SNLI2jtr_concat.py │ ├── SQuAD2jtr.py │ ├── WN182jtr.py │ ├── __init__.py │ ├── bAbI2JTR.py │ ├── dataset_schema.json │ ├── embeddings │ │ ├── __init__.py │ │ ├── embeddings.py │ │ ├── fasttext.py │ │ ├── glove.py │ │ ├── memory_map.py │ │ └── word_to_vec.py │ ├── load.py │ ├── ls2jtr.py │ ├── merge_JTR_data_files.py │ ├── multiNLI2jtr.py │ ├── newsqa2jtr.py │ ├── newsqa2squad.py │ ├── rc-data2jtr.py │ ├── read_semeval2017Task10.py │ ├── scienceQA2jtr.py │ ├── sentihood2jtr.py │ ├── simpleQuestions2jtr.py │ └── validate.py ├── readers │ ├── __init__.py │ ├── classification │ │ ├── __init__.py │ │ ├── shared.py │ │ └── util.py │ ├── extractive_qa │ │ ├── __init__.py │ │ ├── shared.py │ │ ├── tensorflow │ │ │ ├── __init__.py │ │ │ ├── abstract_model.py │ │ │ ├── answer_layer.py │ │ │ ├── fastqa.py │ │ │ └── modular_qa_model.py │ │ ├── torch │ │ │ ├── __init__.py │ │ │ └── fastqa.py │ │ └── util.py │ ├── implementations.py │ ├── link_prediction │ │ ├── __init__.py │ │ ├── models.py │ │ ├── scores.py │ │ └── similarities.py │ └── natural_language_inference │ │ ├── __init__.py │ │ ├── conditional_bilstm.py │ │ ├── decomposable_attention.py │ │ ├── modular_nli_model.py │ │ └── prediction_layer.py ├── train_reader.py └── util │ ├── __init__.py │ ├── batch.py │ ├── hooks.py │ ├── map.py │ ├── preprocessing.py │ ├── random.py │ ├── tf │ ├── __init__.py │ ├── activations.py │ ├── attention.py │ ├── dropout.py │ ├── embedding.py │ ├── highway.py │ ├── interaction_layer.py │ ├── masking.py │ ├── misc.py │ ├── modular_encoder.py │ ├── pairwise_losses.py │ ├── rnn.py │ ├── segment.py │ ├── sequence_encoder.py │ ├── simple.py │ └── xqa.py │ ├── torch │ ├── __init__.py │ ├── embedding.py │ ├── highway.py │ ├── misc.py │ ├── rnn.py │ ├── segment.py │ └── xqa.py │ └── vocab.py ├── notebooks ├── model_implementation.ipynb ├── model_training.ipynb ├── prettyprint.py └── quick_start.ipynb ├── projects └── knowledge_integration │ ├── README.md │ ├── __init__.py │ ├── conf │ ├── nli │ │ ├── multinli │ │ │ └── cbilstm_assertion.yaml │ │ └── snli │ │ │ └── cbilstm_assertion.yaml │ └── qa │ │ ├── bilstm_assertion.yaml │ │ ├── squad │ │ ├── bilstm_assertion.yaml │ │ └── bilstm_assertion_definition.yaml │ │ └── triviaqa │ │ ├── web │ │ ├── bilstm_assertion.yaml │ │ └── bilstm_assertion_definition.yaml │ │ └── wiki │ │ ├── bilstm_assertion.yaml │ │ └── bilstm_assertion_definition.yaml │ ├── 
knowledge_store.py │ ├── nli.py │ ├── qa │ ├── __init__.py │ ├── definition_model.py │ └── shared.py │ ├── readers.py │ ├── scripts │ ├── __init__.py │ ├── extract_conceptnet.py │ ├── extract_side_information_for_dataset.py │ └── extract_wikipedia_short_abstract.py │ ├── shared.py │ └── tfutil.py ├── pytest.ini ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── conftest.py ├── jack │ ├── debug │ │ └── test_debug.py │ ├── eval │ │ └── test_kbp_eval.py │ ├── load │ │ └── test_loaders.py │ ├── preprocess │ │ ├── test_batch.py │ │ ├── test_map.py │ │ └── test_vocab_prune.py │ ├── readers │ │ ├── extractive_qa │ │ │ └── test_util.py │ │ ├── multiple_choice │ │ │ └── test_simple_mcqa.py │ │ ├── test_fastqa.py │ │ ├── test_fastqa_loop.py │ │ ├── test_kbp.py │ │ ├── test_models.py │ │ ├── test_readers.py │ │ └── test_serialization.py │ ├── test_core.py │ └── test_embeddings.py ├── test_conf │ ├── dam_test.yaml │ ├── fastqa_test.yaml │ └── snli_small_adagrad_test.yaml ├── test_data │ ├── MultiNLI │ │ ├── 1000_samples_dev_jtr.json │ │ ├── 2000_samples_train_jtr.json │ │ └── overfit.json │ ├── SNLI │ │ ├── 1000_samples_dev_jtr_v1.json │ │ ├── 1000_samples_snli_1.0_train.jsonl │ │ ├── 2000_samples_test_jtr_v1.json │ │ ├── 2000_samples_train_jtr_v1.json │ │ ├── dev.json │ │ ├── overfit.json │ │ ├── test.json │ │ └── train.json │ ├── WN18 │ │ └── wn18-snippet.jack.json │ ├── glove.500.50d.txt │ ├── glove.840B.300d_top256.txt │ ├── sentihood │ │ ├── overfit.json │ │ ├── sentihood-dev.json │ │ ├── sentihood-test.json │ │ └── sentihood-train.json │ ├── snli.json │ ├── snli_1k.json │ ├── snli_3k.json │ ├── squad │ │ ├── dev.json │ │ ├── overfit.json │ │ ├── snippet_jtr.json │ │ ├── test.json │ │ └── train.json │ └── wiki.json ├── test_readme.py └── test_results │ ├── dam_test │ ├── checkpoint │ ├── model_module.data-00000-of-00001 │ ├── model_module.index │ ├── model_module.meta │ └── shared_resources │ │ ├── answer_vocab │ │ ├── config.yaml │ │ ├── remainder │ │ └── vocab │ ├── fastqa_test │ ├── checkpoint │ ├── model_module.data-00000-of-00001 │ ├── model_module.index │ ├── model_module.meta │ └── shared_resources │ │ ├── config.yaml │ │ ├── embeddings │ │ └── config.yaml │ │ ├── remainder │ │ └── vocab │ ├── overfit_test │ ├── SNLI │ │ ├── dam │ │ │ └── expected_results.txt │ │ └── esim │ │ │ └── expected_results.txt │ └── squad │ │ └── fastqa │ │ └── expected_results.txt │ ├── rename_recursively.py │ └── smalldata_test │ ├── SNLI │ ├── dam │ │ └── expected_results.txt │ └── esim │ │ └── expected_results.txt │ └── squad │ └── fastqa │ └── expected_results.txt └── wercker.yml /.gitignore: -------------------------------------------------------------------------------- 1 | # OSX specific 2 | .DS_Store 3 | __MACOSX 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # IPython Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | 95 | # Idea 96 | .idea/ 97 | 98 | # TensorBoard dirs 99 | .tb 100 | 101 | # Vim buffer files 102 | *.swp 103 | 104 | # Test result files 105 | testresult_* 106 | 107 | # Snippets 108 | snippets/ 109 | 110 | # Datasets 111 | data/WN18/*.tgz 112 | data/WN18/wordnet-mlj12 113 | tests/test_results/fastqa_reader_test/ 114 | tests/test_results/dam_reader_test/ 115 | data/GloVe/glove.840B.300d.* 116 | data/MultiNLI/multinli_1.0/ 117 | 118 | data/WN18/train.jtr 119 | data/WN18/valid.jtr 120 | data/WN18/test.jtr 121 | data/WN18/wn18* 122 | 123 | data/WN18RR/test.jtr 124 | data/WN18RR/test.txt 125 | data/WN18RR/train.jtr 126 | data/WN18RR/train.txt 127 | data/WN18RR/valid.jtr 128 | data/WN18RR/valid.txt 129 | data/WN18RR/wn18.tgz 130 | 131 | saved_reader/ 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 UCL Machine Reading 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # simple makefile to simplify repetitive build env management tasks under posix 2 | PYTHON := python3 3 | PIP := $(PYTHON) -m pip 4 | PYTEST := $(PYTHON) -m pytest 5 | 6 | init: 7 | $(PIP) install -r requirements.txt 8 | install: 9 | $(PYTHON) setup.py install 10 | install-develop: 11 | $(PYTHON) setup.py develop 12 | install-user: 13 | $(PYTHON) setup.py install --user 14 | clean: 15 | $(PYTHON) setup.py clean --all 16 | unittest: 17 | $(PYTEST) tests -v -m "not (overfit or smalldata)" -k "not test_pipeline" 18 | test: 19 | $(PYTEST) tests -v -m "not (smalldata)" 20 | 21 | # FIXME: this should probably be test-overfit rather than overfit 22 | overfit: 23 | $(PYTEST) tests -v -m "overfit" 24 | smalldata: 25 | $(PYTEST) tests -v -m "smalldata" 26 | 27 | SNLI: 28 | $(PYTEST) tests -v -m SNLI 29 | doctests: 30 | $(PYTEST) --doctest-modules jtr/preprocess/vocab.py 31 | -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = jtr 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/index.rst: -------------------------------------------------------------------------------- 1 | .. jtr documentation master file, created by 2 | sphinx-quickstart on Mon Jan 9 17:30:20 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to jtr's documentation! 7 | ================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 4 11 | :caption: Contents: 12 | 13 | modules 14 | 15 | 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/jtr.format.convert.rst: -------------------------------------------------------------------------------- 1 | jtr.format.convert package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | jtr.format.convert.nyt module 8 | ----------------------------- 9 | 10 | .. automodule:: jtr.format.convert.nyt 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | jtr.format.convert.squad module 16 | ------------------------------- 17 | 18 | .. automodule:: jtr.format.convert.squad 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: jtr.format.convert 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/jtr.format.rst: -------------------------------------------------------------------------------- 1 | jtr.format package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | jtr.format.convert 10 | 11 | Submodules 12 | ---------- 13 | 14 | jtr.format.validate module 15 | -------------------------- 16 | 17 | .. automodule:: jtr.format.validate 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: jtr.format 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/jtr.load.embeddings.rst: -------------------------------------------------------------------------------- 1 | jtr.load.embeddings package 2 | =========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | jtr.load.embeddings.embeddings module 8 | ------------------------------------- 9 | 10 | .. automodule:: jtr.load.embeddings.embeddings 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | jtr.load.embeddings.glove module 16 | -------------------------------- 17 | 18 | .. automodule:: jtr.load.embeddings.glove 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | jtr.load.embeddings.vocabulary module 24 | ------------------------------------- 25 | 26 | .. automodule:: jtr.load.embeddings.vocabulary 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | jtr.load.embeddings.word_to_vec module 32 | -------------------------------------- 33 | 34 | .. automodule:: jtr.load.embeddings.word_to_vec 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: jtr.load.embeddings 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/jtr.load.rst: -------------------------------------------------------------------------------- 1 | jtr.load package 2 | ================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | jtr.load.embeddings 10 | 11 | Submodules 12 | ---------- 13 | 14 | jtr.load.FB15K2jtr module 15 | ------------------------- 16 | 17 | .. automodule:: jtr.load.FB15K2jtr 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | jtr.load.NYT2jtr module 23 | ----------------------- 24 | 25 | .. automodule:: jtr.load.NYT2jtr 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | jtr.load.SNLI2jtr_v1 module 31 | --------------------------- 32 | 33 | .. automodule:: jtr.load.SNLI2jtr_v1 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | jtr.load.SNLI2jtr_v2 module 39 | --------------------------- 40 | 41 | .. automodule:: jtr.load.SNLI2jtr_v2 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | jtr.load.create_dummpy_scienceQA module 47 | --------------------------------------- 48 | 49 | .. automodule:: jtr.load.create_dummpy_scienceQA 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | jtr.load.ls2jtr module 55 | ---------------------- 56 | 57 | .. 
automodule:: jtr.load.ls2jtr 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | 62 | jtr.load.read_jtr module 63 | ------------------------ 64 | 65 | .. automodule:: jtr.load.read_jtr 66 | :members: 67 | :undoc-members: 68 | :show-inheritance: 69 | 70 | jtr.load.read_semeval2017Task10 module 71 | -------------------------------------- 72 | 73 | .. automodule:: jtr.load.read_semeval2017Task10 74 | :members: 75 | :undoc-members: 76 | :show-inheritance: 77 | 78 | jtr.load.scienceQA2jtr module 79 | ----------------------------- 80 | 81 | .. automodule:: jtr.load.scienceQA2jtr 82 | :members: 83 | :undoc-members: 84 | :show-inheritance: 85 | 86 | jtr.load.sentihood2jtr module 87 | ----------------------------- 88 | 89 | .. automodule:: jtr.load.sentihood2jtr 90 | :members: 91 | :undoc-members: 92 | :show-inheritance: 93 | 94 | jtr.load.simpleQuestions2jtr module 95 | ----------------------------------- 96 | 97 | .. automodule:: jtr.load.simpleQuestions2jtr 98 | :members: 99 | :undoc-members: 100 | :show-inheritance: 101 | 102 | 103 | Module contents 104 | --------------- 105 | 106 | .. automodule:: jtr.load 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/jtr.nn.rst: -------------------------------------------------------------------------------- 1 | jtr.nn package 2 | ============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | jtr.nn.core_models module 8 | ------------------------- 9 | 10 | .. automodule:: jtr.nn.core_models 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | jtr.nn.models module 16 | -------------------- 17 | 18 | .. automodule:: jtr.nn.models 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: jtr.nn 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/jtr.rst: -------------------------------------------------------------------------------- 1 | jtr package 2 | =========== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | jtr.format 10 | jtr.load 11 | jtr.nn 12 | jtr.util 13 | 14 | Submodules 15 | ---------- 16 | 17 | jtr.pairwise_losses module 18 | -------------------------- 19 | 20 | .. automodule:: jtr.pairwise_losses 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | jtr.pipelines module 26 | -------------------- 27 | 28 | .. automodule:: jtr.pipelines 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | jtr.train module 34 | ---------------- 35 | 36 | .. automodule:: jtr.train 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | jtr.training_pipeline module 42 | ---------------------------- 43 | 44 | .. automodule:: jtr.training_pipeline 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: 48 | 49 | 50 | Module contents 51 | --------------- 52 | 53 | .. automodule:: jtr 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/jtr.util.rst: -------------------------------------------------------------------------------- 1 | jtr.util package 2 | ================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | jtr.util.gen_data module 8 | ------------------------ 9 | 10 | .. 
automodule:: jtr.util.gen_data 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | jtr.util.hooks module 16 | --------------------- 17 | 18 | .. automodule:: jtr.util.hooks 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | jtr.util.rs module 24 | ------------------ 25 | 26 | .. automodule:: jtr.util.rs 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | jtr.util.tfutil module 32 | ---------------------- 33 | 34 | .. automodule:: jtr.util.tfutil 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | jtr.util.util module 40 | -------------------- 41 | 42 | .. automodule:: jtr.util.util 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: jtr.util 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/modules.rst: -------------------------------------------------------------------------------- 1 | jtr 2 | ====== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | jtr 8 | setup 9 | -------------------------------------------------------------------------------- /api_docs/APIDocGeneration/setup.rst: -------------------------------------------------------------------------------- 1 | setup module 2 | ============ 3 | 4 | .. automodule:: setup 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /api_docs/How_to_apidocstrings2htmlmd: -------------------------------------------------------------------------------- 1 | # How to Generate New Docs From Scratch 2 | Install [Sphinx](http://www.sphinx-doc.org/en/1.5.1/install.html) and its requirements. Then execute the steps below, adapted from [this StackOverflow question](http://stackoverflow.com/questions/20354768/python-sphinx-how-to-document-one-file-with-functions) 3 | 4 | Here is a step-by-step list: 5 | 6 | 1. Create documentation folder: `mkdir doc` 7 | 2. Enter doc/: `cd doc` 8 | 3. Execute sphinx-quickstart (Be sure to select autodoc: y, Makefile: y) 9 | 4. Edit conf.py to specify sys.path: `sys.path.insert(0, os.path.abspath('..'))` 10 | 5. Edit index.rst and specify modules in the toctree: 11 | ``` 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | modules 16 | ``` 17 | 6. Execute sphinx-apidoc -o . .. 18 | 7. Generate the html output: make html 19 | 8. 
View your documentation: firefox _build/html/index.html 20 | -------------------------------------------------------------------------------- /api_docs/How_to_contribute.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | In the following, you can see a simple baseline model for JTR: 4 | 5 | TODO: describe nvocab, **options 6 | 7 | ```python 8 | def boe_nosupport_cands_reader_model(placeholders, nvocab, **options): 9 | """ 10 | Bag of embedding reader with pairs of (question, support) and candidates 11 | """ 12 | 13 | # Model 14 | # [batch_size, max_seq1_length] 15 | question = placeholders['question'] 16 | 17 | # [batch_size, candidate_size] 18 | targets = placeholders['targets'] 19 | 20 | # [batch_size, max_num_cands] 21 | candidates = placeholders['candidates'] 22 | 23 | with tf.variable_scope("embedders") as varscope: 24 | question_embedded = nvocab(question) 25 | varscope.reuse_variables() 26 | candidates_embedded = nvocab(candidates) 27 | 28 | logger.info('TRAINABLE VARIABLES (only embeddings): {}'.format(get_total_trainable_variables())) 29 | question_encoding = tf.reduce_sum(question_embedded, 1) 30 | 31 | scores = logits = tf.reduce_sum(tf.expand_dims(question_encoding, 1) * candidates_embedded, 2) 32 | loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(scores, targets), name='predictor_loss') 33 | predict = tf.arg_max(tf.nn.softmax(logits), 1, name='prediction') 34 | 35 | logger.info('TRAINABLE VARIABLES (embeddings + model): {}'.format(get_total_trainable_variables())) 36 | logger.info('ALL VARIABLES (embeddings + model): {}'.format(get_total_variables())) 37 | 38 | return logits, loss, predict 39 | ``` 40 | -------------------------------------------------------------------------------- /api_docs/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/objects.inv -------------------------------------------------------------------------------- /api_docs/sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. jtr documentation master file, created by 2 | sphinx-quickstart on Mon Jan 9 17:30:20 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to jtr's documentation! 7 | ================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 4 11 | :caption: Contents: 12 | 13 | modules 14 | 15 | 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /api_docs/sources/jtr.format.convert.rst.txt: -------------------------------------------------------------------------------- 1 | jtr.format.convert package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | jtr.format.convert.nyt module 8 | ----------------------------- 9 | 10 | .. automodule:: jtr.format.convert.nyt 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | jtr.format.convert.squad module 16 | ------------------------------- 17 | 18 | .. automodule:: jtr.format.convert.squad 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: jtr.format.convert 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /api_docs/sources/jtr.format.rst.txt: -------------------------------------------------------------------------------- 1 | jtr.format package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | jtr.format.convert 10 | 11 | Submodules 12 | ---------- 13 | 14 | jtr.format.validate module 15 | -------------------------- 16 | 17 | .. automodule:: jtr.format.validate 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: jtr.format 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /api_docs/sources/jtr.load.embeddings.rst.txt: -------------------------------------------------------------------------------- 1 | jtr.load.embeddings package 2 | =========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | jtr.load.embeddings.embeddings module 8 | ------------------------------------- 9 | 10 | .. automodule:: jtr.load.embeddings.embeddings 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | jtr.load.embeddings.glove module 16 | -------------------------------- 17 | 18 | .. automodule:: jtr.load.embeddings.glove 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | jtr.load.embeddings.vocabulary module 24 | ------------------------------------- 25 | 26 | .. automodule:: jtr.load.embeddings.vocabulary 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | jtr.load.embeddings.word_to_vec module 32 | -------------------------------------- 33 | 34 | .. automodule:: jtr.load.embeddings.word_to_vec 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: jtr.load.embeddings 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /api_docs/sources/jtr.load.rst.txt: -------------------------------------------------------------------------------- 1 | jtr.load package 2 | ================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | jtr.load.embeddings 10 | 11 | Submodules 12 | ---------- 13 | 14 | jtr.load.FB15K2jtr module 15 | ------------------------- 16 | 17 | .. automodule:: jtr.load.FB15K2jtr 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | jtr.load.NYT2jtr module 23 | ----------------------- 24 | 25 | .. automodule:: jtr.load.NYT2jtr 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | jtr.load.SNLI2jtr_v1 module 31 | --------------------------- 32 | 33 | .. automodule:: jtr.load.SNLI2jtr_v1 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | jtr.load.SNLI2jtr_v2 module 39 | --------------------------- 40 | 41 | .. automodule:: jtr.load.SNLI2jtr_v2 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | jtr.load.create_dummpy_scienceQA module 47 | --------------------------------------- 48 | 49 | .. automodule:: jtr.load.create_dummpy_scienceQA 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | jtr.load.ls2jtr module 55 | ---------------------- 56 | 57 | .. 
automodule:: jtr.load.ls2jtr 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | 62 | jtr.load.read_jtr module 63 | ------------------------ 64 | 65 | .. automodule:: jtr.load.read_jtr 66 | :members: 67 | :undoc-members: 68 | :show-inheritance: 69 | 70 | jtr.load.read_semeval2017Task10 module 71 | -------------------------------------- 72 | 73 | .. automodule:: jtr.load.read_semeval2017Task10 74 | :members: 75 | :undoc-members: 76 | :show-inheritance: 77 | 78 | jtr.load.scienceQA2jtr module 79 | ----------------------------- 80 | 81 | .. automodule:: jtr.load.scienceQA2jtr 82 | :members: 83 | :undoc-members: 84 | :show-inheritance: 85 | 86 | jtr.load.sentihood2jtr module 87 | ----------------------------- 88 | 89 | .. automodule:: jtr.load.sentihood2jtr 90 | :members: 91 | :undoc-members: 92 | :show-inheritance: 93 | 94 | jtr.load.simpleQuestions2jtr module 95 | ----------------------------------- 96 | 97 | .. automodule:: jtr.load.simpleQuestions2jtr 98 | :members: 99 | :undoc-members: 100 | :show-inheritance: 101 | 102 | 103 | Module contents 104 | --------------- 105 | 106 | .. automodule:: jtr.load 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | -------------------------------------------------------------------------------- /api_docs/sources/jtr.nn.rst.txt: -------------------------------------------------------------------------------- 1 | jtr.nn package 2 | ============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | jtr.nn.core_models module 8 | ------------------------- 9 | 10 | .. automodule:: jtr.nn.core_models 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | jtr.nn.models module 16 | -------------------- 17 | 18 | .. automodule:: jtr.nn.models 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: jtr.nn 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /api_docs/sources/jtr.rst.txt: -------------------------------------------------------------------------------- 1 | jtr package 2 | =========== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | jtr.format 10 | jtr.load 11 | jtr.nn 12 | jtr.util 13 | 14 | Submodules 15 | ---------- 16 | 17 | jtr.pairwise_losses module 18 | -------------------------- 19 | 20 | .. automodule:: jtr.pairwise_losses 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | jtr.pipelines module 26 | -------------------- 27 | 28 | .. automodule:: jtr.pipelines 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | jtr.train module 34 | ---------------- 35 | 36 | .. automodule:: jtr.train 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | jtr.training_pipeline module 42 | ---------------------------- 43 | 44 | .. automodule:: jtr.training_pipeline 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: 48 | 49 | 50 | Module contents 51 | --------------- 52 | 53 | .. automodule:: jtr 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | -------------------------------------------------------------------------------- /api_docs/sources/jtr.util.rst.txt: -------------------------------------------------------------------------------- 1 | jtr.util package 2 | ================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | jtr.util.gen_data module 8 | ------------------------ 9 | 10 | .. 
automodule:: jtr.util.gen_data 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | jtr.util.hooks module 16 | --------------------- 17 | 18 | .. automodule:: jtr.util.hooks 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | jtr.util.rs module 24 | ------------------ 25 | 26 | .. automodule:: jtr.util.rs 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | jtr.util.tfutil module 32 | ---------------------- 33 | 34 | .. automodule:: jtr.util.tfutil 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | jtr.util.util module 40 | -------------------- 41 | 42 | .. automodule:: jtr.util.util 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: jtr.util 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /api_docs/sources/modules.rst.txt: -------------------------------------------------------------------------------- 1 | jtr 2 | ====== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | jtr 8 | setup 9 | -------------------------------------------------------------------------------- /api_docs/sources/setup.rst.txt: -------------------------------------------------------------------------------- 1 | setup module 2 | ============ 3 | 4 | .. automodule:: setup 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /api_docs/static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/ajax-loader.gif -------------------------------------------------------------------------------- /api_docs/static/comment-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/comment-bright.png -------------------------------------------------------------------------------- /api_docs/static/comment-close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/comment-close.png -------------------------------------------------------------------------------- /api_docs/static/comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/comment.png -------------------------------------------------------------------------------- /api_docs/static/custom.css: -------------------------------------------------------------------------------- 1 | /* This file intentionally left blank. 
*/ 2 | -------------------------------------------------------------------------------- /api_docs/static/down-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/down-pressed.png -------------------------------------------------------------------------------- /api_docs/static/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/down.png -------------------------------------------------------------------------------- /api_docs/static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/file.png -------------------------------------------------------------------------------- /api_docs/static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/minus.png -------------------------------------------------------------------------------- /api_docs/static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/plus.png -------------------------------------------------------------------------------- /api_docs/static/up-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/up-pressed.png -------------------------------------------------------------------------------- /api_docs/static/up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/api_docs/static/up.png -------------------------------------------------------------------------------- /bin/create-squad-predictions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | import os 7 | import sys 8 | 9 | import tensorflow as tf 10 | 11 | from jack.io.load import loaders 12 | from jack.readers.implementations import reader_from_file 13 | 14 | logger = logging.getLogger(os.path.basename(sys.argv[0])) 15 | logging.basicConfig(level=logging.INFO) 16 | 17 | tf.app.flags.DEFINE_string('dataset', None, 'dataset file') 18 | tf.app.flags.DEFINE_string('loader', 'squad', 'either squad or jack') 19 | tf.app.flags.DEFINE_string('load_dir', None, 'directory to saved model') 20 | tf.app.flags.DEFINE_string('out', "results.json", 'Result file path.') 21 | tf.app.flags.DEFINE_integer('batch_size', 64, 'batch size') 22 | tf.app.flags.DEFINE_string('overwrite', '{}', 'json string that can overwrite configuration.') 23 | 24 | FLAGS = tf.app.flags.FLAGS 25 | 26 | logger.info("Creating and loading reader from {}...".format(FLAGS.load_dir)) 27 | config = {"max_support_length": None} 28 | config.update(json.loads(FLAGS.overwrite)) 29 | reader = reader_from_file(FLAGS.load_dir, **config) 30 | 31 | dataset = loaders[FLAGS.loader](FLAGS.dataset) 32 | 33 | logger.info("Start!") 34 | answers = 
reader.process_dataset(dataset, FLAGS.batch_size, silent=False) 35 | results = {dataset[i][0].id: a.text for i, a in enumerate(answers)} 36 | with open(FLAGS.out, "w") as out_file: 37 | json.dump(results, out_file) 38 | 39 | logger.info("Done!") 40 | -------------------------------------------------------------------------------- /bin/jack-eval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | import os 7 | import sys 8 | 9 | import tensorflow as tf 10 | 11 | from jack.eval import evaluate_reader, pretty_print_results 12 | from jack.io.load import loaders 13 | from jack.readers import reader_from_file 14 | 15 | logger = logging.getLogger(os.path.basename(sys.argv[0])) 16 | logging.basicConfig(level=logging.INFO) 17 | 18 | tf.app.flags.DEFINE_string('dataset', None, 'dataset file') 19 | tf.app.flags.DEFINE_string('loader', 'jack', 'name of loader') 20 | tf.app.flags.DEFINE_string('load_dir', None, 'directory to saved model') 21 | tf.app.flags.DEFINE_integer('batch_size', 64, 'batch size') 22 | tf.app.flags.DEFINE_integer('max_examples', None, 'maximum number of examples to evaluate') 23 | tf.app.flags.DEFINE_string('overwrite', '{}', 'json string that overwrites configuration.') 24 | 25 | FLAGS = tf.app.flags.FLAGS 26 | 27 | logger.info("Creating and loading reader from {}...".format(FLAGS.load_dir)) 28 | 29 | kwargs = json.loads(FLAGS.overwrite) 30 | 31 | reader = reader_from_file(FLAGS.load_dir, **kwargs) 32 | dataset = loaders[FLAGS.loader](FLAGS.dataset) 33 | if FLAGS.max_examples: 34 | dataset = dataset[:FLAGS.max_examples] 35 | 36 | logger.info("Start!") 37 | result_dict = evaluate_reader(reader, dataset, FLAGS.batch_size) 38 | 39 | 40 | logger.info("############### RESULTS ##############") 41 | pretty_print_results(result_dict) 42 | -------------------------------------------------------------------------------- /bin/mmap-cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import sys 6 | 7 | from jack.io.embeddings import load_embeddings 8 | from jack.io.embeddings.memory_map import save_as_memory_map_dir 9 | 10 | import logging 11 | logger = logging.getLogger(os.path.basename(sys.argv[0])) 12 | 13 | 14 | def main(): 15 | import argparse 16 | parser = argparse.ArgumentParser(description='Convert embeddings to memory map directory') 17 | parser.add_argument("input_file", help="The input embedding file.") 18 | parser.add_argument("output_dir", 19 | help="The name of the directory to store the memory map in. 
Will be created if it doesn't " 20 | "exist.") 21 | parser.add_argument("-f", "--input_format", help="Format of input embeddings.", default="glove", 22 | choices=["glove", "word2vec", "memory_map_dir"]) 23 | args = parser.parse_args() 24 | input_name = args.input_file 25 | output_dir = args.output_dir 26 | embeddings = load_embeddings(input_name, typ=args.input_format) 27 | logging.info("Loaded embeddings from {}".format(input_name)) 28 | save_as_memory_map_dir(output_dir, embeddings) 29 | logging.info("Stored embeddings to {}".format(output_dir)) 30 | 31 | 32 | if __name__ == "__main__": 33 | main() 34 | -------------------------------------------------------------------------------- /bin/squad_evaluate-v1.1.py: -------------------------------------------------------------------------------- 1 | """ Official evaluation script for v1.1 of the SQuAD dataset. """ 2 | from __future__ import print_function 3 | 4 | import argparse 5 | import json 6 | 7 | from jack.eval.extractive_qa_eval import * 8 | 9 | 10 | def evaluate(dataset, predictions): 11 | f1 = exact_match = total = 0 12 | for article in dataset: 13 | for paragraph in article['paragraphs']: 14 | for qa in paragraph['qas']: 15 | total += 1 16 | if qa['id'] not in predictions: 17 | message = 'Unanswered question ' + qa['id'] + \ 18 | ' will receive score 0.' 19 | print(message, file=sys.stderr) 20 | continue 21 | ground_truths = list(map(lambda x: x['text'], qa['answers'])) 22 | prediction = predictions[qa['id']] 23 | exact_match += metric_max_over_ground_truths( 24 | exact_match_score, prediction, ground_truths) 25 | f1 += metric_max_over_ground_truths( 26 | f1_score, prediction, ground_truths) 27 | 28 | exact_match = 100.0 * exact_match / total 29 | f1 = 100.0 * f1 / total 30 | 31 | return {'exact_match': exact_match, 'f1': f1} 32 | 33 | 34 | if __name__ == '__main__': 35 | expected_version = '1.1' 36 | parser = argparse.ArgumentParser( 37 | description='Evaluation for SQuAD ' + expected_version) 38 | parser.add_argument('dataset_file', help='Dataset file') 39 | parser.add_argument('prediction_file', help='Prediction File') 40 | args = parser.parse_args() 41 | with open(args.dataset_file) as dataset_file: 42 | dataset_json = json.load(dataset_file) 43 | if (dataset_json['version'] != expected_version): 44 | print('Evaluation expects v-' + expected_version + 45 | ', but got dataset with v-' + dataset_json['version'], 46 | file=sys.stderr) 47 | dataset = dataset_json['data'] 48 | with open(args.prediction_file) as prediction_file: 49 | predictions = json.load(prediction_file) 50 | print(json.dumps(evaluate(dataset, predictions))) 51 | -------------------------------------------------------------------------------- /conf/lp/complex.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | DistMult - https://www.microsoft.com/en-us/research/publication/embedding-entities-and-relations-for-learning-and-inference-in-knowledge-bases/ 3 | 4 | parent_config: 'conf/jack.yaml' 5 | loader: jack 6 | 7 | name: 'complex' 8 | debug: False 9 | 10 | reader: 'complex_reader' 11 | 12 | train: 'data/WN18/train.jtr' 13 | dev: 'data/WN18/valid.jtr' 14 | test: 'data/WN18/test.jtr' 15 | 16 | seed: 1337 17 | epochs: 100 18 | repr_dim: 200 19 | num_negative: 1 20 | with_char_embeddings: False 21 | prune: False 22 | lowercase: False 23 | 24 | batch_size: 32 25 | optimizer: adam 26 | learning_rate: 0.005 27 | learning_rate_decay: 1 28 | 
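Like the other files under conf/, the link-prediction config above builds on `conf/jack.yaml` through its `parent_config` key, so a child file only has to state the values it overrides (and some configs, e.g. `conf/nli/multinli/dam.yaml`, list several parents). The snippet below is an illustrative sketch of how such a `parent_config` chain could be resolved into one flat dictionary; it is not Jack's actual configuration loader, and the helper name `load_config` is made up for this example.

```python
# Hypothetical sketch of resolving `parent_config` inheritance for the YAML
# files under conf/. NOT Jack's actual loader; it only illustrates the
# override behaviour implied by the configs (child keys win over parent keys).
import yaml  # PyYAML


def load_config(path):
    """Load a YAML config and merge it on top of its parent_config chain."""
    with open(path) as f:
        config = yaml.safe_load(f) or {}

    parents = config.pop('parent_config', None)
    if parents is None:
        return config
    if isinstance(parents, str):
        parents = [parents]

    merged = {}
    for parent_path in parents:  # e.g. './conf/jack.yaml'
        merged.update(load_config(parent_path))
    merged.update(config)        # child values override parent values
    return merged


# Example (assuming the working directory is the repository root):
# config = load_config('conf/lp/complex.yaml')
# print(config['reader'], config['repr_dim'])  # -> complex_reader 200
```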
-------------------------------------------------------------------------------- /conf/lp/complex_fb.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | DistMult - https://www.microsoft.com/en-us/research/publication/embedding-entities-and-relations-for-learning-and-inference-in-knowledge-bases/ 3 | 4 | parent_config: 'conf/jack.yaml' 5 | loader: jack 6 | 7 | name: 'complex' 8 | debug: False 9 | 10 | reader: 'complex_reader' 11 | 12 | train: 'data/FB15k-237/train.jtr' 13 | dev: 'data/FB15k-237/valid.jtr' 14 | test: 'data/FB15k-237/test.jtr' 15 | 16 | seed: 1337 17 | epochs: 100 18 | repr_dim: 200 19 | num_negative: 1 20 | with_char_embeddings: False 21 | prune: False 22 | lowercase: False 23 | 24 | batch_size: 32 25 | optimizer: adam 26 | learning_rate: 0.005 27 | learning_rate_decay: 1 28 | -------------------------------------------------------------------------------- /conf/lp/complex_wn18rr.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | DistMult - https://www.microsoft.com/en-us/research/publication/embedding-entities-and-relations-for-learning-and-inference-in-knowledge-bases/ 3 | 4 | parent_config: 'conf/jack.yaml' 5 | loader: jack 6 | 7 | name: 'complex' 8 | debug: False 9 | 10 | reader: 'complex_reader' 11 | 12 | train: 'data/WN18RR/train.jtr' 13 | dev: 'data/WN18RR/valid.jtr' 14 | test: 'data/WN18RR/test.jtr' 15 | 16 | seed: 1337 17 | epochs: 100 18 | repr_dim: 200 19 | num_negative: 1 20 | with_char_embeddings: False 21 | prune: False 22 | lowercase: False 23 | 24 | batch_size: 32 25 | optimizer: adam 26 | learning_rate: 0.005 27 | learning_rate_decay: 1 28 | -------------------------------------------------------------------------------- /conf/lp/distmult.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | DistMult - https://www.microsoft.com/en-us/research/publication/embedding-entities-and-relations-for-learning-and-inference-in-knowledge-bases/ 3 | 4 | parent_config: 'conf/jack.yaml' 5 | loader: jack 6 | 7 | name: 'distmult' 8 | debug: False 9 | 10 | reader: 'distmult_reader' 11 | 12 | train: 'data/WN18/train.jtr' 13 | dev: 'data/WN18/valid.jtr' 14 | test: 'data/WN18/test.jtr' 15 | 16 | seed: 1337 17 | epochs: 100 18 | repr_dim: 200 19 | num_negative: 1 20 | with_char_embeddings: False 21 | prune: False 22 | lowercase: False 23 | 24 | batch_size: 8192 25 | optimizer: adam 26 | learning_rate: 0.005 27 | learning_rate_decay: 1 28 | -------------------------------------------------------------------------------- /conf/lp/distmult_fb.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | DistMult - https://www.microsoft.com/en-us/research/publication/embedding-entities-and-relations-for-learning-and-inference-in-knowledge-bases/ 3 | 4 | parent_config: 'conf/jack.yaml' 5 | loader: jack 6 | 7 | name: 'distmult' 8 | debug: False 9 | 10 | reader: 'distmult_reader' 11 | 12 | train: 'data/FB15k-237/train.jtr' 13 | dev: 'data/FB15k-237/valid.jtr' 14 | test: 'data/FB15k-237/test.jtr' 15 | 16 | seed: 1337 17 | epochs: 100 18 | repr_dim: 200 19 | num_negative: 1 20 | with_char_embeddings: False 21 | prune: False 22 | lowercase: False 23 | 24 | batch_size: 8192 25 | optimizer: adam 26 | learning_rate: 0.005 27 | learning_rate_decay: 1 28 | -------------------------------------------------------------------------------- /conf/lp/distmult_test.yaml: 
-------------------------------------------------------------------------------- 1 | description: > 2 | DistMult - https://www.microsoft.com/en-us/research/publication/embedding-entities-and-relations-for-learning-and-inference-in-knowledge-bases/ 3 | 4 | parent_config: 'conf/jack.yaml' 5 | loader: jack 6 | 7 | name: 'distmult' 8 | debug: False 9 | 10 | reader: 'distmult_reader' 11 | 12 | train: 'data/WN18/snippet.jtr' 13 | dev: 'data/WN18/snippet.jtr' 14 | test: 'data/WN18/snippet.jtr' 15 | 16 | seed: 1337 17 | epochs: 100 18 | repr_dim: 20 19 | num_negative: 1 20 | with_char_embeddings: False 21 | prune: False 22 | lowercase: False 23 | 24 | batch_size: 32 25 | optimizer: adam 26 | learning_rate: 0.005 27 | learning_rate_decay: 1 28 | -------------------------------------------------------------------------------- /conf/lp/distmult_wn18rr.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | DistMult - https://www.microsoft.com/en-us/research/publication/embedding-entities-and-relations-for-learning-and-inference-in-knowledge-bases/ 3 | 4 | parent_config: 'conf/jack.yaml' 5 | loader: jack 6 | 7 | name: 'distmult' 8 | debug: False 9 | 10 | reader: 'distmult_reader' 11 | 12 | train: 'data/WN18RR/train.jtr' 13 | dev: 'data/WN18RR/valid.jtr' 14 | test: 'data/WN18RR/test.jtr' 15 | 16 | seed: 1337 17 | epochs: 100 18 | repr_dim: 200 19 | num_negative: 1 20 | with_char_embeddings: False 21 | prune: False 22 | lowercase: False 23 | 24 | batch_size: 8192 25 | optimizer: adam 26 | learning_rate: 0.005 27 | learning_rate_decay: 1 28 | -------------------------------------------------------------------------------- /conf/lp/transe.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | DistMult - https://www.microsoft.com/en-us/research/publication/embedding-entities-and-relations-for-learning-and-inference-in-knowledge-bases/ 3 | 4 | parent_config: 'conf/jack.yaml' 5 | loader: jack 6 | 7 | name: 'transe' 8 | debug: False 9 | 10 | reader: 'transe_reader' 11 | 12 | train: 'data/WN18/train.jtr' 13 | dev: 'data/WN18/valid.jtr' 14 | test: 'data/WN18/test.jtr' 15 | 16 | seed: 1337 17 | epochs: 100 18 | repr_dim: 200 19 | num_negative: 1 20 | with_char_embeddings: False 21 | prune: False 22 | lowercase: False 23 | 24 | batch_size: 32 25 | optimizer: adam 26 | learning_rate: 0.005 27 | learning_rate_decay: 1 28 | -------------------------------------------------------------------------------- /conf/lp/transe_fb.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | DistMult - https://www.microsoft.com/en-us/research/publication/embedding-entities-and-relations-for-learning-and-inference-in-knowledge-bases/ 3 | 4 | parent_config: 'conf/jack.yaml' 5 | loader: jack 6 | 7 | name: 'transe' 8 | debug: False 9 | 10 | reader: 'transe_reader' 11 | 12 | train: 'data/FB15k-237/train.jtr' 13 | dev: 'data/FB15k-237/valid.jtr' 14 | test: 'data/FB15k-237/test.jtr' 15 | 16 | seed: 1337 17 | epochs: 100 18 | repr_dim: 200 19 | num_negative: 1 20 | with_char_embeddings: False 21 | prune: False 22 | lowercase: False 23 | 24 | batch_size: 32 25 | optimizer: adam 26 | learning_rate: 0.005 27 | learning_rate_decay: 1 28 | -------------------------------------------------------------------------------- /conf/lp/transe_wn18rr.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | DistMult - 
https://www.microsoft.com/en-us/research/publication/embedding-entities-and-relations-for-learning-and-inference-in-knowledge-bases/ 3 | 4 | parent_config: 'conf/jack.yaml' 5 | loader: jack 6 | 7 | name: 'transe' 8 | debug: False 9 | 10 | reader: 'transe_reader' 11 | 12 | train: 'data/WN18RR/train.jtr' 13 | dev: 'data/WN18RR/valid.jtr' 14 | test: 'data/WN18RR/test.jtr' 15 | 16 | seed: 1337 17 | epochs: 100 18 | repr_dim: 200 19 | num_negative: 1 20 | with_char_embeddings: False 21 | prune: False 22 | lowercase: False 23 | 24 | batch_size: 32 25 | optimizer: adam 26 | learning_rate: 0.005 27 | learning_rate_decay: 1 28 | -------------------------------------------------------------------------------- /conf/nli/cbilstm.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | Conditional BiLSTM, processes premise with BiLSTM conditioned on processed hypothesis. Max pooling over the processed 3 | premise states is used prior to classification. 4 | 5 | parent_config: './conf/jack.yaml' 6 | 7 | # Reader model to use, see jack/readers/implementations.py for options 8 | reader: 'cbilstm_nli_reader' 9 | -------------------------------------------------------------------------------- /conf/nli/dam.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | Decomposable Attention Model https://arxiv.org/abs/1606.01933 3 | 4 | parent_config: './conf/jack.yaml' 5 | 6 | # Reader model to use, see jack/readers/implementations.py for options 7 | reader: 'dam_snli_reader' 8 | 9 | normalize_embeddings: True 10 | -------------------------------------------------------------------------------- /conf/nli/esim.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | ESIM model https://arxiv.org/abs/1609.06038 3 | 4 | parent_config: './conf/nli/modular_nli.yaml' 5 | 6 | name: 'esim_reader' 7 | 8 | model: 9 | encoder_layer: 10 | # BiLSTM 11 | - input: 'hypothesis' 12 | module: 'lstm' 13 | with_projection: True # not in original model but helps 14 | activation: 'tanh' 15 | name: 'encoder' 16 | dropout: True 17 | 18 | # BiLSTM 19 | - input: 'premise' 20 | module: 'lstm' 21 | with_projection: True # not in original model but helps 22 | activation: 'tanh' 23 | name: 'encoder' 24 | dropout: True 25 | 26 | # Attention 27 | - input: 'premise' 28 | dependent: 'hypothesis' 29 | output: 'hypothesis_attn' 30 | module: 'attention_matching' 31 | attn_type: 'dot' 32 | concat: False 33 | - input: 'hypothesis' 34 | dependent: 'premise' 35 | output: 'premise_attn' 36 | module: 'attention_matching' 37 | attn_type: 'dot' 38 | concat: False 39 | 40 | - input: ['premise', 'hypothesis_attn'] 41 | output: 'premise_mul' 42 | module: 'mul' 43 | - input: ['premise', 'hypothesis_attn'] 44 | output: 'premise_sub' 45 | module: 'sub' 46 | - input: ['premise', 'hypothesis_attn', 'premise_mul', 'premise_sub'] 47 | output: 'premise' 48 | module: 'concat' 49 | - input: 'premise' 50 | module: 'dense' 51 | name: 'projection' 52 | activation: 'relu' 53 | dropout: True 54 | 55 | - input: ['hypothesis', 'premise_attn'] 56 | output: 'hypothesis_mul' 57 | module: 'mul' 58 | - input: ['hypothesis', 'premise_attn'] 59 | output: 'hypothesis_sub' 60 | module: 'sub' 61 | - input: ['hypothesis', 'premise_attn', 'hypothesis_mul', 'hypothesis_sub'] 62 | output: 'hypothesis' 63 | module: 'concat' 64 | - input: 'hypothesis' 65 | module: 'dense' 66 | name: 'projection' 67 | activation: 'relu' 68 | dropout: True 
69 | 70 | # inference composition 71 | # BiLSTM 72 | - input: 'hypothesis' 73 | module: 'lstm' 74 | name: 'composition' 75 | 76 | # BiLSTM 77 | - input: 'premise' 78 | module: 'lstm' 79 | name: 'composition' 80 | 81 | prediction_layer: 82 | module: 'max_avg_mlp' 83 | dropout: True 84 | -------------------------------------------------------------------------------- /conf/nli/modular_nli.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | Modular NLI reader base configuration, inheriting from the default jack.yaml 3 | 4 | parent_config: './conf/jack.yaml' 5 | 6 | # Reader model to use, see jack/readers/implementations.py for options 7 | reader: 'modular_nli_reader' 8 | 9 | with_char_embeddings: False 10 | model: 11 | encoder_layer: null # list of encoder modules with input key ('question' or 'support' in beginning) 12 | prediction_layer: null # 'max_avg_mlp' (h_max, h_avg, p_max, p_avg), 'max_mlp' (h_max, p_max), 'max_interaction_mlp' (h_max, p_max, h_max - p_max, h_max * p_max) 13 | 14 | # encoder modules can be combined as desired and are defined by the following keys 15 | # * input - required, string indicating what input to encode (starts with possibilities 'question' or 'support') 16 | # * output - optional, set to input by default but can be overwritten to something else => after defining a new output key it can be used later as input somewhere else 17 | # * repr_dim - dimensionality of output 18 | # * module - BiRNNs: 'lstm', 'gru', 'sru' ('with_projection: True' will also employ a projection layer on top of the BiRNNs which is recommended) 19 | # CONVs: 'gldr' (gated linear dilated residual network), 'conv' (convolution) 20 | # MISC: 'projection' (linear projection), 'self_attn', 'concat' (use 'input' to define list of keys to concatenate) 21 | # * residual - whether this encoder should be residually employed 22 | # * num_layers - number of times this encoder is applied consecutively 23 | # 'conv' requires another parameter, 'conv_width' (3 by default) and can have an 'activation' 24 | # 'gldr' requires additional parameters, 'conv_width' (3 by default) and 'dilations' 25 | # (a list of dilations for each layer of the gldr network) 26 | # 'projection' has an additional 'activation' attribute which can be 'relu', 'tanh', 'sigmoid', etc (everything in tf.nn) 27 | # 'self_attn' supports attn types: 'dot', 'bilinear', 'diagonal_bilinear', 'mlp' 28 | # 'dot', 'bilinear', 'diagonal_bilinear' have an additional 'scale' attribute which scales attn scores by sqrt of repr_dim 29 | # of input states; it is recommended to use it for 'dot' and 'diagonal_bilinear' 30 | # 'mlp' has additional 'repr_dim' and 'activation' properties for the dimensionality and activation of the hidden layer 31 | # you can set the number of parallel attention heads using num_attn_heads 32 | # 33 | # You can reuse encoders (i.e., their parameters) by giving them the same name and setting 'reuse: True' 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /conf/nli/multinli/abstract_multinli.yaml: -------------------------------------------------------------------------------- 1 | parent_config: './conf/jack.yaml' 2 | 3 | # loader for the dataset, ['jack', 'squad', 'snli'] are supported. For everything else convert to jtr format first.
4 | loader: snli 5 | 6 | # MultiNLI training file 7 | train: 'data/MultiNLI/multinli_1.0/multinli_1.0_train.jsonl' 8 | 9 | # MultiNLI dev file 10 | dev: 'data/MultiNLI/multinli_1.0/multinli_1.0_dev.jsonl' 11 | 12 | # MultiNLI test file 13 | test: null 14 | 15 | epochs: 20 16 | 17 | embedding_format: 'memory_map_dir' 18 | embedding_file: 'data/GloVe/glove.840B.300d.memory_map_dir' 19 | 20 | # Use fixed vocab of pretrained embeddings 21 | vocab_from_embeddings: True 22 | 23 | # Use also character based embeddings in readers which support it 24 | with_char_embeddings: False 25 | 26 | batch_size: 64 27 | 28 | lowercase: False 29 | 30 | learning_rate: 0.001 31 | min_learning_rate: 0.0001 32 | learning_rate_decay: 0.8 33 | -------------------------------------------------------------------------------- /conf/nli/multinli/cbilstm.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/nli/cbilstm.yaml', './conf/nli/multinli/abstract_multinli.yaml'] 2 | 3 | # fixed experiment seed 4 | seed: 1337 5 | dropout: 0.2 6 | repr_dim: 300 7 | -------------------------------------------------------------------------------- /conf/nli/multinli/dam.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/nli/dam.yaml', './conf/nli/multinli/abstract_multinli.yaml'] 2 | 3 | # fixed experiment seed 4 | seed: 1337 5 | dropout: 0.2 6 | repr_dim: 300 7 | epochs: 100 8 | batch_size: 32 9 | optimizer: adagrad 10 | learning_rate: 0.05 11 | learning_rate_decay: 1.0 12 | -------------------------------------------------------------------------------- /conf/nli/multinli/esim.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/nli/esim.yaml', './conf/nli/multinli/abstract_multinli.yaml'] 2 | 3 | # fixed experiment seed 4 | seed: 1337 5 | dropout: 0.2 6 | repr_dim: 300 7 | validation_interval: 2000 8 | learning_rate: 0.0004 9 | learning_rate_decay: 1.0 10 | batch_size: 32 11 | epochs: 10 12 | -------------------------------------------------------------------------------- /conf/nli/snli/abstract_snli.yaml: -------------------------------------------------------------------------------- 1 | parent_config: './conf/jack.yaml' 2 | 3 | # loader for the dataset, ['jack', 'squad', 'snli'] are supported. For everything else convert to jtr format first.
4 | loader: snli 5 | 6 | # SNLI training file 7 | train: 'data/SNLI/snli_1.0/snli_1.0_train.jsonl' 8 | 9 | # SNLI dev file 10 | dev: 'data/SNLI/snli_1.0/snli_1.0_dev.jsonl' 11 | 12 | # SNLI test file 13 | test: 'data/SNLI/snli_1.0/snli_1.0_test.jsonl' 14 | 15 | epochs: 20 16 | 17 | repr_dim: 300 18 | 19 | embedding_format: 'memory_map_dir' 20 | embedding_file: 'data/GloVe/glove.840B.300d.memory_map_dir' 21 | 22 | # Use fixed vocab of pretrained embeddings 23 | vocab_from_embeddings: True 24 | 25 | # Use also character based embeddings in readers which support it 26 | with_char_embeddings: False 27 | 28 | batch_size: 64 29 | lowercase: False 30 | learning_rate: 0.001 31 | min_learning_rate: 0.0001 32 | learning_rate_decay: 0.8 33 | 34 | -------------------------------------------------------------------------------- /conf/nli/snli/cbilstm.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/nli/cbilstm.yaml', './conf/nli/snli/abstract_snli.yaml'] 2 | 3 | # fixed experiment seed 4 | seed: 1337 5 | dropout: 0.2 6 | repr_dim: 300 7 | -------------------------------------------------------------------------------- /conf/nli/snli/dam.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/nli/dam.yaml', './conf/nli/snli/abstract_snli.yaml'] 2 | 3 | # fixed experiment seed 4 | seed: 1337 5 | dropout: 0.2 6 | repr_dim: 200 7 | epochs: 100 8 | batch_size: 32 9 | optimizer: adagrad 10 | learning_rate: 0.05 11 | learning_rate_decay: 1.0 12 | -------------------------------------------------------------------------------- /conf/nli/snli/esim.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/nli/esim.yaml', './conf/nli/snli/abstract_snli.yaml'] 2 | 3 | # fixed experiment seed 4 | seed: 1337 5 | dropout: 0.2 6 | repr_dim: 300 7 | validation_interval: 2000 8 | learning_rate: 0.0004 9 | learning_rate_decay: 1.0 10 | batch_size: 32 11 | epochs: 10 12 | -------------------------------------------------------------------------------- /conf/qa/bidaf.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | BiDAF reader implementation as described in https://arxiv.org/abs/1611.01603. This is a slightly adapted version. 
3 | 4 | parent_config: './conf/qa/modular_qa.yaml' 5 | 6 | # Name of this reader configuration 7 | name: 'bidaf_reader' 8 | 9 | # fixed experiment seed 10 | seed: 1337 11 | 12 | # where to store the reader 13 | save_dir: './bidaf_reader' 14 | 15 | with_char_embeddings: True 16 | 17 | max_span_size: 16 18 | 19 | model: 20 | encoder_layer: 21 | # Embedding computation 22 | # Support 23 | - input: ['support', 'char_support'] 24 | output: 'support' 25 | module: 'concat' 26 | - input: 'support' 27 | name: 'embedding_highway' 28 | module: 'highway' 29 | num_layers: 2 30 | 31 | # Question 32 | - input: ['question', 'char_question'] 33 | output: 'question' 34 | module: 'concat' 35 | - input: 'question' 36 | name: 'embedding_highway' # use same network as support 37 | module: 'highway' 38 | num_layers: 2 39 | 40 | # Contextual Encoding 41 | - input: 'question' 42 | module: 'lstm' 43 | name: 'contextual_encoding' 44 | with_projection: True # not in the original bidaf implementation, but helps 45 | dropout: True 46 | - input: 'support' 47 | module: 'lstm' 48 | with_projection: True # not in the original bidaf implementation, but helps 49 | name: 'contextual_encoding' # shared encoding at this point helps 50 | dropout: True 51 | 52 | # Attention Encoding 53 | - input: 'support' 54 | dependent: 'question' 55 | module: 'bidaf' 56 | 57 | - input: 'support' 58 | module: 'lstm' 59 | with_projection: True # not in the original bidaf implementation, but helps 60 | num_layers: 2 61 | dropout: True 62 | 63 | answer_layer: 64 | module: 'bidaf' 65 | encoder: # only needed for bidaf answer layer 66 | module: 'lstm' 67 | 68 | -------------------------------------------------------------------------------- /conf/qa/fastqa.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | FastQA configuration to train a model on SQuAD as described in https://arxiv.org/abs/1703.04816. 3 | 4 | parent_config: './conf/jack.yaml' 5 | 6 | # Reader model to use, see jack/readers/implementations.py for options 7 | reader: 'fastqa_reader' 8 | 9 | # fixed experiment seed 10 | seed: 1337 11 | 12 | # where to store the reader 13 | save_dir: './fastqa_reader' 14 | 15 | # 'lstm', 'gru', 'sru' (simple recurrent unit) 16 | encoder: 'lstm' 17 | with_char_embeddings: True 18 | 19 | # 'conditional' (original fastqa, end score conditioned on predicted start), 'conditional_bilinear', 'bilinear' 20 | # bilinear has shown best performance 21 | answer_layer: bilinear 22 | -------------------------------------------------------------------------------- /conf/qa/modular_qa.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | Modular QA Reader base configuration.
3 | 4 | parent_config: './conf/jack.yaml' 5 | 6 | reader: 'modular_qa_reader' 7 | 8 | with_char_embeddings: True 9 | model: 10 | encoder_layer: null # list of encoder modules with input key ('question' or 'support' in beginning) 11 | answer_layer: null # 'bilinear', 'mlp', 'conditional' (i.e., fastqa), 'conditional_bilinear', 'bidaf', 'san' (https://arxiv.org/pdf/1712.03556.pdf) 12 | 13 | 14 | # encoder modules can be combined as desired and are defined by the following keys 15 | # * input - required, string indicating what input to encode (starts with possibilities 'question' or 'support') 16 | # * output - optional, set to input by default but can be overwritten to something else => after defining a new output key it can be used later as input somewhere else 17 | # * repr_dim - dimensionality of output 18 | # * module - BiRNNs: 'lstm', 'gru', 'sru' ('with_projection: True' will also employ a projection layer on top of the BiRNNs which is recommended) 19 | # CONVs: 'gldr' (gated linear dilated residual network), 'conv' (convolution) 20 | # MISC: 'projection' (linear projection), 'self_attn', 'concat' (use 'input' to define list of keys to concatenate) 21 | # * residual - whether this encoder should be residually employed 22 | # * num_layers - number of times this encoder is applied consecutively 23 | # 'conv' requires another parameter, 'conv_width' (3 by default) and can have an 'activation' 24 | # 'gldr' requires additional parameters, 'conv_width' (3 by default) and 'dilations' 25 | # (a list of dilations for each layer of the gldr network) 26 | # 'projection' has an additional 'activation' attribute which can be 'relu', 'tanh', 'sigmoid', etc (everything in tf.nn) 27 | # 'self_attn' supports attn types: 'dot', 'bilinear', 'diagonal_bilinear', 'mlp' 28 | # 'dot', 'bilinear', 'diagonal_bilinear' have an additional 'scale' attribute which scales attn scores by sqrt of repr_dim 29 | # of input states; it is recommended to use it for 'dot' and 'diagonal_bilinear' 30 | # 'mlp' has additional 'repr_dim' and 'activation' properties for the dimensionality and activation of the hidden layer 31 | # you can set the number of parallel attention heads using num_attn_heads 32 | # 33 | # You can reuse encoders (i.e., their parameters) by giving them the same name and setting 'reuse: True' 34 | 35 | -------------------------------------------------------------------------------- /conf/qa/squad/abstract_squad.yaml: -------------------------------------------------------------------------------- 1 | parent_config: './conf/jack.yaml' 2 | 3 | # loader to use in experiment 4 | loader: 'squad' 5 | 6 | # Maximum support length. Can be used for cutting or filtering QA examples 7 | max_support_length: -1 8 | 9 | train: 'data/SQuAD/train-v1.1.json' 10 | dev: 'data/SQuAD/dev-v1.1.json' 11 | test: null 12 | 13 | # [word2vec], [glove] or [memory_map_dir] format of embeddings to be loaded 14 | embedding_format: 'memory_map_dir' 15 | 16 | # embeddings to be loaded 17 | embedding_file: 'data/GloVe/glove.840B.300d.memory_map_dir' 18 | 19 | # Use fixed vocab of pretrained embeddings 20 | vocab_from_embeddings: True 21 | 22 | epochs: 20 23 | 24 | dropout: 0.2 25 | 26 | batch_size: 64 27 | lowercase: False 28 | 29 | max_span_size: 16 30 | 31 | learning_rate: 0.001 32 | min_learning_rate: 0.0001 33 | learning_rate_decay: 0.5 34 | 35 | # 'sum' (loss for summed prob.
over all possible gold answer spans), 'max' (loss for best span) 36 | loss: 'sum' 37 | -------------------------------------------------------------------------------- /conf/qa/squad/bidaf.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/qa/bidaf.yaml', './conf/qa/squad/abstract_squad.yaml'] 2 | seed: 1337 3 | 4 | repr_dim: 100 5 | dropout: 0.2 6 | -------------------------------------------------------------------------------- /conf/qa/squad/fastqa.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/qa/fastqa.yaml', './conf/qa/squad/abstract_squad.yaml'] 2 | seed: 1337 3 | 4 | repr_dim: 150 5 | dropout: 0.2 6 | -------------------------------------------------------------------------------- /conf/qa/squad/jackqa.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/qa/jackqa.yaml', './conf/qa/squad/abstract_squad.yaml'] 2 | seed: 1337 3 | 4 | repr_dim: 100 5 | dropout: 0.2 6 | -------------------------------------------------------------------------------- /conf/qa/triviaqa/web/abstract_triviaqa.yaml: -------------------------------------------------------------------------------- 1 | parent_config: './conf/jack.yaml' 2 | 3 | # loader to use in experiment 4 | loader: 'jack' 5 | 6 | # Maximum support length. Can be used for cutting or filtering QA examples 7 | max_support_length: 600 8 | 9 | train: 'data/triviaqa/web-train.json' 10 | dev: 'data/triviaqa/web-dev.json' 11 | test: null 12 | 13 | # cache preprocessed examples on file in JACK_TEMP to avoid RAM problems 14 | file_cache: True 15 | 16 | # [word2vec], [glove] or [memory_map_dir] format of embeddings to be loaded 17 | embedding_format: 'memory_map_dir' 18 | 19 | # embeddings to be loaded 20 | embedding_file: 'data/GloVe/glove.840B.300d.memory_map_dir' 21 | 22 | # Use fixed vocab of pretrained embeddings 23 | vocab_from_embeddings: True 24 | 25 | epochs: 3 26 | 27 | dropout: 0.2 28 | 29 | batch_size: 16 30 | 31 | lowercase: False 32 | 33 | # take all supports by default; if set to >0, only the top supports are kept, ranked by TF-IDF similarity with the question 34 | max_num_support: 6 35 | # set to -1 if you want to use all paragraphs during training (which will of course demand more training time) 36 | # paragraphs are subsampled from the top `max_num_support`; the best paragraph is sampled twice as likely as the rest 37 | max_training_support: 2 38 | 39 | max_span_size: 8 40 | 41 | learning_rate: 0.001 42 | min_learning_rate: 0.0001 43 | learning_rate_decay: 0.5 44 | validation_interval: 2000 45 | num_dev_examples: 1000 46 | 47 | # 'sum' (loss for summed prob.
over all possible gold answer spans), 'max' (loss for best span) 48 | loss: 'sum' 49 | -------------------------------------------------------------------------------- /conf/qa/triviaqa/web/bidaf.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/qa/bidaf.yaml', './conf/qa/triviaqa/web/abstract_triviaqa.yaml'] 2 | seed: 1337 3 | 4 | repr_dim: 100 5 | dropout: 0.2 6 | -------------------------------------------------------------------------------- /conf/qa/triviaqa/web/fastqa.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/qa/fastqa.yaml', './conf/qa/triviaqa/web/abstract_triviaqa.yaml'] 2 | seed: 1337 3 | 4 | repr_dim: 150 5 | dropout: 0.2 6 | -------------------------------------------------------------------------------- /conf/qa/triviaqa/web/jackqa.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/qa/jackqa.yaml', './conf/qa/triviaqa/web/abstract_triviaqa.yaml'] 2 | seed: 1337 3 | 4 | repr_dim: 100 5 | dropout: 0.2 6 | -------------------------------------------------------------------------------- /conf/qa/triviaqa/wiki/abstract_triviaqa.yaml: -------------------------------------------------------------------------------- 1 | parent_config: './conf/jack.yaml' 2 | 3 | # loader to use in experiment 4 | loader: 'jack' 5 | 6 | # Maximum support length. Can be used for cutting or filtering QA examples 7 | max_support_length: 600 8 | 9 | train: 'data/triviaqa/wiki-train.json' 10 | dev: 'data/triviaqa/wiki-dev.json' 11 | test: null 12 | 13 | # [word2vec], [glove] or [memory_map_dir] format of embeddings to be loaded 14 | embedding_format: 'memory_map_dir' 15 | 16 | # embeddings to be loaded 17 | embedding_file: 'data/GloVe/glove.840B.300d.memory_map_dir' 18 | 19 | # Use fixed vocab of pretrained embeddings 20 | vocab_from_embeddings: True 21 | 22 | epochs: 20 23 | 24 | dropout: 0.2 25 | 26 | batch_size: 16 27 | 28 | lowercase: False 29 | 30 | # take all supports by default; if set to >0, only the top supports are kept, ranked by TF-IDF similarity with the question 31 | max_num_support: 6 32 | # set to -1 if you want to use all paragraphs during training (which will of course demand more training time) 33 | # paragraphs are subsampled from the top `max_num_support`; the best paragraph is sampled twice as likely as the rest 34 | max_training_support: 2 35 | 36 | max_span_size: 8 37 | 38 | learning_rate: 0.001 39 | min_learning_rate: 0.0001 40 | learning_rate_decay: 0.5 41 | validation_interval: 2000 42 | num_dev_examples: 1000 43 | 44 | # 'sum' (loss for summed prob.
over all possible gold answer spans), 'max' (loss for best span) 45 | loss: 'sum' 46 | -------------------------------------------------------------------------------- /conf/qa/triviaqa/wiki/bidaf.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/qa/bidaf.yaml', './conf/qa/triviaqa/wiki/abstract_triviaqa.yaml'] 2 | seed: 1337 3 | 4 | repr_dim: 100 5 | dropout: 0.2 6 | -------------------------------------------------------------------------------- /conf/qa/triviaqa/wiki/fastqa.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/qa/fastqa.yaml', './conf/qa/triviaqa/wiki/abstract_triviaqa.yaml'] 2 | seed: 1337 3 | 4 | repr_dim: 150 5 | dropout: 0.2 6 | -------------------------------------------------------------------------------- /conf/qa/triviaqa/wiki/jackqa.yaml: -------------------------------------------------------------------------------- 1 | parent_config: ['./conf/qa/jackqa.yaml', './conf/qa/triviaqa/wiki/abstract_triviaqa.yaml'] 2 | seed: 1337 3 | 4 | repr_dim: 100 5 | dropout: 0.2 6 | -------------------------------------------------------------------------------- /data/CBT/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | 4 | # ...except: 5 | !.gitignore 6 | !download.sh 7 | !snippet.json 8 | !snippet.jtr.json 9 | -------------------------------------------------------------------------------- /data/CBT/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | wget -P data/CBT http://www.thespermwhale.com/jaseweston/babi/CBTest.tgz 3 | tar -xzf data/CBT/CBTest.tgz -C data/CBT/ 4 | -------------------------------------------------------------------------------- /data/FB15k-237/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | 4 | # ...except: 5 | !.gitignore 6 | !download.sh 7 | !snippet.jtr.json 8 | -------------------------------------------------------------------------------- /data/FB15k-237/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | wget -O data/FB15k-237/fb15k-237.zip https://download.microsoft.com/download/8/7/0/8700516A-AB3D-4850-B4BB-805C515AECE1/FB15K-237.2.zip 3 | unzip data/FB15k-237/fb15k-237.zip -d data/FB15k-237/ 4 | #rm data/FB15k-237/fb15k-237.zip 5 | -------------------------------------------------------------------------------- /data/FB15k-237/snippet.jtr.json: -------------------------------------------------------------------------------- 1 | { 2 | "meta": "FB15K with entity neighbours as supporting facts.", 3 | "instances": [ 4 | { 5 | "support": [ 6 | { 7 | "text": "1 0 2" 8 | } 9 | ], 10 | "questions": [ 11 | { 12 | "answers": [ 13 | { 14 | "text": "2" 15 | } 16 | ], 17 | "candidates": [], 18 | "question": "1 0" 19 | } 20 | ] 21 | }, 22 | { 23 | "support": [ 24 | { 25 | "text": "0 1 3" 26 | } 27 | ], 28 | "questions": [ 29 | { 30 | "answers": [ 31 | { 32 | "text": "3" 33 | } 34 | ], 35 | "candidates": [], 36 | "question": "0 1" 37 | } 38 | ] 39 | } 40 | ], 41 | "globals": { 42 | "candidates": [ 43 | { 44 | "text": "0" 45 | }, 46 | { 47 | "text": "1" 48 | }, 49 | { 50 | "text": "2" 51 | }, 52 | { 53 | "text": "3" 54 | } 55 | ] 56 | } 57 | } 58 |
-------------------------------------------------------------------------------- /data/GloVe/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ -f "data/GloVe/glove.840B.300d.txt" ] 4 | then 5 | echo "data/GloVe/glove.840B.300d.txt already exists! Doing nothing!" 6 | else 7 | # echo "Downloading glove.840B.300d.txt!" 8 | # wget -c -P data/GloVe/ http://nlp.stanford.edu/data/glove.840B.300d.zip 9 | # unzip -d data/GloVe/ data/GloVe/glove.840B.300d.zip 10 | echo "Downloading glove.840B.300d.memory_map_dir!" 11 | wget -c -P data/GloVe/ http://data.neuralnoise.com/jack/embeddings/glove.840B.300d.memory_map_dir.tar.gz 12 | tar xvfz data/GloVe/glove.840B.300d.memory_map_dir.tar.gz -C data/GloVe/ 13 | fi 14 | -------------------------------------------------------------------------------- /data/GloVe/download_small.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ -f "data/GloVe/glove.6B.50d.txt" ] 4 | then 5 | echo "glove.6B.50d.txt already exists! Doing nothing!" 6 | else 7 | echo "Downloading glove.6B.50d.txt!" 8 | wget -c -P data/GloVe/ http://nlp.stanford.edu/data/glove.6B.zip 9 | unzip data/GloVe/glove.6B.zip -d data/GloVe 10 | fi 11 | -------------------------------------------------------------------------------- /data/GloVe/glove.the.50d.txt: -------------------------------------------------------------------------------- 1 | the 0.418 0.24968 -0.41242 0.1217 0.34527 -0.044457 -0.49688 -0.17862 -0.00066023 -0.6566 0.27843 -0.14767 -0.55677 0.14658 -0.0095095 0.011658 0.10204 -0.12792 -0.8443 -0.12181 -0.016801 -0.33279 -0.1552 -0.23131 -0.19181 -1.8823 -0.76746 0.099051 -0.42125 -0.19526 4.0071 -0.18594 -0.52287 -0.31681 0.00059213 0.0074449 0.17778 -0.15897 0.012041 -0.054223 -0.29871 -0.15749 -0.34758 -0.045637 -0.44251 0.18785 0.0027849 -0.18411 -0.11514 -0.78581 -------------------------------------------------------------------------------- /data/LS/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | wget http://www.dianamccarthy.co.uk/files/task10data.tar.gz 3 | wget http://nlp.cs.swarthmore.edu/semeval/tasks/task10/data/trial.tar.gz 4 | wget http://nlp.cs.swarthmore.edu/semeval/tasks/task10/data/test.tar.gz 5 | wget http://nlp.cs.swarthmore.edu/semeval/tasks/task10/data/key.tar.gz 6 | tar -xzf task10data.tar.gz 7 | tar -xzf trial.tar.gz 8 | tar -xzf test.tar.gz 9 | tar -xzf key.tar.gz 10 | curl -O -L https://raw.githubusercontent.com/gaurav324/English-Lexicalized-Text-Substituion/master/TaskTestData/test/lexsub_test_cleaned.xml 11 | curl -O -L https://raw.githubusercontent.com/gaurav324/English-Lexicalized-Text-Substituion/master/TaskTestData/trial/lexsub_trial_cleaned.xml 12 | mv lexsub_test_cleaned.xml ./test/ 13 | mv lexsub_trial_cleaned.xml ./trial/ -------------------------------------------------------------------------------- /data/MCTest/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | 4 | # ...except: 5 | !.gitignore 6 | !download.sh 7 | !snippet.jtr.json 8 | -------------------------------------------------------------------------------- /data/MCTest/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd data/MCTest 3 | wget 
http://research-srv.microsoft.com/en-us/um/redmond/projects/mctest/data/MCTest.zip 4 | unzip MCTest.zip 5 | cd ../.. 6 | -------------------------------------------------------------------------------- /data/MultiNLI/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | wget -P data/MultiNLI/ https://www.nyu.edu/projects/bowman/multinli/multinli_1.0.zip 3 | unzip data/MultiNLI/multinli_1.0.zip -d data/MultiNLI 4 | rm data/MultiNLI/multinli_1.0.zip 5 | 6 | # Create joint dev set 7 | cat data/MultiNLI/multinli_1.0/multinli_1.0_dev_matched.jsonl data/MultiNLI/multinli_1.0/multinli_1.0_dev_mismatched.jsonl > data/MultiNLI/multinli_1.0/multinli_1.0_dev.jsonl 8 | -------------------------------------------------------------------------------- /data/MultiNLI/snippet.jtr.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [], 3 | "globals": { 4 | "candidates": [ 5 | { 6 | "text": "entailment" 7 | }, 8 | { 9 | "text": "neutral" 10 | }, 11 | { 12 | "text": "contradiction" 13 | } 14 | ] 15 | }, 16 | "meta": "MultiNLI" 17 | } -------------------------------------------------------------------------------- /data/NYT/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | wget -O data/NYT/naacl2013.txt.zip https://www.dropbox.com/s/5iulumlihydo1k7/naacl2013.txt.zip?dl=1 3 | unzip data/NYT/naacl2013.txt.zip -d data/NYT/ 4 | rm data/NYT/naacl2013.txt.zip -------------------------------------------------------------------------------- /data/NewsQA/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd "$(dirname "$0")" 4 | 5 | echo "Cloning NewsQA repo to newsqa..." 6 | git clone https://github.com/Maluuba/newsqa.git 7 | 8 | cd newsqa 9 | pip2 install --requirement requirements.txt 10 | 11 | # download cnn 12 | echo "Download the CNN stories manually to the maluuba/newsqa folder (don't extract them) from: http://cs.nyu.edu/~kcho/DMQA/" 13 | echo "Press [Enter] when done..." 14 | read a 15 | 16 | echo "Download the questions and answers to the maluuba/newsqa folder manually from: https://datasets.maluuba.com/NewsQA/dl..." 17 | echo "Press [Enter] when done..." 18 | read a 19 | 20 | cd maluuba/newsqa 21 | tar xzf newsqa-data-v1.tar.gz 22 | 23 | cd ../.. 24 | 25 | # fix a bug 26 | sed -ie 's/\\r/\\n/g' maluuba/newsqa/data_processing.py 27 | rm maluuba/newsqa/data_processing.pye 28 | 29 | python2 maluuba/newsqa/example.py 30 | python2 maluuba/newsqa/split_dataset.py 31 | 32 | mv newsqa/maluuba/newsqa/* .
33 | rm -r newsqa 34 | 35 | echo "Find resulting dataset in data/NewsQA/newsqa/maluuba/newsqa/[train,dev,test]_story_ids.csv and newsqa/maluuba/newsqa/split_data" 36 | echo "These can be used as input to the conversion scripts in jack/io/NewsQA2*.py" 37 | -------------------------------------------------------------------------------- /data/PTB/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz 3 | tar -xzf simple-examples.tgz -------------------------------------------------------------------------------- /data/QAngaroo/instructions.md: -------------------------------------------------------------------------------- 1 | You have to download the dataset manually from 2 | 3 | http://qangaroo.cs.ucl.ac.uk 4 | 5 | by clicking the _Download Dataset_ button. 6 | 7 | After unzipping the contents, you can convert the dataset into a SQuAD-like format. 8 | 9 | E.g. 10 | 11 | `python3 qangaroo2squad.py qangaroo_v1.1/wikihop/dev.json wikihop_dev.squad_format.json` 12 | 13 | 14 | -------------------------------------------------------------------------------- /data/QAngaroo/qangaroo2squad.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | 5 | def load_json(path): 6 | with open(path, 'r') as f: 7 | return json.load(f) 8 | 9 | 10 | def convert2SQUAD_format(hoppy_data, write_file_name): 11 | """ 12 | Converts QAngaroo data (hoppy_data) into SQuAD format. 13 | The SQuAD-formatted data is written to disk at write_file_name. 14 | Note: All given support documents per example are concatenated 15 | into one super-document. All text is lowercased. 16 | """ 17 | # adapt the JSON tree structure used in SQUAD. 18 | squad_formatted_content = dict() 19 | squad_formatted_content['version'] = 'hoppy_squad_format' 20 | data = [] 21 | 22 | # loop over dataset 23 | for datum in hoppy_data: 24 | 25 | # Format is deeply nested JSON -- prepare data structures 26 | data_ELEMENT = dict() 27 | data_ELEMENT['title'] = 'dummyTitle' 28 | paragraphs = [] 29 | paragraphs_ELEMENT = dict() 30 | qas = [] 31 | qas_ELEMENT = dict() 32 | qas_ELEMENT_ANSWERS = [] 33 | ANSWERS_ELEMENT = dict() 34 | 35 | 36 | ### content start 37 | qas_ELEMENT['id'] = datum['id'] 38 | qas_ELEMENT['question'] = datum['query'] 39 | 40 | # concatenate all support documents into one superdocument 41 | superdocument = " ".join(datum['supports']).lower() 42 | 43 | # where is the answer in the superdocument? 
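# (note: only the first occurrence of the lowercased answer string is used as the gold span; examples whose answer never appears verbatim in the superdocument are skipped below)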
44 | answer_position = superdocument.find(datum['answer'].lower()) 45 | if answer_position == -1: 46 | continue 47 | 48 | ANSWERS_ELEMENT['answer_start'] = answer_position 49 | ANSWERS_ELEMENT['text'] = datum['answer'].lower() 50 | ### content end 51 | 52 | 53 | # recursively fill in content into the nested SQuAD data format 54 | paragraphs_ELEMENT['context'] = superdocument 55 | qas_ELEMENT_ANSWERS.append(ANSWERS_ELEMENT) 56 | 57 | qas_ELEMENT['answers'] = qas_ELEMENT_ANSWERS 58 | qas.append(qas_ELEMENT) 59 | 60 | paragraphs_ELEMENT['qas'] = qas 61 | paragraphs.append(paragraphs_ELEMENT) 62 | 63 | data_ELEMENT['paragraphs'] = paragraphs 64 | data.append(data_ELEMENT) 65 | 66 | squad_formatted_content['data'] = data 67 | 68 | with open(write_file_name, 'w') as f: 69 | json.dump(squad_formatted_content, f, indent=1) 70 | 71 | print('Done writing SQuAD-formatted data to: ',write_file_name) 72 | 73 | 74 | 75 | 76 | def main(): 77 | input_path = sys.argv[1] 78 | output_path = sys.argv[2] 79 | convert2SQUAD_format(load_json(input_path), output_path) 80 | 81 | 82 | if __name__ == "__main__": 83 | main() 84 | -------------------------------------------------------------------------------- /data/SNLI/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | 4 | # ...except: 5 | !.gitignore 6 | !download.sh 7 | !snippet.json 8 | !snippet.jtr.json 9 | -------------------------------------------------------------------------------- /data/SNLI/README.md: -------------------------------------------------------------------------------- 1 | - Mapping to jtr format 2 | `$ python3 jtr/io/SNLI2jtr_v1.py` 3 | - Validating format 4 | `$ python3 jtr/io/validate.py ./jtr/data/snippet/SNLI_v1/snippet_jtrformat.json jtr/io/dataset_schema.json` 5 | - Debugging 6 | `$ python3 jtr/model/reader.py --train jtr/data/SNLI/snli_1.0/snli_1.0_debug_jtr.jsonl --test jtr/data/SNLI/snli_1.0/snli_1.0_debug_jtr.jsonl` -------------------------------------------------------------------------------- /data/SNLI/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | wget -P data/SNLI/ http://nlp.stanford.edu/projects/snli/snli_1.0.zip 3 | unzip data/SNLI/snli_1.0.zip -d data/SNLI 4 | rm data/SNLI/snli_1.0.zip 5 | -------------------------------------------------------------------------------- /data/SNLI/snippet.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotator_labels": [ 3 | "neutral" 4 | ], 5 | "captionID": "3416050480.jpg#4", 6 | "gold_label": "neutral", 7 | "pairID": "3416050480.jpg#4r1n", 8 | "sentence1": "A person on a horse jumps over a broken down airplane.", 9 | "sentence1_binary_parse": "( ( ( A person ) ( on ( a horse ) ) ) ( ( jumps ( over ( a ( broken ( down airplane ) ) ) ) ) . ) )", 10 | "sentence1_parse": "(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN on) (NP (DT a) (NN horse)))) (VP (VBZ jumps) (PP (IN over) (NP (DT a) (JJ broken) (JJ down) (NN airplane)))) (. .)))", 11 | "sentence2": "A person is training his horse for a competition.", 12 | "sentence2_binary_parse": "( ( A person ) ( ( is ( ( training ( his horse ) ) ( for ( a competition ) ) ) ) . ) )", 13 | "sentence2_parse": "(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) (VP (VBG training) (NP (PRP$ his) (NN horse)) (PP (IN for) (NP (DT a) (NN competition))))) (. 
.)))" 14 | } 15 | -------------------------------------------------------------------------------- /data/SQuAD/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | 4 | # ...except: 5 | !.gitignore 6 | !download.sh 7 | !snippet.json 8 | !snippet.jtr.json 9 | -------------------------------------------------------------------------------- /data/SQuAD/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | if [ -f "data/SQuAD/dev-v1.1.json" ] 5 | then 6 | echo "Already downloaded." 7 | else 8 | wget -P data/SQuAD/ https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json 9 | wget -P data/SQuAD/ https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json 10 | fi 11 | -------------------------------------------------------------------------------- /data/TBD/SemEval2017Task10/S0022311514005480.ann: -------------------------------------------------------------------------------- 1 | T1 Task 1232 1286 biaxial compressive and triaxial tensile stress states 2 | T2 Task 4 23 second stress state 3 | T3 Task 29 53 tri-axial tensile stress 4 | R1 Hyponym-of Arg1:T2 Arg2:T3 5 | T4 Process 259 282 Finite element analysis 6 | T5 Task 875 935 simulated by applying tensile stress in direction 1, 2 and 3 7 | T6 Material 97 116 advancing crack tip 8 | T7 Material 171 182 oxide layer 9 | T8 Material 227 248 metal–oxide interface 10 | T9 Material 321 327 cracks 11 | T10 Material 348 374 localised tensile stresses 12 | T11 Material 762 804 manufactured partially stabilised zirconia 13 | T12 Material 1185 1217 manufactured stabilized zirconia 14 | T13 Process 999 1023 applied tensile stresses 15 | T14 Material 394 415 metal–oxide interface 16 | T15 Process 521 566 tetragonal to monoclinic phase transformation 17 | T16 Material 664 669 oxide 18 | T17 Process 676 686 de-bonding 19 | T18 Process 706 729 triaxial tensile stress 20 | T19 Process 849 865 tetragonal phase 21 | T20 Process 1137 1145 fracture 22 | T21 Process 888 911 applying tensile stress 23 | T22 Process 949 963 maximum stress 24 | -------------------------------------------------------------------------------- /data/TBD/SemEval2017Task10/S0022311514005480.txt: -------------------------------------------------------------------------------- 1 | The second stress state is a tri-axial tensile stress designed to represent the zone ahead of an advancing crack tip. Micro-scale lateral cracks have been observed in the oxide layer, and appear to form very close to or at the metal–oxide interface (Fig. 1). Finite element analysis by Parise et al. indicated that these cracks form as a result of localised tensile stresses above peaks in the metal–oxide interface roughness [31]. These cracks are considered separate to any nano-scale cracks that might result from the tetragonal to monoclinic phase transformation. An assumption is made here that whether the micro-scale lateral cracks form via fracture of the oxide or by de-bonding at the interface a triaxial tensile stress state will still be present. In manufactured partially stabilised zirconia cracks would be expected to destabilise the tetragonal phase. This is simulated by applying tensile stress in direction 1, 2 and 3. 
As this the maximum stress at the crack tip is not known, the applied tensile stresses cover a range from 0.1GPa up to a maximum stress value of 2.2GPa as it is approximately equal to three times the fracture strength of bulk fracture strength for manufactured stabilized zirconia [34]. For the biaxial compressive and triaxial tensile stress states it is the trends in behaviour rather than the absolute values that are considered of greatest importance for this work. 2 | -------------------------------------------------------------------------------- /data/WN18/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | wget -O data/WN18/wn18.zip https://github.com/ttrouill/complex/raw/master/datasets/wn18.zip 3 | unzip data/WN18/wn18.zip -d data/WN18/ 4 | -------------------------------------------------------------------------------- /data/WN18/snippet.txt: -------------------------------------------------------------------------------- 1 | 03964744 _hyponym 04371774 2 | 00260881 _hypernym 00260622 3 | 02199712 _member_holonym 02188065 4 | 01332730 _derivationally_related_form 03122748 5 | 06066555 _derivationally_related_form 00645415 6 | 09322930 _instance_hypernym 09360122 7 | 11575425 _hyponym 12255934 8 | 07193596 _derivationally_related_form 00784342 9 | 05726596 _hyponym 06162979 10 | 01768969 _derivationally_related_form 02636811 11 | 02557199 _hyponym 02557790 12 | 01455754 _hypernym 01974062 13 | 02716866 _hyponym 03032576 14 | 03214670 _hyponym 04423288 15 | 07554856 _hypernym 07553301 16 | 11669921 _hyponym 11992806 17 | 01291069 _hyponym 01530678 18 | 07965085 _hyponym 08278169 19 | 00057306 _hypernym 00056912 20 | 10341660 _derivationally_related_form 02661252 21 | 13219258 _hypernym 13167078 22 | 01698271 _also_see 01754576 23 | 08189659 _hyponym 08077292 24 | 10499355 _hypernym 10083823 25 | 02222318 _hyponym 02223238 26 | 02103406 _hypernym 02084071 27 | 07190941 _hypernym 07185325 28 | 12090318 _member_meronym 12093769 29 | 08620061 _hyponym 08620763 30 | 03562126 _hyponym 03318438 31 | 12213635 _member_meronym 12214245 32 | 02651424 _derivationally_related_form 02672371 33 | -------------------------------------------------------------------------------- /data/WN18RR/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | wget -O data/WN18RR/wn18.tgz https://github.com/TimDettmers/ConvE/raw/master/WN18RR.tar.gz 3 | tar -xzf data/WN18RR/wn18.tgz -C data/WN18RR/ 4 | -------------------------------------------------------------------------------- /data/emoji2vec/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | wget https://github.com/uclmr/emoji2vec/raw/master/pre-trained/emoji2vec.txt 3 | -------------------------------------------------------------------------------- /data/emoji2vec/visualize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | from tensorflow.contrib.tensorboard.plugins import projector 5 | import os 6 | 7 | import numpy as np 8 | 9 | dir = "./jack/data/emoji2vec/" 10 | emojis = [] 11 | vecs = [] 12 | with open(dir + "metadata.tsv", "w") as f_out: 13 | # f_out.write("emoji\n") 14 | with open(dir + "emoji2vec.txt", "r") as f_in: 15 | for ix, line in enumerate(f_in.readlines()[1:]): 16 | splits = line.strip().split(" ") 17 | emoji = splits[0] 18 | vec = [float(x) 
for x in splits[1:]] 19 | assert len(vec) == 300 20 | # print(emoji, vec) 21 | emojis.append(emoji) 22 | vecs.append(vec) 23 | f_out.write(emoji+"\n") 24 | f_in.close() 25 | f_out.close() 26 | 27 | emoji2vec = tf.constant(np.array(vecs)) 28 | tf_emoji2vec = tf.get_variable("emoji2vec", [len(vecs), 300], tf.float64) 29 | 30 | # save embeddings to file 31 | with tf.Session() as sess: 32 | sess.run(tf.global_variables_initializer()) 33 | sess.run(tf_emoji2vec.assign(emoji2vec)) 34 | 35 | saver = tf.train.Saver() 36 | saver.save(sess, os.path.join(dir, "model.ckpt"), 0) 37 | 38 | # Use the same LOG_DIR where you stored your checkpoint. 39 | summary_writer = tf.summary.FileWriter(dir) 40 | 41 | # Format: tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto 42 | config = projector.ProjectorConfig() 43 | 44 | # You can add multiple embeddings. Here we add only one. 45 | embedding = config.embeddings.add() 46 | embedding.tensor_name = tf_emoji2vec.name 47 | # Link this tensor to its metadata file (e.g. labels). 48 | embedding.metadata_path = os.path.join(dir, 'metadata.tsv') 49 | 50 | # Saves a configuration file that TensorBoard will read during startup. 51 | projector.visualize_embeddings(summary_writer, config) 52 | -------------------------------------------------------------------------------- /data/rc-data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | 4 | # ...except: 5 | !.gitignore 6 | !post_download.sh 7 | !README.md 8 | !cnn_snippet.jtr.json 9 | -------------------------------------------------------------------------------- /data/rc-data/README.md: -------------------------------------------------------------------------------- 1 | Download data here: 2 | http://cs.nyu.edu/~kcho/DMQA/ 3 | -------------------------------------------------------------------------------- /data/rc-data/post_download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # download the data per instructions in README.md and then execute this script 3 | cd data/rc-data 4 | 5 | tar -xvzf cnn.tgz 6 | tar -xvzf cnn_stories.tgz 7 | tar -xvzf dailymail.tgz 8 | tar -xvzf dailymail_stories.tgz 9 | wget https://github.com/deepmind/rc-data/raw/master/generate_questions.py 10 | 11 | # obtained from: https://github.com/deepmind/rc-data/blob/master/README.md 12 | virtualenv venv 13 | source venv/bin/activate 14 | wget https://github.com/deepmind/rc-data/raw/master/requirements.txt 15 | pip install -r requirements.txt 16 | python generate_questions.py --corpus=cnn --mode=generate 17 | python generate_questions.py --corpus=dailymail --mode=generate 18 | deactivate 19 | cd ../.. -------------------------------------------------------------------------------- /data/sentihood/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget http://annotate-neighborhood.com/download/sentihood-train.json 3 | wget http://annotate-neighborhood.com/download/sentihood-dev.json 4 | wget http://annotate-neighborhood.com/download/sentihood-test.json 5 | -------------------------------------------------------------------------------- /data/simpleQuestions/README: -------------------------------------------------------------------------------- 1 | Note that the full context for all questions are the triples in the subfolder freebase-subsets. 
2 | The exact triples selected as full context for each question are not distributed, only the Freebase triple the question was based on. 3 | From this triple, the full context can be looked up. 4 | Therefore the conversion script only converts the subject and rel of the triple as context. 5 | See the paper [http://arxiv.org/pdf/1506.02075v1.pdf] for how the full context for each question is obtained. -------------------------------------------------------------------------------- /data/simpleQuestions/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | wget -P data/simpleQuestions/ https://www.dropbox.com/s/tohrsllcfy7rch4/SimpleQuestions_v2.tgz 4 | tar -xzvf data/simpleQuestions/SimpleQuestions_v2.tgz -C data/simpleQuestions/ -------------------------------------------------------------------------------- /data/simpleQuestions/snippet.jtr.json: -------------------------------------------------------------------------------- 1 | { 2 | "meta": "simpleQuestions.json", 3 | "instances": [ 4 | { 5 | "questions": [ 6 | { 7 | "answers": [ 8 | "www.freebase.com/m/01cj3p" 9 | ], 10 | "question": "what is the book e about" 11 | } 12 | ], 13 | "support": [ 14 | { 15 | "text": "www.freebase.com/m/04whkz5 www.freebase.com/book/written_work/subjects" 16 | } 17 | ] 18 | }, 19 | { 20 | "questions": [ 21 | { 22 | "answers": [ 23 | "www.freebase.com/m/0sjc7c1" 24 | ], 25 | "question": "to what release does the release track cardiac arrest come from" 26 | } 27 | ], 28 | "support": [ 29 | { 30 | "text": "www.freebase.com/m/0tp2p24 www.freebase.com/music/release_track/release" 31 | } 32 | ] 33 | }, 34 | { 35 | "questions": [ 36 | { 37 | "answers": [ 38 | "www.freebase.com/m/07ssc" 39 | ], 40 | "question": "what country was the film the debt from" 41 | } 42 | ], 43 | "support": [ 44 | { 45 | "text": "www.freebase.com/m/04j0t75 www.freebase.com/film/film/country" 46 | } 47 | ] 48 | }, 49 | { 50 | "questions": [ 51 | { 52 | "answers": [ 53 | "www.freebase.com/m/0p600l" 54 | ], 55 | "question": "what songs have nobuo uematsu produced?" 56 | } 57 | ], 58 | "support": [ 59 | { 60 | "text": "www.freebase.com/m/0ftqr www.freebase.com/music/producer/tracks_produced" 61 | } 62 | ] 63 | }, 64 | { 65 | "questions": [ 66 | { 67 | "answers": [ 68 | "www.freebase.com/m/0677ng" 69 | ], 70 | "question": "Who produced eve-olution?" 71 | } 72 | ], 73 | "support": [ 74 | { 75 | "text": "www.freebase.com/m/036p007 www.freebase.com/music/release/producers" 76 | } 77 | ] 78 | } 79 | ] 80 | } -------------------------------------------------------------------------------- /data/simpleQuestions/snippet.txt: -------------------------------------------------------------------------------- 1 | www.freebase.com/m/04whkz5 www.freebase.com/book/written_work/subjects www.freebase.com/m/01cj3p what is the book e about 2 | www.freebase.com/m/0tp2p24 www.freebase.com/music/release_track/release www.freebase.com/m/0sjc7c1 to what release does the release track cardiac arrest come from 3 | www.freebase.com/m/04j0t75 www.freebase.com/film/film/country www.freebase.com/m/07ssc what country was the film the debt from 4 | www.freebase.com/m/0ftqr www.freebase.com/music/producer/tracks_produced www.freebase.com/m/0p600l what songs have nobuo uematsu produced? 5 | www.freebase.com/m/036p007 www.freebase.com/music/release/producers www.freebase.com/m/0677ng Who produced eve-olution? 
6 | -------------------------------------------------------------------------------- /data/triviaqa/README: -------------------------------------------------------------------------------- 1 | The download script does the following: 2 | * downloads and unpacks triviaqa 3 | * clones github.com/allenai/document-qa temporarily (third party) 4 | * preprocesses triviaqa with third party 5 | * converts the third-party datasets to Jack format while sub-sampling supporting paragraphs using TF-IDF, following [1] 6 | 7 | You can set the parallelism with the first argument to the download script and the download directory of TriviaQA 8 | with the 2nd argument. Try to use an SSD to speed things up. The whole setup can take a few hours. 9 | 10 | 11 | [1] https://arxiv.org/pdf/1710.10723.pdf 12 | -------------------------------------------------------------------------------- /data/triviaqa/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | 4 | """ 5 | Global config options 6 | """ 7 | 8 | TRIVIA_QA = os.environ.get('TRIVIAQA_HOME', None) 9 | TRIVIA_QA_UNFILTERED = os.environ.get('TRIVIAQA_UNFILTERED_HOME', None) 10 | 11 | CORPUS_DIR = join(os.environ.get('TRIVIAQA_HOME', ''), "preprocessed") 12 | 13 | VEC_DIR = '' 14 | -------------------------------------------------------------------------------- /data/triviaqa/download.sh: -------------------------------------------------------------------------------- 1 | echo "This script might take a while (a couple of hours)." 2 | echo "You can set the parallelism with the first argument and the working dir with the 2nd argument. Try to use an SSD to speed things up." 3 | 4 | #set parallelism 5 | if [ $# -lt 1 ]; then 6 | N=`nproc --all` 7 | else 8 | N=$1 9 | fi 10 | 11 | if [ $# -lt 2 ]; then 12 | DOWNLOADPATH=data/triviaqa 13 | else 14 | DOWNLOADPATH=$2 15 | fi 16 | 17 | export TRIVIAQA_HOME=$DOWNLOADPATH/triviaqa-rc 18 | if [ ! -d $TRIVIAQA_HOME ]; then 19 | echo "Downloading and extracting dataset..." 20 | wget -P $DOWNLOADPATH http://nlp.cs.washington.edu/triviaqa/data/triviaqa-rc.tar.gz 21 | tar xf $DOWNLOADPATH/triviaqa-rc.tar.gz -C $DOWNLOADPATH 22 | fi 23 | 24 | git clone https://github.com/dirkweissenborn/document-qa.git data/triviaqa/document-qa 25 | 26 | cp data/triviaqa/config.py data/triviaqa/document-qa/docqa/ 27 | export PYTHONPATH=data/triviaqa/document-qa:$PYTHONPATH 28 | 29 | echo "Third-party preprocessing..." 30 | python3 data/triviaqa/document-qa/docqa/triviaqa/evidence_corpus.py -n $N 31 | python3 data/triviaqa/document-qa/docqa/triviaqa/build_span_corpus.py wiki --n_processes $N 32 | python3 data/triviaqa/document-qa/docqa/triviaqa/build_span_corpus.py web --n_processes $N 33 | 34 | echo "Converting to Jack format..." 35 | # For training we only extract the top (TF-IDF) paragraphs (merged/split to a maximum of 600 tokens each) to save disk space. 36 | # In case you want all paragraphs, change the paragraph count (4/6 below) to -1. 37 | 38 | # for dev and test take all paragraphs 39 | python3 data/triviaqa/convert2jack.py web-dev $N -1 600 40 | python3 data/triviaqa/convert2jack.py wiki-dev $N -1 600 41 | python3 data/triviaqa/convert2jack.py web-test $N -1 600 42 | python3 data/triviaqa/convert2jack.py wiki-test $N -1 600 43 | 44 | # for training we only need the top k paragraphs 45 | python3 data/triviaqa/convert2jack.py web-train $N 4 600 46 | python3 data/triviaqa/convert2jack.py wiki-train $N 6 600 47 | 48 | echo "Removing data/triviaqa/document-qa repository, since it is not needed anymore."
49 | rm -rf data/triviaqa/document-qa 50 | 51 | echo "Find prepared datasets in data/triviaqa/. If you want, you can safely remove $DOWNLOADPATH/triviaqa-rc now." 52 | -------------------------------------------------------------------------------- /data/word2vec/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd "$(dirname "$0")" 4 | 5 | wget https://www.dropbox.com/s/bnm0trligffakd9/GoogleNews-vectors-negative300.bin.gz 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/Formats_for_Embeddings.md: -------------------------------------------------------------------------------- 1 | # Formats for Embeddings 2 | 3 | Jack supports loading of various embedding formats, including GloVe and word2vec. These can be specified in the 4 | configuration files or command line parameters of your models via the `embedding_format` parameter. In particular, 5 | we support 6 | 7 | * `glove`: the original GloVe format, either as txt file or zipped 8 | * `word2vec`: word2vec format 9 | * `fasttext`: fasttext format 10 | * `memory_map_dir`: a directory that contains the embeddings as a numpy memory map, and meta information necessary to 11 | instantiate it. 12 | 13 | ## Memory Map Directories 14 | For large embeddings (large dimensions, many words), loading embeddings into memory can both take up a lot of 15 | CPU memory and be very slow. NumPy provides a memory-mapped file format for matrices that loads vectors on the fly. In Jack 16 | this functionality is used via the `memory_map_dir` format. 17 | 18 | You can convert your embeddings into this format via the `bin/mmap-cli.py` script. For example, to convert GloVe embeddings, 19 | assuming you are in the top-level jack directory, write: 20 | 21 | ```bash 22 | $ export PYTHONPATH=$PYTHONPATH:. 23 | $ python3 bin/mmap-cli.py --help 24 | $ python3 bin/mmap-cli.py data/GloVe/glove.840B.300d.txt data/GloVe/glove.840B.300d.memory_map_dir 25 | ``` 26 | 27 | This creates a directory `data/GloVe/glove.840B.300d.memory_map_dir` that stores the memory map and some necessary 28 | meta information. 29 | 30 | Using this format can substantially reduce start-up times and memory footprint.
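Once converted, point your training configuration at the directory via the embedding parameters used throughout `conf/`. The snippet below is a minimal excerpt mirroring `conf/qa/squad/abstract_squad.yaml`; adjust the path to wherever your memory map directory lives:

```yaml
# format of the pretrained embeddings: 'glove', 'word2vec', 'fasttext' or 'memory_map_dir'
embedding_format: 'memory_map_dir'

# embeddings to be loaded
embedding_file: 'data/GloVe/glove.840B.300d.memory_map_dir'

# Use fixed vocab of pretrained embeddings
vocab_from_embeddings: True
```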
31 | -------------------------------------------------------------------------------- /jack/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from jack.train_reader import train 4 | 5 | __all__ = [ 6 | 'train' 7 | ] 8 | -------------------------------------------------------------------------------- /jack/core/__init__.py: -------------------------------------------------------------------------------- 1 | from jack.core.input_module import * 2 | from jack.core.model_module import * 3 | from jack.core.output_module import * 4 | from jack.core.reader import * 5 | from jack.core.tensorport import * 6 | from jack.core.shared_resources import * 7 | -------------------------------------------------------------------------------- /jack/core/output_module.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from abc import abstractmethod 4 | from typing import Sequence, Mapping 5 | 6 | import numpy as np 7 | 8 | from jack.core.data_structures import QASetting, Answer 9 | from jack.core.tensorport import TensorPort 10 | 11 | 12 | class OutputModule: 13 | """ 14 | An output module takes the output (numpy) tensors of the model module and turns them into 15 | jack data structures. 16 | """ 17 | 18 | @property 19 | @abstractmethod 20 | def input_ports(self) -> Sequence[TensorPort]: 21 | """Returns: ports corresponding to a subset of the output ports of the model module.""" 22 | raise NotImplementedError 23 | 24 | @abstractmethod 25 | def __call__(self, questions: Sequence[QASetting], tensors: Mapping[TensorPort, np.array]) \ 26 | -> Sequence[Answer]: 27 | """ 28 | Process the tensors corresponding to the defined `input_ports` for a batch to produce a list of answers. 29 | The module has access to the original inputs. 30 | Args: 31 | questions: the original question settings of the batch. 32 | tensors: mapping from input ports to the corresponding numpy tensors produced by the model module. 33 | 34 | Returns: 35 | the produced answers, one per question setting. 36 | """ 37 | raise NotImplementedError 38 | 39 | @abstractmethod 40 | def setup(self): 41 | pass 42 | 43 | def store(self, path): 44 | """Store the state of this module. Default is that there is no state, so nothing to store.""" 45 | pass 46 | 47 | def load(self, path): 48 | """Load the state of this module.
Default is that there is no state, so nothing to load.""" 49 | pass 50 | -------------------------------------------------------------------------------- /jack/eval/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from jack.eval import extractive_qa, link_prediction, classification 4 | from jack.eval.base import evaluators, evaluate_reader, pretty_print_results 5 | -------------------------------------------------------------------------------- /jack/eval/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from jack.eval import extractive_qa, link_prediction, classification 4 | 5 | evaluators = { 6 | 'extractive_qa': extractive_qa.evaluate, 7 | 'link_prediction': link_prediction.evaluate, 8 | 'classification': None 9 | } 10 | 11 | 12 | def evaluate_reader(reader, dataset, batch_size): 13 | from jack.readers.implementations import extractive_qa_readers, classification_readers, link_prediction_readers 14 | reader_name = reader.shared_resources.config.get('reader') 15 | if reader_name in extractive_qa_readers: 16 | return extractive_qa.evaluate(reader, dataset, batch_size) 17 | elif reader_name in link_prediction_readers: 18 | return link_prediction.evaluate(reader, dataset, batch_size) 19 | elif reader_name in classification_readers: 20 | return classification.evaluate(reader, dataset, batch_size) 21 | 22 | 23 | def pretty_print_results(d, prefix=''): 24 | for k, v in sorted(d.items(), key=lambda x: x[0]): 25 | if isinstance(v, dict): 26 | print(prefix + k + ":") 27 | pretty_print_results(v, prefix + '\t') 28 | elif '\n' in str(v): 29 | print(prefix + k + ":") 30 | print(str(v).replace('\n', '\n' + prefix + '\t')) 31 | else: 32 | print(prefix + k + ":", str(v)) 33 | -------------------------------------------------------------------------------- /jack/eval/classification.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | 4 | def evaluate(reader, dataset, batch_size): 5 | answers = reader.process_dataset(dataset, batch_size, silent=False) 6 | 7 | confusion_matrix = defaultdict(lambda: defaultdict(int)) 8 | 9 | for (q, a), pa in zip(dataset, answers): 10 | confusion_matrix[a[0].text][pa.text] += 1 11 | 12 | classes = sorted(confusion_matrix.keys()) 13 | max_class = max(6, len(max(classes, key=len))) 14 | 15 | precision = dict() 16 | recall = dict() 17 | f1 = dict() 18 | 19 | confusion_matrix_string = ['\n', ' ' * max_class] 20 | for c in classes: 21 | confusion_matrix_string.append('\t') 22 | confusion_matrix_string.append(c) 23 | confusion_matrix_string.append(' ' * (max_class - len(c))) 24 | confusion_matrix_string.append('\n') 25 | for c1 in classes: 26 | confusion_matrix_string.append(c1) 27 | confusion_matrix_string.append(' ' * (max_class - len(c1))) 28 | for c2 in classes: 29 | confusion_matrix_string.append('\t') 30 | ct = str(confusion_matrix[c1][c2]) 31 | confusion_matrix_string.append(ct) 32 | confusion_matrix_string.append(' ' * (max_class - len(ct))) 33 | confusion_matrix_string.append('\n') 34 | precision[c1] = confusion_matrix[c1][c1] / max(1.0, sum(p[c1] for p in confusion_matrix.values())) 35 | recall[c1] = confusion_matrix[c1][c1] / max(1.0, sum(confusion_matrix[c1].values())) 36 | f1[c1] = 2 * precision[c1] * recall[c1] / max(1.0, precision[c1] + recall[c1]) 37 | 38 | accuracy = sum(confusion_matrix[c][c] for c in classes) / max( 39 | 1.0, 
sum(sum(vs.values()) for vs in confusion_matrix.values())) 40 | 41 | return { 42 | 'Accuracy': accuracy, 43 | 'Precision': precision, 44 | 'Recall': recall, 45 | 'F1': f1, 46 | 'Confusion Matrix': ''.join(confusion_matrix_string) 47 | } 48 | -------------------------------------------------------------------------------- /jack/eval/extractive_qa.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | import string 4 | from collections import Counter 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | def evaluate(reader, dataset, batch_size): 10 | answers = reader.process_dataset(dataset, batch_size, silent=False) 11 | 12 | f1 = exact_match = 0 13 | for pa, (q, ass) in zip(answers, dataset): 14 | ground_truth = [a.text for a in ass] 15 | f1 += metric_max_over_ground_truths(f1_score, pa.text, ground_truth) 16 | exact_match += metric_max_over_ground_truths(exact_match_score, pa.text, ground_truth) 17 | 18 | f1 /= len(answers) 19 | exact_match /= len(answers) 20 | 21 | return {'F1': f1, 'Exact': exact_match} 22 | 23 | 24 | def normalize_answer(s): 25 | """Lower text and remove punctuation, articles and extra whitespace.""" 26 | 27 | def remove_articles(text): 28 | return re.sub(r'\b(a|an|the)\b', ' ', text) 29 | 30 | def white_space_fix(text): 31 | return ' '.join(text.split()) 32 | 33 | def remove_punc(text): 34 | exclude = set(string.punctuation) 35 | return ''.join(ch for ch in text if ch not in exclude) 36 | 37 | def lower(text): 38 | return text.lower() 39 | 40 | return white_space_fix(remove_articles(remove_punc(lower(s)))) 41 | 42 | 43 | def f1_score(prediction, ground_truth): 44 | prediction_tokens = normalize_answer(prediction).split() 45 | ground_truth_tokens = normalize_answer(ground_truth).split() 46 | common = Counter(prediction_tokens) & Counter(ground_truth_tokens) 47 | num_same = sum(common.values()) 48 | if num_same == 0: 49 | return 0 50 | precision = 1.0 * num_same / len(prediction_tokens) 51 | recall = 1.0 * num_same / len(ground_truth_tokens) 52 | f1 = (2 * precision * recall) / (precision + recall) 53 | return f1 54 | 55 | 56 | def exact_match_score(prediction, ground_truth): 57 | return normalize_answer(prediction) == normalize_answer(ground_truth) 58 | 59 | 60 | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): 61 | scores_for_ground_truths = [0.0] 62 | for ground_truth in ground_truths: 63 | score = metric_fn(prediction, ground_truth) 64 | scores_for_ground_truths.append(score) 65 | return max(scores_for_ground_truths) 66 | -------------------------------------------------------------------------------- /jack/eval/output_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "title": "output schema", 4 | "description": "The schema for predictions of a model.", 5 | "type": "array", 6 | "items": { 7 | "instances": { 8 | "type": "array", 9 | "items": { 10 | "type":"array", 11 | "items": { 12 | "type":"object", 13 | "properties": { 14 | "candidates": { 15 | "type":"array", 16 | "items": { 17 | "type":"object", 18 | "properties": { 19 | "text": {"type":"string" }, 20 | "label": {"type": "string" }, 21 | "score": {"type": "number"}, 22 | "span": { 23 | "type": "array", 24 | "items": { 25 | "type":"integer" 26 | } 27 | } 28 | } 29 | } 30 | } 31 | } 32 | } 33 | } 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- 
/jack/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/jack/io/__init__.py -------------------------------------------------------------------------------- /jack/io/embeddings/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from jack.io.embeddings.embeddings import Embeddings, load_embeddings 4 | from jack.io.embeddings.glove import load_glove 5 | from jack.io.embeddings.word_to_vec import load_word2vec, get_word2vec_vocabulary 6 | __all__ = [ 7 | 'Embeddings', 8 | 'load_embeddings', 9 | 'load_word2vec', 10 | 'get_word2vec_vocabulary', 11 | 'load_glove', 12 | ] 13 | -------------------------------------------------------------------------------- /jack/io/embeddings/fasttext.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import logging 4 | 5 | import numpy as np 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def load_fasttext(stream, vocab=None): 11 | """Loads a fastText file and merges it with the optional vocabulary if given. 12 | Args: 13 | stream (iterable): An opened filestream to the fastText file. 14 | vocab (dict=None): Word2idx dict of existing vocabulary. 15 | Returns: 16 | return_vocab (Vocabulary), lookup (matrix); Vocabulary contains the 17 | word2idx and the matrix contains the embedded words. 18 | """ 19 | logger.info('Loading fastText vectors ..') 20 | 21 | word2idx = {} 22 | vec_n, vec_size = map(int, stream.readline().split()) 23 | lookup = np.empty([vocab.get_size() if vocab is not None else vec_n, vec_size], dtype=np.float) 24 | n = 0 25 | for line in stream: 26 | word, vec = line.rstrip().split(maxsplit=1) 27 | if vocab is None or word in vocab and word not in word2idx: 28 | word = word.decode('utf-8') 29 | idx = len(word2idx) 30 | word2idx[word] = idx 31 | # if idx > np.size(lookup, axis=0) - 1: 32 | # lookup.resize([lookup.shape[0] + 500000, lookup.shape[1]]) 33 | lookup[idx] = np.fromstring(vec, sep=' ') 34 | n += 1 35 | # lookup.resize([len(word2idx), dim]) 36 | logger.info('Loading fastText vectors completed.') 37 | return word2idx, lookup 38 | -------------------------------------------------------------------------------- /jack/io/embeddings/glove.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import logging 4 | 5 | import numpy as np 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def load_glove(stream, vocab=None): 11 | """Loads a GloVe file and merges it with the optional vocabulary if given. 12 | Args: 13 | stream (iterable): An opened filestream to the GloVe file. 14 | vocab (dict=None): Word2idx dict of existing vocabulary. 15 | Returns: 16 | return_vocab (Vocabulary), lookup (matrix); Vocabulary contains the 17 | word2idx and the matrix contains the embedded words.
18 | """ 19 | logger.info('Loading GloVe vectors ..') 20 | 21 | word2idx = {} 22 | first_line = stream.readline() 23 | dim = len(first_line.split()) - 1 24 | lookup = np.empty([500000, dim], dtype=np.float) 25 | lookup[0] = np.fromstring(first_line.split(maxsplit=1)[1], sep=' ') 26 | word2idx[first_line.split(maxsplit=1)[0].decode('utf-8')] = 0 27 | n = 1 28 | for line in stream: 29 | word, vec = line.rstrip().split(maxsplit=1) 30 | if vocab is None or word in vocab and word not in word2idx: 31 | word = word.decode('utf-8') 32 | idx = len(word2idx) 33 | word2idx[word] = idx 34 | if idx > np.size(lookup, axis=0) - 1: 35 | lookup.resize([lookup.shape[0] + 500000, lookup.shape[1]]) 36 | lookup[idx] = np.fromstring(vec, sep=' ') 37 | n += 1 38 | lookup.resize([len(word2idx), dim]) 39 | logger.info('Loading GloVe vectors completed.') 40 | return word2idx, lookup 41 | -------------------------------------------------------------------------------- /jack/io/embeddings/memory_map.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import json 4 | import os 5 | 6 | import numpy as np 7 | 8 | from jack.io.embeddings import Embeddings 9 | 10 | 11 | def load_memory_map_dir(directory: str) -> Embeddings: 12 | """ 13 | Loads embeddings from a memory map directory to allow lazy loading (and reduce the memory usage). 14 | Args: 15 | directory: a file prefix. This function loads two files in the directory: a meta json file with shape information 16 | and the vocabulary, and the actual memory map file. 17 | 18 | Returns: 19 | Embeddings object with a lookup matrix that is backed by a memory map. 20 | 21 | """ 22 | meta_file = os.path.join(directory, "meta.json") 23 | mem_map_file = os.path.join(directory, "memory_map") 24 | with open(meta_file, "r") as f: 25 | meta = json.load(f) 26 | shape = tuple(meta['shape']) 27 | vocab = meta['vocab'] 28 | mem_map = np.memmap(mem_map_file, dtype='float32', mode='r+', shape=shape) 29 | result = Embeddings(vocab, mem_map, filename=directory, emb_format="memory_map_dir") 30 | return result 31 | 32 | 33 | def save_as_memory_map_dir(directory: str, emb: Embeddings): 34 | """ 35 | Saves the given embeddings as memory map file and corresponding meta data in a directory. 36 | Args: 37 | directory: the directory to store the memory map file in (called `memory_map`) and the meta file (called 38 | `meta.json` that stores the shape of the memory map and the actual vocabulary. 39 | emb: the embeddings to store. 40 | """ 41 | if not os.path.exists(directory): 42 | os.makedirs(directory) 43 | 44 | meta_file = os.path.join(directory, "meta.json") 45 | mem_map_file = os.path.join(directory, "memory_map") 46 | with open(meta_file, "w") as f: 47 | json.dump({ 48 | "vocab": emb.vocabulary, 49 | "shape": emb.shape 50 | }, f) 51 | mem_map = np.memmap(mem_map_file, dtype='float32', mode='w+', shape=emb.shape) 52 | mem_map[:] = emb.lookup[:] 53 | mem_map.flush() 54 | del mem_map 55 | -------------------------------------------------------------------------------- /jack/io/embeddings/word_to_vec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import gzip 4 | import numpy as np 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def load_word2vec(filename, vocab=None, normalise=True): 12 | """Loads a word2vec file and merges existing vocabulary. 13 | 14 | Args: 15 | filename (string): Path to the word2vec file. 
16 | vocab (Vocabulary=None): Existing vocabulary to be merged. 17 | normalise (bool=True): If the word embeddings should be unit 18 | normalized or not. 19 | Returns: 20 | return_vocab (dict), lookup (matrix): The dict is a word2idx dict and 21 | the lookup matrix is the matrix of embedding vectors. 22 | """ 23 | logger.info("Loading word2vec vectors ..") 24 | with gzip.open(filename, 'rb') as f: 25 | vec_n, vec_size = map(int, f.readline().split()) 26 | byte_size = vec_size * 4 27 | lookup = np.empty([vocab.get_size() if vocab is not None else vec_n, vec_size], dtype=np.float32) 28 | word2idx = {} 29 | idx = 0 30 | for n in range(vec_n): 31 | word = b'' 32 | while True: 33 | c = f.read(1) 34 | if c == b' ': 35 | break 36 | else: 37 | word += c 38 | 39 | word = word.decode('utf-8') 40 | vector = np.fromstring(f.read(byte_size), dtype=np.float32) 41 | if vocab is None or vocab.contains_word(word): 42 | word2idx[word] = idx 43 | lookup[idx] = _normalise(vector) if normalise else vector 44 | idx += 1 45 | 46 | lookup.resize([idx, vec_size]) 47 | logger.info('Loading word2vec vectors completed.') 48 | return word2idx, lookup 49 | 50 | 51 | def _normalise(x): 52 | """Unit normalize x with L2 norm.""" 53 | return (1.0 / np.linalg.norm(x, ord=2)) * x 54 | 55 | 56 | def get_word2vec_vocabulary(fname): 57 | """Loads word2vec file and returns the vocabulary as dict word2idx.""" 58 | voc, _ = load_word2vec(fname) 59 | return voc 60 | 61 | 62 | if __name__ == "__main__": 63 | pickle_tokens = False 64 | vocab, _ = load_word2vec('../../data/word2vec/GoogleNews-vectors-negative300.bin.gz') 65 | 66 | # pickle token set 67 | if pickle_tokens: 68 | import pickle 69 | w2v_words = set(vocab.get_all_words()) 70 | pickle.dump(w2v_words, open('./data/w2v_tokens.pickle', 'wb')) 71 | -------------------------------------------------------------------------------- /jack/io/load.py: -------------------------------------------------------------------------------- 1 | """Implementation of loaders for common datasets.""" 2 | 3 | import json 4 | 5 | from jack.core.data_structures import * 6 | from jack.io.SNLI2jtr import convert_snli 7 | from jack.io.SQuAD2jtr import convert_squad 8 | 9 | loaders = dict() 10 | 11 | 12 | def _register(name): 13 | def _decorator(f): 14 | loaders[name] = f 15 | return f 16 | 17 | return _decorator 18 | 19 | 20 | @_register('jack') 21 | def load_jack(path, max_count=None): 22 | """ 23 | This function loads a jack json file from a specific location. 24 | Args: 25 | path: the location to load from. 26 | max_count: how many instances to load at most 27 | 28 | Returns: 29 | A list of input-answer pairs. 30 | 31 | """ 32 | # We load json directly instead 33 | with open(path) as f: 34 | jtr_data = json.load(f) 35 | 36 | return jack_to_qasetting(jtr_data, max_count) 37 | 38 | 39 | @_register('squad') 40 | def load_squad(path, max_count=None): 41 | """ 42 | This function loads a squad json file from a specific location. 43 | Args: 44 | path: the location to load from. 45 | max_count: how many instances to load at most 46 | 47 | Returns: 48 | A list of input-answer pairs. 49 | """ 50 | # We load to jtr dict and convert to qa settings for now 51 | jtr_data = convert_squad(path) 52 | return jack_to_qasetting(jtr_data, max_count) 53 | 54 | 55 | @_register('snli') 56 | def load_snli(path, max_count=None): 57 | """ 58 | This function loads a jack json file with labelled answers from a specific location. 59 | Args: 60 | path: the location to load from. 
61 | max_count: how many instances to load at most 62 | 63 | Returns: 64 | A list of input-answer pairs. 65 | """ 66 | # We load to jtr dict and convert to qa settings for now 67 | jtr_data = convert_snli(path) 68 | return jack_to_qasetting(jtr_data, max_count) 69 | -------------------------------------------------------------------------------- /jack/io/merge_JTR_data_files.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file merges two data files, both in JTR format, into a single JTR data file. 3 | It assumes that the structure of instances is identical for both input files 4 | and only concatenates the two instances lists. 5 | It also assumes that the global variables are identical in both input files. 6 | """ 7 | 8 | import json 9 | import sys 10 | 11 | 12 | def main(): 13 | 14 | if len(sys.argv) != 4: 15 | print('Wrong arguments for merging two data files in Jack format into one. Usage:') 16 | print('\tpython3 merge_JTR_data_files.py input1.json input2.json output.json') 17 | else: 18 | # load input 1 19 | with open(sys.argv[1], 'r') as inputfile1: 20 | content1 = json.load(inputfile1) 21 | 22 | # load input 2 23 | with open(sys.argv[2], 'r') as inputfile2: 24 | content2 = json.load(inputfile2) 25 | 26 | # define new 'meta' field 27 | meta_ = "Merged Content of {} and {}".format(content1['meta'], content2['meta']) 28 | 29 | # define new 'globals' field. Note: so far assuming same globals in both input files. 30 | assert (content1['globals']) == content2['globals'] 31 | globals_ = content1['globals'] 32 | 33 | # concatenating instances of both input files 34 | instances_ = content1['instances'] + content2['instances'] 35 | 36 | # defining the dictionary for dumping into json 37 | merged_content = {'meta': meta_, 'globals': globals_, 'instances': instances_} 38 | 39 | # sanity check: nothing unexpected got lost or added 40 | assert len(content1['instances']) + len(content2['instances']) == len(merged_content['instances']) 41 | 42 | # summary print 43 | print('Merged file {} with {} into {}'.format(sys.argv[1],sys.argv[2],sys.argv[3])) 44 | print('Number of instances: input1: {} input2: {} output: {}'\ 45 | .format(len(content1['instances']), len(content2['instances']), len(merged_content['instances']))) 46 | 47 | # dump merged content into JTR output file.
48 | with open(sys.argv[3], 'w') as outputfile: 49 | json.dump(merged_content, outputfile) 50 | 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /jack/io/newsqa2squad.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | import sys 4 | from collections import Counter 5 | 6 | input_fn = sys.argv[1] 7 | output_fn = sys.argv[2] 8 | 9 | dataset = [] 10 | squad_style_dataset = {"data": dataset, "version": "1"} 11 | 12 | with open(input_fn, "r") as f: 13 | reader = csv.reader(f) 14 | reader.__next__() 15 | for row in reader: 16 | [story_id, question, answer_char_ranges, is_answer_absent, is_question_bad, validated_answers, story_text] = row 17 | 18 | spans = None 19 | if validated_answers: 20 | answers = json.loads(validated_answers) 21 | spans = [k for k, v in answers.items() if ":" in k] 22 | else: 23 | answers = Counter() 24 | for rs in answer_char_ranges.split("|"): 25 | for r in set(rs.split(",")): 26 | if ":" in r: 27 | answers[r] += 1 28 | spans = [k for k, v in answers.items() if ":" in k and v >= 2] 29 | 30 | if spans: 31 | example = {"title": story_id, "paragraphs": [ 32 | { 33 | "context": story_text, 34 | "qas": [{ 35 | "question": question, 36 | "id": story_id + "_" + question.replace(" ", "_"), 37 | "answers": [{ 38 | "answer_start": int(span.split(":")[0]), 39 | "text": story_text[int(span.split(":")[0]):int(span.split(":")[1])] 40 | } for span in spans] 41 | }] 42 | } 43 | ]} 44 | dataset.append(example) 45 | # else: 46 | # print("No span found for %s" % story_id) 47 | 48 | with open(output_fn, "w") as f: 49 | json.dump(squad_style_dataset, f) 50 | -------------------------------------------------------------------------------- /jack/io/read_semeval2017Task10.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def readAnn(textfolder="../data/SemEval2017Task10/"): 5 | ''' 6 | Read .ann files and look up corresponding spans in .txt files 7 | 8 | Args: 9 | textfolder: 10 | ''' 11 | 12 | flist = os.listdir(textfolder) 13 | for f in flist: 14 | if not f.endswith(".ann"): 15 | continue 16 | 17 | f_anno = open(os.path.join(textfolder, f), "rU") 18 | f_text = open(os.path.join(textfolder, f.replace(".ann", ".txt")), "rU") 19 | 20 | # there's only one line, as each .ann file is one text paragraph 21 | for l in f_text: 22 | text = l 23 | 24 | #@TODO: collect all keyphrase and relation annotations, create pairs of all keyphrase that appear in same sentence for USchema style RE 25 | 26 | for l in f_anno: 27 | anno_inst = l.strip().split("\t") 28 | if len(anno_inst) == 3: 29 | keytype, start, end = anno_inst[1].split(" ") 30 | if not keytype.endswith("-of"): 31 | 32 | # look up span in text and print error message if it doesn't match the .ann span text 33 | keyphr_text_lookup = text[int(start):int(end)] 34 | keyphr_ann = anno_inst[2] 35 | if keyphr_text_lookup != keyphr_ann: 36 | print("Spans don't match for anno " + l.strip() + " in file " + f) 37 | 38 | #if keytype.endswith("-of"): 39 | 40 | 41 | if __name__ == '__main__': 42 | readAnn() -------------------------------------------------------------------------------- /jack/io/scienceQA2jtr.py: -------------------------------------------------------------------------------- 1 | import json 2 | import io 3 | import random 4 | 5 | def convert_scienceCloze_to_jtr(scienceQAFile): 6 | 7 | instances = [] 8 | 9 | f = 
io.open(scienceQAFile, "r", encoding="utf-8") 10 | 11 | for l in f: 12 | l = l.strip().lower().split("\t")  # do the lower case preprocessing here 13 | try: 14 | quest, answs, cands, context, contextID = l 15 | except ValueError: 16 | print(l) 17 | continue 18 | 19 | context = context[2:-2].split('\', \'') 20 | 21 | support = [] 22 | for i, c in enumerate(context): 23 | support.append({"id": contextID + "_" + str(i), "text": c}) 24 | candidates = cands[2:-2].split('\', \'') 25 | 26 | qdict = { 27 | 'question': quest, 28 | 'candidates': [ 29 | { 30 | 'text': cand 31 | } for cand in candidates 32 | ], 33 | 'answers': [{'text': answs}] 34 | } 35 | qset_dict = { 36 | 'support': support, 37 | 'questions': [qdict] 38 | } 39 | 40 | instances.append(qset_dict) 41 | 42 | 43 | 44 | random.shuffle(instances) 45 | 46 | corpus_dict = { 47 | 'meta': "scienceQA.json", 48 | 'instances': instances 49 | } 50 | 51 | f.close() 52 | 53 | return corpus_dict 54 | 55 | 56 | 57 | if __name__ == "__main__": 58 | corpus = convert_scienceCloze_to_jtr("../data/scienceQA/clozeSummaryLocal_test.txt") 59 | with open("../data/scienceQA/scienceQA_clozeSummaryLocal_test.json", 'w') as outfile: 60 | json.dump(corpus, outfile, indent=2, ensure_ascii=False) -------------------------------------------------------------------------------- /jack/io/sentihood2jtr.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import defaultdict 3 | import sys 4 | import os 5 | 6 | 7 | def main(): 8 | # = parse_cbt_example(instances[0]) 9 | if len(sys.argv) == 2: 10 | with open(sys.argv[1], 'r') as f: 11 | sentihood_data = json.load(f) 12 | 13 | convert_to_jtr(sentihood_data) 14 | elif len(sys.argv) == 1: 15 | data_path = '../data/sentihood/' 16 | filenames = ['sentihood-train.json', 'sentihood-dev.json', 17 | 'sentihood-test.json'] 18 | for i, f in enumerate(filenames): 19 | raw_data = json.load(open(os.path.join(data_path, f))) 20 | instances = convert_to_jtr(raw_data) 21 | 22 | if i == 0: # training data -> write overfit set 23 | json.dump(wrap_into_jtr_global(instances[:100]), 24 | open('../../tests/test_data/sentihood/overfit.json','w'), 25 | indent=2) 26 | 27 | # write data sets for smalldata tests 28 | json.dump(wrap_into_jtr_global(instances[:1000]), 29 | open(os.path.join('../../tests/test_data/sentihood/',f),'w'), 30 | indent=2) 31 | 32 | def wrap_into_jtr_global(instances): 33 | reading_dataset = { 34 | 'globals': { 35 | 'candidates': [ 36 | {'text': 'Negative'}, 37 | {'text': 'Positive'}, 38 | {'text': 'Neutral'} 39 | ] 40 | }, 41 | 'instances': instances 42 | } 43 | return reading_dataset 44 | 45 | 46 | 47 | def convert_to_jtr(sentihood_data, exhaustive=True): 48 | instances = [] 49 | # collect all aspects 50 | aspects = set() 51 | for instance in sentihood_data: 52 | if 'opinions' in instance.keys(): 53 | for opinion in instance['opinions']: 54 | aspects.add(opinion['aspect']) 55 | for instance in sentihood_data: 56 | text = instance['text'] 57 | answers = defaultdict(lambda: 'Neutral') 58 | if 'opinions' in instance.keys(): 59 | for opinion in instance['opinions']: 60 | aspect = opinion['aspect'] 61 | answers[aspect] = opinion['sentiment'] 62 | 63 | for aspect in aspects if exhaustive else answers.keys(): 64 | reading_instance = { 65 | 'support': [{'text': text}], 66 | 'questions': [{'question': aspect, 'answers': [{'text': answers[aspect]}]}] 67 | } 68 | instances.append(reading_instance) 69 | 70 | return instances 71 | 72 | 73 | if 
__name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /jack/io/simpleQuestions2jtr.py: -------------------------------------------------------------------------------- 1 | import json 2 | import io 3 | 4 | 5 | def create_snippet(file_path, first_n=5): 6 | with open(file_path, 'r') as f: 7 | return [next(f) for _ in range(first_n)] 8 | 9 | 10 | def create_jtr_snippet(file_path): 11 | return convert_simplequestions(file_path, first_n=5) 12 | 13 | 14 | def convert_simplequestions(file_path, first_n=None): 15 | instances = [] 16 | f = io.open(file_path, "r") 17 | i = 0 18 | for l in f: 19 | i += 1 20 | if first_n and i > first_n: 21 | break 22 | subj, rel, obj, qu = l.strip().split("\t") 23 | 24 | support = [" ".join([subj, rel])] 25 | qdict = { 26 | 'question': qu, 27 | 'answers': [obj] 28 | } 29 | qset_dict = { 30 | 'support': [{'text': supp} for supp in support], 31 | 'questions': [qdict] 32 | } 33 | instances.append(qset_dict) 34 | 35 | corpus_dict = { 36 | 'meta': "simpleQuestions.json", 37 | 'instances': instances 38 | } 39 | 40 | f.close() 41 | 42 | return corpus_dict 43 | 44 | 45 | def main(): 46 | # some tests: 47 | # raw_data = load_cbt_file(path=None, part='valid', mode='NE') 48 | # instances = split_cbt(raw_data) 49 | # = parse_cbt_example(instances[0]) 50 | 51 | import sys 52 | if len(sys.argv) == 3: 53 | # corpus = create_jtr_snippet(sys.argv[1]) 54 | # out = create_snippet(sys.argv[1]) 55 | # with open(sys.argv[2], 'w') as outfile: 56 | # outfile.writelines(out) 57 | corpus = convert_simplequestions(sys.argv[1]) 58 | with open(sys.argv[2], 'w') as outfile: 59 | json.dump(corpus, outfile, indent=2) 60 | else: 61 | print("Usage: python3 simpleQuestions2jtr.py path/to/simpleQuestions save/to/simpleQuestions.jack.json") 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /jack/io/validate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import jsonschema 5 | from sys import argv 6 | 7 | def main(arg1, arg2): 8 | with open(arg1) as f: 9 | data = json.load(f) 10 | 11 | with open(arg2) as f: 12 | schema = json.load(f) 13 | 14 | try: 15 | jsonschema.validate(data, schema) 16 | return 'JSON successfully validated.' 
17 | except jsonschema.ValidationError as e: 18 | return e.message 19 | except jsonschema.SchemaError as e: 20 | return e 21 | 22 | 23 | if __name__ == '__main__': 24 | response = main(argv[1], argv[2]) 25 | print(response) 26 | -------------------------------------------------------------------------------- /jack/readers/__init__.py: -------------------------------------------------------------------------------- 1 | from jack.readers.implementations import * 2 | -------------------------------------------------------------------------------- /jack/readers/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/jack/readers/classification/__init__.py -------------------------------------------------------------------------------- /jack/readers/classification/util.py: -------------------------------------------------------------------------------- 1 | """Shared utilities for multiple choice.""" 2 | from typing import Iterable 3 | 4 | from jack.core.data_structures import QASetting, Answer 5 | from jack.util.vocab import Vocab 6 | 7 | 8 | def create_answer_vocab(qa_settings: Iterable[QASetting] = None, answers: Iterable[Answer] = None): 9 | vocab = Vocab(unk=None) 10 | if qa_settings is not None: 11 | for qa in qa_settings: 12 | if qa.candidates: 13 | for c in qa.candidates: 14 | vocab(c) 15 | if answers is not None: 16 | for a in answers: 17 | vocab(a.text) 18 | return vocab 19 | 20 | 21 | def candidate_one_hot(candidates, answer_str): 22 | return [1.0 if candidates[answer_str] == cand else 0.0 for cand in candidates] 23 | -------------------------------------------------------------------------------- /jack/readers/extractive_qa/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /jack/readers/extractive_qa/tensorflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/jack/readers/extractive_qa/tensorflow/__init__.py -------------------------------------------------------------------------------- /jack/readers/extractive_qa/tensorflow/abstract_model.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | from jack.core import Ports, TensorPort, TensorPortTensors 4 | from jack.core.tensorflow import TFModelModule 5 | from jack.readers.extractive_qa.shared import XQAPorts 6 | from jack.util.tf.xqa import xqa_crossentropy_loss 7 | 8 | 9 | class AbstractXQAModelModule(TFModelModule): 10 | _input_ports = [XQAPorts.emb_question, XQAPorts.question_length, 11 | XQAPorts.emb_support, XQAPorts.support_length, XQAPorts.support2question, 12 | # char embedding inputs 13 | XQAPorts.word_chars, XQAPorts.word_char_length, 14 | XQAPorts.question_batch_words, XQAPorts.support_batch_words, 15 | # feature input 16 | XQAPorts.word_in_question, 17 | # optional input, provided only during training 18 | XQAPorts.correct_start, XQAPorts.answer2support_training, 19 | XQAPorts.is_eval] 20 | 21 | _output_ports = [XQAPorts.start_scores, XQAPorts.end_scores, 22 | XQAPorts.answer_span] 23 | _training_input_ports = [XQAPorts.start_scores, XQAPorts.end_scores, 24 | XQAPorts.answer_span_target, XQAPorts.answer2support_training, XQAPorts.support2question] 25 
| _training_output_ports = [Ports.loss] 26 | 27 | @property 28 | def output_ports(self) -> Sequence[TensorPort]: 29 | return self._output_ports 30 | 31 | @property 32 | def input_ports(self) -> Sequence[TensorPort]: 33 | return self._input_ports 34 | 35 | @property 36 | def training_input_ports(self) -> Sequence[TensorPort]: 37 | return self._training_input_ports 38 | 39 | @property 40 | def training_output_ports(self) -> Sequence[TensorPort]: 41 | return self._training_output_ports 42 | 43 | def create_training_output(self, shared_resources, input_tensors): 44 | tensors = TensorPortTensors(input_tensors) 45 | return { 46 | Ports.loss: xqa_crossentropy_loss(tensors.start_scores, tensors.end_scores, 47 | tensors.answer_span_target, tensors.answer2support, 48 | tensors.support2question, 49 | use_sum=shared_resources.config.get('loss', 'sum') == 'sum') 50 | } 51 | -------------------------------------------------------------------------------- /jack/readers/extractive_qa/torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/jack/readers/extractive_qa/torch/__init__.py -------------------------------------------------------------------------------- /jack/readers/link_prediction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/jack/readers/link_prediction/__init__.py -------------------------------------------------------------------------------- /jack/readers/link_prediction/similarities.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | 5 | import tensorflow as tf 6 | 7 | 8 | def negative_l1_distance(x1, x2, axis=1): 9 | """ 10 | Negative L1 Distance. 11 | 12 | .. math:: L = - \\sum_i \\abs(x1_i - x2_i) 13 | 14 | Args: 15 | x1: First term. 16 | x2: Second term. 17 | axis: Reduction Indices. 18 | 19 | Returns: 20 | Similarity Value. 21 | """ 22 | distance = tf.reduce_sum(tf.abs(x1 - x2), axis=axis) 23 | return - distance 24 | 25 | 26 | def negative_l2_distance(x1, x2, axis=1): 27 | """ 28 | Negative L2 Distance. 29 | 30 | .. math:: L = - \\sqrt{\\sum_i (x1_i - x2_i)^2} 31 | 32 | Args: 33 | x1: First term. 34 | x2: Second term. 35 | axis: Reduction Indices. 36 | 37 | Returns: 38 | Similarity Value. 39 | """ 40 | 41 | distance = tf.sqrt(tf.reduce_sum(tf.square(x1 - x2), axis=axis)) 42 | return - distance 43 | 44 | 45 | def negative_square_l2_distance(x1, x2, axis=1): 46 | """ 47 | Negative Square L2 Distance. 48 | 49 | .. math:: L = - \\sum_i (x1_i - x2_i)^2 50 | 51 | Args: 52 | x1: First term. 53 | x2: Second term. 54 | axis: Reduction Indices. 55 | 56 | Returns: 57 | Similarity Value. 58 | """ 59 | distance = tf.reduce_sum(tf.square(x1 - x2), axis=axis) 60 | return - distance 61 | 62 | 63 | def dot_product(x1, x2, axis=1): 64 | """ 65 | Dot Product. 66 | 67 | .. math:: L = \\sum_i x1_i x2_i 68 | 69 | Args: 70 | x1: First term. 71 | x2: Second term. 72 | axis: Reduction Indices. 73 | 74 | Returns: 75 | Similarity Value. 
76 | """ 77 | 78 | similarity = tf.reduce_sum(x1 * x2, axis=axis) 79 | return similarity 80 | 81 | 82 | # Aliases 83 | l1 = L1 = negative_l1_distance 84 | l2 = L2 = negative_l2_distance 85 | l2_sqr = L2_SQR = negative_square_l2_distance 86 | dot = DOT = dot_product 87 | 88 | 89 | def get_function(function_name): 90 | this_module = sys.modules[__name__] 91 | if not hasattr(this_module, function_name): 92 | raise ValueError('Unknown similarity function: {}'.format(function_name)) 93 | return getattr(this_module, function_name) 94 | -------------------------------------------------------------------------------- /jack/readers/natural_language_inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/jack/readers/natural_language_inference/__init__.py -------------------------------------------------------------------------------- /jack/readers/natural_language_inference/conditional_bilstm.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from jack.readers.classification.shared import AbstractSingleSupportClassificationModel 4 | from jack.util.tf.rnn import fused_birnn 5 | 6 | 7 | class ConditionalBiLSTMClassificationModel(AbstractSingleSupportClassificationModel): 8 | def forward_pass(self, shared_resources, embedded_question, embedded_support, num_classes, tensors): 9 | # question - hypothesis; support - premise 10 | repr_dim = shared_resources.config['repr_dim'] 11 | dropout = shared_resources.config.get("dropout", 0.0) 12 | 13 | with tf.variable_scope('embedding_projection') as vs: 14 | embedded_question = tf.layers.dense(embedded_question, repr_dim, tf.tanh, name='projection') 15 | vs.reuse_variables() 16 | embedded_support = tf.layers.dense(embedded_support, repr_dim, tf.tanh, name='projection') 17 | # keep dropout mask constant over time 18 | dropout_shape = [tf.shape(embedded_question)[0], 1, tf.shape(embedded_question)[2]] 19 | embedded_question = tf.nn.dropout(embedded_question, 1.0 - dropout, dropout_shape) 20 | embedded_support = tf.nn.dropout(embedded_support, 1.0 - dropout, dropout_shape) 21 | 22 | fused_rnn = tf.contrib.rnn.LSTMBlockFusedCell(repr_dim) 23 | # [batch, 2*output_dim] -> [batch, num_classes] 24 | _, q_states = fused_birnn(fused_rnn, embedded_question, sequence_length=tensors.question_length, 25 | dtype=tf.float32, time_major=False, scope="question_rnn") 26 | 27 | outputs, _ = fused_birnn(fused_rnn, embedded_support, sequence_length=tensors.support_length, 28 | dtype=tf.float32, initial_state=q_states, time_major=False, scope="support_rnn") 29 | 30 | # [batch, T, 2 * dim] -> [batch, dim] 31 | outputs = tf.concat([outputs[0], outputs[1]], axis=2) 32 | hidden = tf.layers.dense(outputs, repr_dim, tf.nn.relu, name="hidden") * tf.expand_dims( 33 | tf.sequence_mask(tensors.support_length, maxlen=tf.shape(outputs)[1], dtype=tf.float32), 2) 34 | hidden = tf.reduce_max(hidden, axis=1) 35 | # [batch, dim] -> [batch, num_classes] 36 | outputs = tf.layers.dense(hidden, num_classes, name="classification") 37 | return outputs 38 | -------------------------------------------------------------------------------- /jack/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/jack/util/__init__.py 
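The similarity functions and the `get_function` helper in `jack/readers/link_prediction/similarities.py` above are resolved by name (or by one of the aliases such as `dot`, `l1`, `l2`). A minimal usage sketch under TensorFlow 1.x (as used throughout this repository), with randomly generated embeddings that are purely illustrative:

```python
# Minimal sketch: resolve a similarity function by name and score a batch of
# illustrative subject/object embedding pairs (TF 1.x session-style execution).
import tensorflow as tf

from jack.readers.link_prediction.similarities import get_function

subj_emb = tf.random_normal([4, 8])   # hypothetical batch of 4 embeddings, dim 8
obj_emb = tf.random_normal([4, 8])

score_fn = get_function('dot_product')  # 'l1', 'l2', 'l2_sqr' or their aliases also resolve
scores = score_fn(subj_emb, obj_emb)    # shape [4]: one similarity score per pair

with tf.Session() as sess:
    print(sess.run(scores))
```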
-------------------------------------------------------------------------------- /jack/util/map.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | import numpy as np 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | def get_list_shape(xs): 10 | if isinstance(xs, int): 11 | shape = [] 12 | else: 13 | shape = [len(xs)] 14 | for i, x in enumerate(xs): 15 | if isinstance(x, list) or isinstance(x, tuple): 16 | if len(shape) == 1: 17 | shape.append(0) 18 | shape[1] = max(len(x), shape[1]) 19 | for j, y in enumerate(x): 20 | if isinstance(y, list): 21 | if len(shape) == 2: 22 | shape.append(0) 23 | shape[2] = max(len(y), shape[2]) 24 | return shape 25 | 26 | 27 | def numpify(xs, pad=0, keys=None, dtypes=None): 28 | """Converts a dict or list of Python data into a dict of numpy arrays.""" 29 | is_dict = isinstance(xs, dict) 30 | xs_np = {} if is_dict else [0] * len(xs) 31 | xs_iter = xs.items() if is_dict else enumerate(xs) 32 | 33 | for i, (key, x) in enumerate(xs_iter): 34 | try: 35 | if (keys is None or key in keys) and not isinstance(x, np.ndarray): 36 | shape = get_list_shape(x) 37 | dtype = dtypes[i] if dtypes is not None else np.int64 38 | x_np = np.full(shape, pad, dtype) 39 | 40 | nb_dims = len(shape) 41 | 42 | if nb_dims == 0: 43 | x_np = x 44 | else: 45 | def f(tensor, values): 46 | t_shp = tensor.shape 47 | if len(t_shp) > 1: 48 | for _i, _values in enumerate(values): 49 | f(tensor[_i], _values) 50 | else: 51 | tensor[0:len(values)] = [v for v in values] 52 | 53 | f(x_np, x) 54 | 55 | xs_np[key] = x_np 56 | else: 57 | xs_np[key] = x 58 | except Exception as e: 59 | logger.error('Error numpifying value ' + str(x) + ' of key ' + str(key)) 60 | raise e 61 | return xs_np 62 | -------------------------------------------------------------------------------- /jack/util/random.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | 6 | def singleton(cls): 7 | instances = {} 8 | 9 | def getinstance(*args, **kwargs): 10 | if cls not in instances: 11 | instances[cls] = cls(*args, **kwargs) 12 | return instances[cls] 13 | return getinstance 14 | 15 | 16 | @singleton 17 | class DefaultRandomState(np.random.RandomState): 18 | def __init__(self, seed=None): 19 | super().__init__(seed) 20 | -------------------------------------------------------------------------------- /jack/util/tf/__init__.py: -------------------------------------------------------------------------------- 1 | """The tf package should contain all tf functionality of jtr for maximal reuse""" 2 | -------------------------------------------------------------------------------- /jack/util/tf/activations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | 6 | def parametric_relu(x, name=None): 7 | alphas = tf.get_variable('{}/alpha'.format(name) if name else 'alpha', 8 | x.get_shape()[-1], 9 | initializer=tf.constant_initializer(0.0), 10 | dtype=tf.float32) 11 | return tf.nn.relu(x) + alphas * (x - abs(x)) * 0.5 12 | 13 | 14 | def selu(x, name=None): 15 | with tf.name_scope('{}/elu'.format(name) if name else 'elu') as _: 16 | alpha = 1.6732632423543772848170429916717 17 | scale = 1.0507009873554804934193349852946 18 | return scale*tf.where(x >= 0.0, x, alpha*tf.nn.elu(x)) 19 | 20 | 21 | # Aliases 22 | prelu = parametric_relu 23 | 24 | 25 | def 
activation_from_string(activation_str): 26 | if activation_str is None: 27 | return tf.identity 28 | return getattr(tf.nn, activation_str) 29 | -------------------------------------------------------------------------------- /jack/util/tf/dropout.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | 6 | def fixed_dropout(xs, keep_prob, noise_shape, seed=None): 7 | """ 8 | Apply dropout with same mask over all inputs 9 | Args: 10 | xs: list of tensors 11 | keep_prob: 12 | noise_shape: 13 | seed: 14 | 15 | Returns: 16 | list of dropped inputs 17 | """ 18 | with tf.name_scope("dropout", values=xs): 19 | noise_shape = noise_shape 20 | # uniform [keep_prob, 1.0 + keep_prob) 21 | random_tensor = keep_prob 22 | random_tensor += tf.random_uniform(noise_shape, seed=seed, dtype=xs[0].dtype) 23 | # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) 24 | binary_tensor = tf.floor(random_tensor) 25 | outputs = [] 26 | for x in xs: 27 | ret = tf.div(x, keep_prob) * binary_tensor 28 | ret.set_shape(x.get_shape()) 29 | outputs.append(ret) 30 | return outputs 31 | -------------------------------------------------------------------------------- /jack/util/tf/highway.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | 6 | def highway_layer(inputs, activation, name=None): 7 | with tf.variable_scope(name or "highway_layer"): 8 | d = inputs.get_shape()[-1].value 9 | trans_gate = tf.contrib.layers.fully_connected(inputs, 2 * d, activation_fn=None, weights_initializer=None, 10 | scope='trans_gate') 11 | trans, gate = tf.split(trans_gate, 2, len(inputs.get_shape()) - 1) 12 | trans, gate = activation(trans), tf.sigmoid(gate) 13 | out = gate * trans + (1 - gate) * inputs 14 | return out 15 | 16 | 17 | def highway_network(inputs, num_layers, activation=tf.tanh, name=None, reuse=False): 18 | with tf.variable_scope(name or "highway_network", reuse=reuse): 19 | prev = inputs 20 | cur = None 21 | for layer_idx in range(num_layers): 22 | cur = highway_layer(prev, activation, name="layer_{}".format(layer_idx)) 23 | prev = cur 24 | return cur 25 | -------------------------------------------------------------------------------- /jack/util/tf/masking.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | 6 | def mask_3d(sequences, sequence_lengths, mask_value, dimension=2): 7 | """ 8 | Given a batch of matrices, each with shape m x n, mask the values in each 9 | row after the positions indicated in sentence_sizes. 10 | This function is supposed to mask the last columns in the raw attention 11 | matrix (e_{i, j}) in cases where the sentence2 is smaller than the 12 | maximum. 
13 | 14 | Args: 15 | sequences: tensor with shape (batch_size, m, n) 16 | sequence_lengths: tensor with shape (batch_size) containing the sentence sizes that 17 | should be limited 18 | mask_value: scalar value to assign to items after sentence size 19 | dimension: over which dimension to mask values 20 | Returns: 21 | A tensor with the same shape as `sequences` 22 | """ 23 | if dimension == 1: 24 | sequences = tf.transpose(sequences, [0, 2, 1]) 25 | time_steps1, time_steps2 = tf.shape(sequences)[1], tf.shape(sequences)[2] 26 | ones = tf.ones_like(sequences, dtype=tf.int32) 27 | pad_values = mask_value * tf.cast(ones, tf.float32) 28 | mask = tf.sequence_mask(sequence_lengths, time_steps2) 29 | # mask is (batch_size, sentence2_size). we have to tile it for 3d 30 | mask3d = tf.tile(tf.expand_dims(mask, 1), (1, time_steps1, 1)) 31 | masked = tf.where(mask3d, sequences, pad_values) 32 | return tf.transpose(masked, [0, 2, 1]) if dimension == 1 else masked 33 | -------------------------------------------------------------------------------- /jack/util/tf/misc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | 6 | def mask_for_lengths(lengths, max_length=None, mask_right=True, value=-1000.0): 7 | """ 8 | Creates a [batch_size x max_length] mask. 9 | 10 | Args: 11 | lengths: int32 1-dim tensor of batch_size lengths 12 | max_length: int32 0-dim tensor or python int 13 | mask_right: if True, everything before "lengths" becomes zero and the 14 | rest "value", else vice versa 15 | value: value for the mask 16 | 17 | Returns: 18 | [batch_size x max_length] mask of zeros and "value"s 19 | """ 20 | mask = tf.sequence_mask(lengths, max_length, dtype=tf.float32) 21 | if mask_right: 22 | mask = 1.0 - mask 23 | mask *= value 24 | return mask 25 | -------------------------------------------------------------------------------- /jack/util/tf/simple.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | 6 | def fully_connected_projection(inputs, output_size): 7 | """Projects inputs onto target dimension and returns the logits. 8 | 9 | Creates a fully connected projection layer without a non-linearity; applying a 10 | softmax cross-entropy loss and argmax predictions is left to the caller. 11 | Args: 12 | inputs (tensor): Input into the projection layer. 13 | output_size (int): Size of the targets (used in projection layer).
14 | """ 15 | init = tf.contrib.layers.xavier_initializer(uniform=True) #uniform=False for truncated normal 16 | logits = tf.contrib.layers.fully_connected(inputs, output_size, weights_initializer=init, activation_fn=None) 17 | return logits 18 | -------------------------------------------------------------------------------- /jack/util/tf/xqa.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | from jack.util.tf.segment import segment_softmax 6 | 7 | 8 | def xqa_crossentropy_loss(start_scores, end_scores, answer_span, answer2support, support2question, use_sum=True): 9 | """Very common XQA loss function.""" 10 | num_questions = tf.reduce_max(support2question) + 1 11 | 12 | start, end = answer_span[:, 0], answer_span[:, 1] 13 | 14 | start_probs = segment_softmax(start_scores, support2question) 15 | start_probs = tf.gather_nd(start_probs, tf.stack([answer2support, start], 1)) 16 | 17 | # only start probs are normalized on multi-paragraph, end probs conditioned on start only on per support level 18 | num_answers = tf.shape(answer_span)[0] 19 | is_aligned = tf.equal(tf.shape(end_scores)[0], num_answers) 20 | end_probs = tf.cond( 21 | is_aligned, 22 | lambda: tf.gather_nd(tf.nn.softmax(end_scores), tf.stack([tf.range(num_answers, dtype=tf.int32), end], 1)), 23 | lambda: tf.gather_nd(segment_softmax(end_scores, support2question), tf.stack([answer2support, end], 1)) 24 | ) 25 | 26 | answer2question = tf.gather(support2question, answer2support) 27 | # compute losses individually 28 | if use_sum: 29 | span_probs = tf.unsorted_segment_sum( 30 | start_probs, answer2question, num_questions) * tf.unsorted_segment_sum( 31 | end_probs, answer2question, num_questions) 32 | else: 33 | span_probs = tf.unsorted_segment_max( 34 | start_probs, answer2question, num_questions) * tf.unsorted_segment_max( 35 | end_probs, answer2question, num_questions) 36 | 37 | return -tf.reduce_mean(tf.log(tf.maximum(1e-6, span_probs + 1e-6))) 38 | -------------------------------------------------------------------------------- /jack/util/torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/jack/util/torch/__init__.py -------------------------------------------------------------------------------- /jack/util/torch/embedding.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import nn 7 | from torch.nn import functional 8 | 9 | from jack.util.torch import misc 10 | 11 | 12 | class ConvCharEmbeddingModule(nn.Module): 13 | def __init__(self, num_chars, size, conv_width=5): 14 | super(ConvCharEmbeddingModule, self).__init__() 15 | self._size = size 16 | self._conv_width = conv_width 17 | self._embeddings = torch.nn.Embedding(num_chars, size) 18 | self._embeddings.weight.data.mul_(0.1) 19 | self._conv = torch.nn.Conv1d(size, size, conv_width, padding=math.floor(conv_width / 2)) 20 | 21 | def forward(self, unique_word_chars, unique_word_lengths, sequences_as_uniqs=None): 22 | long_tensor = torch.cuda.LongTensor if torch.cuda.device_count() > 0 else torch.LongTensor 23 | embedded_chars = self._embeddings(unique_word_chars.type(long_tensor)) 24 | # [N, S, L] 25 | conv_out = self._conv(embedded_chars.transpose(1, 2)) 26 | # [N, L] 27 | conv_mask = misc.mask_for_lengths(unique_word_lengths) 28 
| conv_out = conv_out + conv_mask.unsqueeze(1) 29 | embedded_words = conv_out.max(2)[0] 30 | 31 | if sequences_as_uniqs is None: 32 | return embedded_words 33 | else: 34 | if not isinstance(sequences_as_uniqs, list): 35 | sequences_as_uniqs = [sequences_as_uniqs] 36 | 37 | all_embedded = [] 38 | for word_idxs in sequences_as_uniqs: 39 | all_embedded.append(functional.embedding( 40 | word_idxs.type(long_tensor), embedded_words)) 41 | return all_embedded 42 | -------------------------------------------------------------------------------- /jack/util/torch/highway.py: -------------------------------------------------------------------------------- 1 | """Credits: https://github.com/kefirski/pytorch_Highway""" 2 | 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class Highway(nn.Module): 8 | def __init__(self, size, num_layers, f=F.tanh): 9 | super(Highway, self).__init__() 10 | 11 | self.num_layers = num_layers 12 | 13 | self.nonlinear = nn.ModuleList([nn.Linear(size, size) for _ in range(num_layers)]) 14 | 15 | self.linear = nn.ModuleList([nn.Linear(size, size) for _ in range(num_layers)]) 16 | 17 | self.gate = nn.ModuleList([nn.Linear(size, size) for _ in range(num_layers)]) 18 | 19 | self.f = f 20 | 21 | def forward(self, x): 22 | """ 23 | :param x: tensor with shape of [batch_size, size] 24 | :return: tensor with shape of [batch_size, size] 25 | applies σ(x) ⨀ (f(G(x))) + (1 - σ(x)) ⨀ (Q(x)) transformation | G and Q is affine transformation, 26 | f is non-linear transformation, σ(x) is affine transformation with sigmoid non-linearition 27 | and ⨀ is element-wise multiplication 28 | """ 29 | 30 | for layer in range(self.num_layers): 31 | gate = F.sigmoid(self.gate[layer](x)) 32 | 33 | nonlinear = self.f(self.nonlinear[layer](x)) 34 | linear = self.linear[layer](x) 35 | 36 | x = gate * nonlinear + (1 - gate) * linear 37 | 38 | return x 39 | -------------------------------------------------------------------------------- /jack/util/torch/rnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class BiLSTM(nn.Module): 6 | def __init__(self, input_size, size, start_state_given=False): 7 | super(BiLSTM, self).__init__() 8 | self._size = size 9 | self._bilstm = nn.LSTM(input_size, size, 1, bidirectional=True, batch_first=True) 10 | self._bilstm.bias_ih_l0.data[size:2 * size].fill_(1.0) 11 | self._bilstm.bias_ih_l0_reverse.data[size:2 * size].fill_(1.0) 12 | self._start_state_given = start_state_given 13 | if not start_state_given: 14 | self._lstm_start_hidden = nn.Parameter(torch.zeros(2, size)) 15 | self._lstm_start_state = nn.Parameter(torch.zeros(2, size)) 16 | 17 | def forward(self, inputs, lengths=None, start_state=None): 18 | if not self._start_state_given: 19 | batch_size = inputs.size(0) 20 | start_hidden = self._lstm_start_hidden.unsqueeze(1).expand(2, batch_size, self._size).contiguous() 21 | start_state = self._lstm_start_state.unsqueeze(1).expand(2, batch_size, self._size).contiguous() 22 | start_state = (start_hidden, start_state) 23 | 24 | if lengths is not None: 25 | new_lengths, indices = torch.sort(lengths, dim=0, descending=True) 26 | inputs = torch.index_select(inputs, 0, indices) 27 | if self._start_state_given: 28 | start_state = (torch.index_select(start_state[0], 1, indices), 29 | torch.index_select(start_state[1], 1, indices)) 30 | new_lengths = [l.data[0] for l in new_lengths] 31 | inputs = nn.utils.rnn.pack_padded_sequence(inputs, new_lengths, 
batch_first=True) 32 | 33 | output, (h_n, c_n) = self._bilstm(inputs, start_state) 34 | 35 | if lengths is not None: 36 | output = nn.utils.rnn.pad_packed_sequence(output, batch_first=True)[0] 37 | _, back_indices = torch.sort(indices, dim=0) 38 | output = torch.index_select(output, 0, back_indices) 39 | h_n = torch.index_select(h_n, 1, back_indices) 40 | c_n = torch.index_select(c_n, 1, back_indices) 41 | 42 | return output, (h_n, c_n) 43 | -------------------------------------------------------------------------------- /jack/util/torch/xqa.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn 6 | 7 | 8 | class XQAMinCrossentropyLossModule(nn.Module): 9 | def forward(self, start_scores, end_scores, answer_span, answer_to_question): 10 | """very common XQA loss function.""" 11 | long_tensor = torch.cuda.LongTensor if torch.cuda.device_count() > 0 else torch.LongTensor 12 | answer_span = answer_span.type(long_tensor) 13 | start, end = answer_span[:, 0], answer_span[:, 1] 14 | 15 | batch_size1 = start.data.shape[0] 16 | batch_size2 = start_scores.data.shape[0] 17 | is_aligned = batch_size1 == batch_size2 18 | 19 | start_scores = start_scores if is_aligned else torch.index_select(start_scores, dim=0, index=answer_to_question) 20 | end_scores = end_scores if is_aligned else torch.index_select(end_scores, dim=0, index=answer_to_question) 21 | 22 | partitioned_loss = [] 23 | for i, j in enumerate(answer_to_question): 24 | j = j.data[0] 25 | while j >= len(partitioned_loss): 26 | partitioned_loss.append([]) 27 | loss = -torch.index_select(F.log_softmax(start_scores[i], dim=0), dim=0, index=start[i]) 28 | loss -= torch.index_select(F.log_softmax(end_scores[i], dim=0), dim=0, index=end[i]) 29 | partitioned_loss[j].append(loss) 30 | 31 | for j, l in enumerate(partitioned_loss): 32 | partitioned_loss[j] = torch.stack(l).min() 33 | 34 | loss = torch.stack(partitioned_loss).mean() 35 | return loss 36 | -------------------------------------------------------------------------------- /notebooks/prettyprint.py: -------------------------------------------------------------------------------- 1 | class QAPrettyPrint: 2 | def __init__(self, support, span): 3 | self.support = support 4 | self.span = span 5 | 6 | def _repr_html_(self): 7 | start, end = self.span 8 | pre_highlight = self.support[:start] 9 | highlight = self.support[start:end] 10 | post_highlight = self.support[end:] 11 | 12 | def _highlight(text): 13 | return '' + text + '' 14 | 15 | text = pre_highlight + _highlight(highlight) + post_highlight 16 | return text.replace('\n', '
') 17 | 18 | def print_nli(premise, hypothesis, label): 19 | print('{}\t--({})-->\t{}'.format(premise, label, hypothesis)) 20 | -------------------------------------------------------------------------------- /projects/knowledge_integration/__init__.py: -------------------------------------------------------------------------------- 1 | import projects.knowledge_integration.readers 2 | -------------------------------------------------------------------------------- /projects/knowledge_integration/conf/nli/multinli/cbilstm_assertion.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | Basic multiple choice configuration. 3 | 4 | parent_config: './conf/jack.yaml' 5 | 6 | assertion_dir: 'data/knowledge_integration/knowledge_store' 7 | assertion_limit: 20 8 | assertion_sources: ['conceptnet'] 9 | 10 | reading_module: 11 | - input: 'text' 12 | module: 'lstm' 13 | with_projection: True 14 | activation: relu 15 | 16 | 17 | seed: 1337 18 | 19 | reader: 'cbilstm_nli_assertion_reader' 20 | save_dir: './cbilstm_nli_assertion_reader' 21 | 22 | loader: snli 23 | 24 | train: 'data/MultiNLI/multinli_1.0/multinli_1.0_train.jsonl' 25 | dev: 'data/MultiNLI/multinli_1.0/multinli_1.0_dev.jsonl' 26 | 27 | embedding_format: 'memory_map_dir' 28 | embedding_file: 'data/GloVe/glove.840B.300d.memory_map_dir' 29 | 30 | vocab_from_embeddings: True 31 | with_char_embeddings: True 32 | 33 | repr_dim: 300 34 | epochs: 20 35 | dropout: 0.2 36 | batch_size: 128 37 | -------------------------------------------------------------------------------- /projects/knowledge_integration/conf/nli/snli/cbilstm_assertion.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | Basic multiple choice configuration. 3 | 4 | parent_config: './conf/jack.yaml' 5 | 6 | assertion_dir: 'data/knowledge_integration/knowledge_store' 7 | assertion_limit: 20 8 | assertion_sources: ['conceptnet'] 9 | 10 | reading_module: 11 | - input: 'text' 12 | module: 'lstm' 13 | with_projection: True 14 | activation: relu 15 | 16 | 17 | seed: 1337 18 | 19 | reader: 'cbilstm_nli_assertion_reader' 20 | save_dir: './cbilstm_nli_assertion_reader' 21 | 22 | loader: snli 23 | 24 | train: 'data/SNLI/snli_1.0/snli_1.0_train.jsonl' 25 | dev: 'data/SNLI/snli_1.0/snli_1.0_dev.jsonl' 26 | test: 'data/SNLI/snli_1.0/snli_1.0_test.jsonl' 27 | 28 | embedding_format: 'memory_map_dir' 29 | embedding_file: 'data/GloVe/glove.840B.300d.memory_map_dir' 30 | 31 | vocab_from_embeddings: True 32 | with_char_embeddings: True 33 | 34 | repr_dim: 300 35 | epochs: 20 36 | dropout: 0.2 37 | batch_size: 128 38 | -------------------------------------------------------------------------------- /projects/knowledge_integration/conf/qa/bilstm_assertion.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | BiLSTM with assertions. 
3 | 4 | parent_config: './conf/qa/squad/abstract_squad.yaml' 5 | 6 | name: 'bilstm_assertion_reader' 7 | reader: 'modular_assertion_qa_reader' 8 | 9 | assertion_dir: 'data/knowledge_integration/knowledge_store' 10 | assertion_limit: 50 11 | assertion_sources: ['conceptnet'] 12 | no_reading: False 13 | 14 | heuristic: 'pair' 15 | 16 | reading_module: 17 | - input: 'text' 18 | module: 'lstm' 19 | name: 'reading' 20 | with_projection: True 21 | activation: 'relu' 22 | 23 | dropout: 0.2 24 | repr_dim: 150 25 | max_span_size: 16 26 | 27 | model: 28 | encoder_layer: 29 | 30 | - input: 'support' 31 | module: 'lstm' 32 | name: 'encoder' 33 | activation: 'tanh' 34 | with_projection: True 35 | dropout: True 36 | 37 | - input: 'question' 38 | module: 'lstm' 39 | name: 'encoder' 40 | with_projection: True 41 | activation: 'tanh' 42 | dropout: True 43 | 44 | answer_layer: 45 | module: 'mlp' 46 | -------------------------------------------------------------------------------- /projects/knowledge_integration/conf/qa/squad/bilstm_assertion.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | BiLSTM with assertions. 3 | 4 | parent_config: ['./projects/knowledge_integration/conf/qa/bilstm_assertion.yaml', './conf/qa/squad/abstract_squad.yaml'] 5 | 6 | # fixed experiment seed 7 | seed: 1337 8 | dropout: 0.2 9 | repr_dim: 150 10 | -------------------------------------------------------------------------------- /projects/knowledge_integration/conf/qa/squad/bilstm_assertion_definition.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | BiLSTM with assertions. 3 | 4 | parent_config: ['./projects/knowledge_integration/conf/qa/squad/bilstm_assertion.yaml', './conf/qa/squad/abstract_squad.yaml'] 5 | 6 | name: 'bilstm_assertion_definition_reader' 7 | reader: 'modular_assertion_definition_qa_reader' 8 | 9 | topk: 16 10 | 11 | # fraction of training batches where we extract definitions (1.0 means all, 0.0 means never) 12 | # can be lowered to speed up training, because using definitions requires running the model twice 13 | training_fraction_with_definition: 0.3 14 | -------------------------------------------------------------------------------- /projects/knowledge_integration/conf/qa/triviaqa/web/bilstm_assertion.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | BiLSTM with assertions. 3 | 4 | parent_config: ['./projects/knowledge_integration/conf/qa/bilstm_assertion.yaml', './conf/qa/triviaqa/wiki/abstract_triviaqa.yaml'] 5 | 6 | # fixed experiment seed 7 | seed: 1337 8 | dropout: 0.2 9 | repr_dim: 150 10 | -------------------------------------------------------------------------------- /projects/knowledge_integration/conf/qa/triviaqa/web/bilstm_assertion_definition.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | BiLSTM with assertions. 
3 | 4 | parent_config: ['./projects/knowledge_integration/conf/qa/triviaqa/wiki/bilstm_assertion.yaml', './conf/qa/triviaqa/wiki/abstract_triviaqa.yaml'] 5 | 6 | name: 'bilstm_assertion_definition_reader' 7 | reader: 'modular_assertion_definition_qa_reader' 8 | 9 | topk: 16 10 | 11 | # fraction of training batches where we extract definitions (1.0 means all, 0.0 means never) 12 | # can be lowered to speed up training, because using definitions requires running the model twice 13 | training_fraction_with_definition: 0.3 14 | -------------------------------------------------------------------------------- /projects/knowledge_integration/conf/qa/triviaqa/wiki/bilstm_assertion.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | BiLSTM with assertions. 3 | 4 | parent_config: ['./projects/knowledge_integration/conf/qa/bilstm_assertion.yaml', './conf/qa/triviaqa/wiki/abstract_triviaqa.yaml'] 5 | 6 | # fixed experiment seed 7 | seed: 1337 8 | dropout: 0.2 9 | repr_dim: 150 10 | -------------------------------------------------------------------------------- /projects/knowledge_integration/conf/qa/triviaqa/wiki/bilstm_assertion_definition.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | BiLSTM with assertions. 3 | 4 | parent_config: ['./projects/knowledge_integration/conf/qa/triviaqa/wiki/bilstm_assertion.yaml', './conf/qa/triviaqa/wiki/abstract_triviaqa.yaml'] 5 | 6 | name: 'bilstm_assertion_definition_reader' 7 | reader: 'modular_assertion_definition_qa_reader' 8 | 9 | topk: 16 10 | 11 | # fraction of training batches where we extract definitions (1.0 means all, 0.0 means never) 12 | # can be lowered to speed up training, because using definitions requires running the model twice 13 | training_fraction_with_definition: 0.3 14 | -------------------------------------------------------------------------------- /projects/knowledge_integration/qa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/projects/knowledge_integration/qa/__init__.py -------------------------------------------------------------------------------- /projects/knowledge_integration/readers.py: -------------------------------------------------------------------------------- 1 | """Reader definitions that use background knowledge.""" 2 | 3 | from jack.core.tensorflow import TFReader 4 | from jack.readers.implementations import nli_reader, create_shared_resources, extractive_qa_reader 5 | 6 | 7 | @extractive_qa_reader 8 | def modular_assertion_qa_reader(resources_or_conf=None): 9 | from projects.knowledge_integration.qa.shared import XQAAssertionInputModule 10 | from jack.readers.extractive_qa.shared import XQAOutputModule 11 | from projects.knowledge_integration.qa.shared import ModularAssertionQAModel 12 | shared_resources = create_shared_resources(resources_or_conf) 13 | 14 | input_module = XQAAssertionInputModule(shared_resources) 15 | model_module = ModularAssertionQAModel(shared_resources) 16 | output_module = XQAOutputModule() 17 | return TFReader(shared_resources, input_module, model_module, output_module) 18 | 19 | 20 | @extractive_qa_reader 21 | def modular_assertion_definition_qa_reader(resources_or_conf=None): 22 | from projects.knowledge_integration.qa.definition_model import XQAAssertionDefinitionInputModule 23 | from projects.knowledge_integration.qa.definition_model import 
ModularAssertionDefinitionQAModel 24 | from jack.readers.extractive_qa.shared import XQAOutputModule 25 | shared_resources = create_shared_resources(resources_or_conf) 26 | 27 | input_module = XQAAssertionDefinitionInputModule(shared_resources) 28 | model_module = ModularAssertionDefinitionQAModel(shared_resources) 29 | output_module = XQAOutputModule() 30 | reader = TFReader(shared_resources, input_module, model_module, output_module) 31 | input_module.set_reader(reader) 32 | return reader  # return the same reader instance the input module references instead of constructing a second TFReader 33 | 34 | 35 | @nli_reader 36 | def cbilstm_nli_assertion_reader(resources_or_conf=None): 37 | from projects.knowledge_integration.nli import NLIAssertionModel 38 | from projects.knowledge_integration.nli import MultipleChoiceAssertionInputModule 39 | from jack.readers.classification.shared import SimpleClassificationOutputModule 40 | shared_resources = create_shared_resources(resources_or_conf) 41 | input_module = MultipleChoiceAssertionInputModule(shared_resources) 42 | model_module = NLIAssertionModel(shared_resources) 43 | output_module = SimpleClassificationOutputModule(shared_resources) 44 | return TFReader(shared_resources, input_module, model_module, output_module) 45 | -------------------------------------------------------------------------------- /projects/knowledge_integration/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/projects/knowledge_integration/scripts/__init__.py -------------------------------------------------------------------------------- /projects/knowledge_integration/shared.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from jack.core import TensorPort, Ports 4 | 5 | 6 | class AssertionMRPorts: 7 | # When feeding embeddings directly 8 | question_length = Ports.Input.question_length 9 | support_length = Ports.Input.support_length 10 | 11 | # but also ids, for char-based embeddings 12 | question = Ports.Input.question 13 | support = Ports.Input.support 14 | 15 | word_char_length = TensorPort(np.int32, [None], "word_char_length", "length of each word in characters", "[U]") 16 | 17 | token_char_offsets = TensorPort(np.int32, [None, None], "token_char_offsets", 18 | "Character offsets of tokens in support.", "[S, support_length]") 19 | 20 | keep_prob = Ports.keep_prob 21 | is_eval = Ports.is_eval 22 | 23 | word_embeddings = TensorPort(np.float32, [None, None], "word_embeddings", 24 | "Embeddings only for words occurring in batch.", "[None, N]") 25 | 26 | assertion_lengths = TensorPort(np.int32, [None], "assertion_lengths", "Length of assertion.", "[R]") 27 | 28 | assertions = TensorPort(np.int32, [None, None], "assertions", 29 | "Represents batch dependent assertion word ids.", 30 | "[R, L]") 31 | assertion2question = TensorPort(np.int32, [None], "assertion2question", "Question idx per assertion", "[R]") 32 | 33 | word2lemma = TensorPort(np.int32, [None], "word2lemma", "Lemma idx per word", "[U]") 34 | 35 | word_chars = TensorPort(np.int32, [None, None], "word_chars", "Represents words as sequence of chars", 36 | "[U, max_num_chars]") 37 | 38 | question_arg_span = TensorPort(np.int32, [None, 2], "question_arg_span", 39 | "span of an argument in the question", "[Q, 2]") 40 | 41 | support_arg_span = TensorPort(np.int32, [None, 2], "support_arg_span", 42 | "span of an argument in the support", "[S, 2]") 43 | 44 | 
assertion2question_arg_span = TensorPort(np.int32, [None], "assertion2question_arg_span", 45 | "assertion to question span mapping", "[A]") 46 | assertion2support_arg_span = TensorPort(np.int32, [None], "assertion2support_arg_span", 47 | "assertion to support span mapping", "[A]") 48 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # Configuration of py.test 2 | [pytest] 3 | addopts=-v --forked --numprocesses=auto 4 | -n 4 5 | 6 | # Do not run tests in the build folder 7 | norecursedirs = docs *.egg-info .git appdir .tox 8 | 9 | # PEP-8 The following are ignored: 10 | # E501 line too long (82 > 79 characters) 11 | # E402 module level import not at top of file - temporary measure to continue adding ros python packaged in sys.path 12 | # E731 do not assign a lambda expression, use a def 13 | 14 | pep8ignore=* E501 \ 15 | * E402 \ 16 | * E731 \ 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jsonschema 2 | numpy 3 | parse 4 | scipy 5 | sklearn 6 | typing 7 | sacred==0.7.2 8 | sqlalchemy 9 | pyyaml 10 | progressbar2 11 | spacy==1.9 12 | diskcache 13 | pytest 14 | pytest-runner 15 | pytest-xdist 16 | pytest-pep8 17 | pytest-xdist 18 | pytest-cov 19 | codecov 20 | diskcache 21 | progressbar 22 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from setuptools import find_packages 4 | from setuptools import setup 5 | from setuptools.command.develop import develop as _develop 6 | from setuptools.command.install import install as _install 7 | 8 | 9 | def spacy_download_en(): 10 | import spacy 11 | try: 12 | spacy.load('en') 13 | except: 14 | import subprocess 15 | args = ['python3 -m spacy download en'] 16 | subprocess.call(args, shell=True) 17 | 18 | 19 | class Install(_install): 20 | def run(self): 21 | _install.do_egg_install(self) 22 | spacy_download_en() 23 | _install.run(self) 24 | 25 | 26 | class Develop(_develop): 27 | def run(self): 28 | spacy_download_en() 29 | _develop.run(self) 30 | 31 | 32 | with open('requirements.txt', 'r') as f: 33 | install_requires = [l for l in f.readlines() if not l.startswith('http://')] 34 | 35 | extras_require = { 36 | 'tf': ['tensorflow==1.8.0'], 37 | 'tf_gpu': ['tensorflow-gpu==1.8.0'], 38 | 'torch': ['torch'] 39 | } 40 | 41 | with open("README.md", "r+", encoding="utf-8") as f: 42 | long_description = f.read() 43 | 44 | setup(name='uclmr-jack', 45 | version='0.2.1', 46 | description='Jack the Reader is a Python framework for Machine Reading', 47 | long_description=long_description, 48 | long_description_content_type="text/markdown", 49 | author='UCL Machine Reading', 50 | author_email='s.riedel@cs.ucl.ac.uk', 51 | url='https://github.com/uclmr/jack', 52 | test_suite='tests', 53 | license='MIT', 54 | packages=find_packages(), 55 | cmdclass={ 56 | 'install': Install, 57 | 'develop': Develop 58 | }, 59 | install_requires=install_requires, 60 | extras_require=extras_require, 61 | setup_requires=install_requires, 62 | 
tests_require=install_requires, 63 | classifiers=[ 64 | 'Development Status :: 4 - Beta', 65 | 'Intended Audience :: Developers', 66 | 'Intended Audience :: Education', 67 | 'Intended Audience :: Science/Research', 68 | 'License :: OSI Approved :: MIT License', 69 | 'Programming Language :: Python :: 3', 70 | 'Programming Language :: Python :: 3.6', 71 | 'Topic :: Software Development :: Libraries', 72 | 'Topic :: Software Development :: Libraries :: Python Modules', 73 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 74 | 'Operating System :: OS Independent' 75 | ], 76 | keywords='tensorflow machine learning natural language processing question answering') 77 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def pytest_collection_modifyitems(items): 5 | for item in items: 6 | if "sentihood" in item.nodeid: 7 | item.add_marker(pytest.mark.sentihood) 8 | elif "SNLI" in item.nodeid: 9 | item.add_marker(pytest.mark.SNLI) 10 | 11 | if "overfit" in item.nodeid: 12 | item.add_marker(pytest.mark.overfit) 13 | elif "smalldata" in item.nodeid: 14 | item.add_marker(pytest.mark.smalldata) 15 | elif "readme" in item.nodeid: 16 | item.add_marker(pytest.mark.readme) 17 | -------------------------------------------------------------------------------- /tests/jack/debug/test_debug.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | -------------------------------------------------------------------------------- /tests/jack/eval/test_kbp_eval.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from jack.eval.link_prediction import compute_ranks 4 | 5 | triple_to_score_map = { 6 | ('a', 'p', 'a'): 1, 7 | ('a', 'p', 'b'): 2, 8 | ('a', 'p', 'c'): 3, 9 | ('a', 'p', 'd'): 4 10 | } 11 | 12 | triples = sorted(triple for triple, _ in triple_to_score_map.items()) 13 | entity_set = {s for (s, _, _) in triples} | {o for (_, _, o) in triples} 14 | 15 | 16 | def scoring_function(triples): 17 | return [triple_to_score_map.get(triple, 0) for triple in triples] 18 | 19 | 20 | def test_kbp_eval(): 21 | ranks, f_ranks = compute_ranks(scoring_function=scoring_function, triples=triples, entity_set=entity_set) 22 | 23 | ranks_l, ranks_r = ranks 24 | f_ranks_l, f_ranks_r = f_ranks 25 | 26 | assert ranks_l == [1, 1, 1, 1] 27 | assert ranks_r == [4, 3, 2, 1] 28 | 29 | assert f_ranks_l == ranks_l 30 | assert f_ranks_r == ranks_r 31 | -------------------------------------------------------------------------------- /tests/jack/preprocess/test_batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from jack.util import batch 4 | 5 | 6 | def test_get_buckets(): 7 | data = { 8 | 'data0': [i * [i] for i in range(1, 10)], 9 | 'data1': [i * [i] for i in range(3, 12)] 10 | } 11 | 12 | buckets2ids, ids2buckets = batch.get_buckets(data=data, 13 | order=('data0', 'data1'), 14 | structure=(2, 2)) 15 | 16 | assert buckets2ids == { 17 | '(1, 0)': [5, 6], 18 | '(1, 1)': [7, 8], 19 | '(0, 0)': [0, 1, 2], 20 | '(0, 1)': [3, 4] 21 | } 22 | assert ids2buckets == { 23 | 0: '(0, 0)', 24 | 1: '(0, 0)', 25 | 2: '(0, 0)', 26 | 3: '(0, 1)', 27 | 4: '(0, 1)', 28 | 5: '(1, 0)', 29 | 6: '(1, 0)', 30 | 7: '(1, 1)', 31 | 8: '(1, 1)' 32 | } 33 | 34 | 35 | def test_get_batches(): 
36 | data = { 37 | 'data0': [[i] * 2 for i in range(10)], 38 | 'data1': [[i] * 3 for i in range(10)] 39 | } 40 | 41 | batch_generator = batch.get_batches(data, batch_size=3, exact_epoch=True) 42 | batches = list(batch_generator) 43 | 44 | assert batches[0]['data0'].shape == batches[1]['data0'].shape == batches[2]['data0'].shape == (3, 2) 45 | assert batches[0]['data1'].shape == batches[1]['data1'].shape == batches[2]['data1'].shape == (3, 3) 46 | 47 | assert batches[3]['data0'].shape == (1, 2) 48 | assert batches[3]['data1'].shape == (1, 3) 49 | 50 | assert len(batches) == 4 51 | 52 | batch_generator = batch.get_batches(data, batch_size=3, exact_epoch=False) 53 | batches = list(batch_generator) 54 | 55 | assert len(batches) == 3 56 | -------------------------------------------------------------------------------- /tests/jack/preprocess/test_map.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from jack.util import map 6 | from jack.util import preprocessing 7 | 8 | text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et ' \ 9 | 'dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ' \ 10 | 'ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat ' \ 11 | 'nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit ' \ 12 | 'anim id est laborum.' 13 | 14 | tokenized_text = ['Lorem', 'ipsum', 'dolor', 'sit', 'amet', ',', 'consectetur', 'adipiscing', 'elit', ',', 'sed', 15 | 'do', 'eiusmod', 'tempor', 'incididunt', 'ut', 'labore', 'et', 'dolore', 'magna', 'aliqua', '.', 16 | 'Ut', 'enim', 'ad', 'minim', 'veniam', ',', 'quis', 'nostrud', 'exercitation', 'ullamco', 17 | 'laboris', 'nisi', 'ut', 'aliquip', 'ex', 'ea', 'commodo', 'consequat', '.', 'Duis', 'aute', 18 | 'irure', 'dolor', 'in', 'reprehenderit', 'in', 'voluptate', 'velit', 'esse', 'cillum', 'dolore', 19 | 'eu', 'fugiat', 'nulla', 'pariatur', '.', 'Excepteur', 'sint', 'occaecat', 'cupidatat', 'non', 20 | 'proident', ',', 'sunt', 'in', 'culpa', 'qui', 'officia', 'deserunt', 'mollit', 'anim', 'id', 21 | 'est', 'laborum', '.'] 22 | 23 | 24 | def test_tokenize(): 25 | assert preprocessing.tokenize(text) == tokenized_text 26 | question_text = "where is the cat?" 
27 | desired_tokenised_question = ["where","is","the","cat","?"] 28 | assert preprocessing.tokenize(question_text) == desired_tokenised_question 29 | 30 | 31 | def test_get_list_shape(): 32 | data = [[1, 2, 3], [4, 5]] 33 | assert map.get_list_shape(data) == [2, 3] 34 | 35 | data = [[[1, 2, 3]], [[4, 5], [6, 7]]] 36 | assert map.get_list_shape(data) == [2, 2, 3] 37 | 38 | 39 | def test_numpify(): 40 | def _fillna(xs): 41 | data = np.array(xs) 42 | lens = np.array([len(i) for i in data]) 43 | mask = np.arange(lens.max()) < lens[:, None] 44 | out = np.zeros(mask.shape, dtype=data.dtype) 45 | out[mask] = np.concatenate(data) 46 | return out 47 | 48 | data = [[1, 2, 3], [4, 5], [6, 7, 8]] 49 | data_np = map.numpify(data) 50 | 51 | for a, b in zip([np.array(x) for x in data], data_np): 52 | assert (a == b).all() 53 | 54 | data = {0: [[1, 2, 3]], 1: [[4, 5], [6, 7, 8]], 2: [[6, 7, 8]]} 55 | data_np = map.numpify(data) 56 | 57 | for ak, bk in zip(data.keys(), data_np.keys()): 58 | a, b = data[ak], data_np[bk] 59 | assert (_fillna(a) == b).all() 60 | -------------------------------------------------------------------------------- /tests/jack/preprocess/test_vocab_prune.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pprint import pprint 4 | 5 | from jack.core import QASetting 6 | from jack.util import preprocessing 7 | 8 | 9 | def test_vocab(): 10 | train_data = [ 11 | QASetting(question='A person is training his horse for a competition.', 12 | support=['A person on a horse jumps over a broken down airplane.'], 13 | candidates=['entailment', 'neutral', 'contradiction']) 14 | ] 15 | 16 | print('build vocab based on train data') 17 | train_vocab = preprocessing.fill_vocab(train_data) 18 | train_vocab.freeze() 19 | pprint(train_vocab._sym2freqs) 20 | pprint(train_vocab._sym2id) 21 | 22 | MIN_VOCAB_FREQ, MAX_VOCAB_CNT = 2, 10 23 | train_vocab = train_vocab.prune(MIN_VOCAB_FREQ, MAX_VOCAB_CNT) 24 | 25 | pprint(train_vocab._sym2freqs) 26 | pprint(train_vocab._sym2id) 27 | 28 | print('encode train data') 29 | train_data = preprocessing.nlp_preprocess(train_data[0].question, train_vocab)[0] 30 | print(train_data) 31 | -------------------------------------------------------------------------------- /tests/jack/readers/extractive_qa/test_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from jack.core import QASetting, Answer 4 | from jack.readers.extractive_qa.util import prepare_data 5 | from jack.util.vocab import Vocab 6 | 7 | qa_setting = QASetting(question="What is the answer?", 8 | support=["It is not A.", "It is B."]) 9 | answers = [Answer(text="B", span=(6, 7), doc_idx=1)] 10 | 11 | 12 | def test_prepare_data(): 13 | 14 | result = prepare_data(qa_setting, answers, Vocab(), 15 | with_answers=True) 16 | 17 | question_tokens, question_ids, question_lemmas, question_length, \ 18 | support_tokens, support_ids, support_lemmas, support_length, \ 19 | word_in_question, token_offsets, answer_spans = result 20 | 21 | assert question_tokens == ['What', 'is', 'the', 'answer', '?'] 22 | assert question_ids == [1, 2, 3, 4, 5] 23 | assert question_lemmas is None 24 | assert question_length == 5 25 | 26 | assert support_tokens == [['It', 'is', 'not', 'A', '.', ], ['It', 'is', 'B', '.']] 27 | assert support_ids == [[6, 2, 7, 8, 9], [6, 2, 10, 9]] 28 | assert support_lemmas == [None, None] 29 | assert support_length == [5, 4] 30 | assert word_in_question == [[0.0, 
1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0]] 31 | assert token_offsets == [[0, 3, 6, 10, 11], [0, 3, 6, 7]] 32 | assert answer_spans == [[], [(2, 2)]] 33 | -------------------------------------------------------------------------------- /tests/jack/readers/multiple_choice/test_simple_mcqa.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from jack.readers.classification.shared import * 4 | 5 | from jack.util.vocab import Vocab 6 | 7 | 8 | def test_single_support_fixed_class_inputs(): 9 | import logging 10 | logging.basicConfig(level=logging.INFO) 11 | data_set = [ 12 | (QASetting("Where is the cat?", ["the cat is on the mat."]), [Answer("mat")]) 13 | ] 14 | shared_resources = SharedResources(Vocab(), {}) 15 | input_module = ClassificationSingleSupportInputModule(shared_resources) 16 | input_module.setup_from_data(data_set) 17 | input_module.setup() 18 | 19 | assert len(input_module.shared_resources.answer_vocab) == 1 20 | assert len(input_module.shared_resources.vocab) == 9 21 | 22 | tensor_data_set = list(input_module.batch_generator(data_set, batch_size=3, is_eval=False)) 23 | 24 | expected_support = ["the", "cat", "is", "on", "the", "mat", "."] 25 | expected_support_ids = [[shared_resources.vocab.get_id(sym) for sym in expected_support]] 26 | first_instance = tensor_data_set[0] 27 | actual_support_ids = first_instance[Ports.Input.support] 28 | assert np.array_equal(actual_support_ids, expected_support_ids) 29 | assert first_instance[Ports.Input.support_length][0] == len(expected_support) 30 | 31 | actual_answer_ids = first_instance[Ports.Target.target_index] 32 | expected_answer = [input_module.shared_resources.answer_vocab.get_id("mat")] 33 | assert np.array_equal(actual_answer_ids, expected_answer) 34 | 35 | actual_question_ids = first_instance[Ports.Input.question] 36 | expected_question = ["where", "is", "the", "cat", "?"] 37 | expected_question_ids = [[shared_resources.vocab.get_id(sym) for sym in expected_question]] 38 | assert np.array_equal(actual_question_ids, expected_question_ids) 39 | assert first_instance[Ports.Input.question_length][0] == len(expected_question) 40 | -------------------------------------------------------------------------------- /tests/jack/readers/test_fastqa.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | import jack.readers as readers 7 | from jack.core import SharedResources 8 | from jack.io.embeddings.embeddings import Embeddings 9 | from jack.io.load import load_jack 10 | from jack.readers.extractive_qa.util import tokenize 11 | from jack.util.vocab import Vocab 12 | 13 | 14 | def test_fastqa(): 15 | tf.reset_default_graph() 16 | 17 | data = load_jack('tests/test_data/squad/snippet_jtr.json') 18 | questions = [] 19 | # fast qa must be initialized with existing embeddings, so we create some 20 | vocab = dict() 21 | for question, _ in data: 22 | questions.append(question) 23 | for t in tokenize(question.question): 24 | if t not in vocab: 25 | vocab[t] = len(vocab) 26 | embeddings = Embeddings(vocab, np.random.random([len(vocab), 10])) 27 | 28 | # we need a vocabulary (with embeddings for our fastqa_reader, but this is not always necessary) 29 | vocab = Vocab(vocab=vocab) 30 | 31 | # ... 
and a config 32 | config = {"batch_size": 1, "repr_dim": 10, "with_char_embeddings": True} 33 | 34 | # create/setup reader 35 | shared_resources = SharedResources(vocab, config, embeddings) 36 | fastqa_reader = readers.fastqa_reader(shared_resources) 37 | fastqa_reader.setup_from_data(data) 38 | 39 | answers = fastqa_reader(questions) 40 | 41 | assert answers, "FastQA reader should produce answers" 42 | -------------------------------------------------------------------------------- /tests/jack/readers/test_fastqa_loop.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | from jack.core import SharedResources 6 | from jack.core.tensorflow import TFReader 7 | from jack.core.tensorport import Ports 8 | from jack.io.embeddings.embeddings import load_embeddings 9 | from jack.io.load import load_jack 10 | from jack.readers.extractive_qa.shared import XQAInputModule, XQAOutputModule 11 | from jack.readers.extractive_qa.tensorflow.fastqa import FastQAModule 12 | from jack.util.vocab import Vocab 13 | 14 | 15 | def test_fastqa(): 16 | tf.reset_default_graph() 17 | 18 | data = load_jack('tests/test_data/squad/snippet_jtr.json') 19 | 20 | # fast qa must be initialized with existing embeddings, so we create some 21 | embeddings = load_embeddings('./tests/test_data/glove.840B.300d_top256.txt', 'glove') 22 | 23 | # we need a vocabulary (with embeddings for our fastqa_reader, but this is not always necessary) 24 | vocab = Vocab(vocab=embeddings.vocabulary) 25 | 26 | # ... and a config 27 | config = { 28 | "batch_size": 1, 29 | "repr_dim": 10, 30 | "with_char_embeddings": True 31 | } 32 | 33 | # create/setup reader 34 | shared_resources = SharedResources(vocab, config, embeddings) 35 | 36 | input_module = XQAInputModule(shared_resources) 37 | model_module = FastQAModule(shared_resources) 38 | output_module = XQAOutputModule() 39 | 40 | reader = TFReader(shared_resources, input_module, model_module, output_module) 41 | reader.setup_from_data(data, is_training=True) 42 | 43 | loss = reader.model_module.tensors[Ports.loss] 44 | optimizer = tf.train.AdagradOptimizer(learning_rate=0.01) 45 | min_op = optimizer.minimize(loss) 46 | 47 | session = model_module.tf_session 48 | session.run(tf.global_variables_initializer()) 49 | 50 | for epoch in range(0, 10): 51 | for batch in reader.input_module.batch_generator(data, 1, False): 52 | feed_dict = reader.model_module.convert_to_feed_dict(batch) 53 | loss_value, _ = session.run((loss, min_op), feed_dict=feed_dict) 54 | print(loss_value) 55 | -------------------------------------------------------------------------------- /tests/jack/readers/test_kbp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | import jack.readers as readers 6 | from jack.io.load import loaders 7 | 8 | 9 | def test_kbp(): 10 | data = loaders['jack']('tests/test_data/WN18/wn18-snippet.jack.json') 11 | questions = [question for question, _ in data] 12 | 13 | for model_name in ['transe', 'distmult', 'complex']: 14 | 15 | with tf.variable_scope(model_name): 16 | config = { 17 | 'batch_size': 1, 18 | 'repr_dim': 10 19 | } 20 | 21 | reader = readers.readers['{}_reader'.format(model_name)](config) 22 | reader.setup_from_data(data) 23 | 24 | answers = reader(questions) 25 | 26 | assert len(answers) == 5000 27 | 28 | assert answers, 'KBP reader should produce answers' 29 | 
-------------------------------------------------------------------------------- /tests/jack/readers/test_readers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Smoke test: train all readers for one iteration & run inference.""" 4 | 5 | from functools import partial 6 | 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | from jack import readers 11 | from jack.core.data_structures import QASetting, Answer 12 | from jack.core.shared_resources import SharedResources 13 | from jack.core.tensorflow import TFReader 14 | from jack.io.embeddings import Embeddings 15 | from jack.readers.extractive_qa.util import tokenize 16 | from jack.util.vocab import Vocab 17 | 18 | 19 | def teardown_function(_): 20 | tf.reset_default_graph() 21 | 22 | 23 | def build_vocab(questions): 24 | """Since some readers require an initialized vocabulary, initialize it here.""" 25 | 26 | vocab = dict() 27 | for question in questions: 28 | for t in tokenize(question.question): 29 | if t not in vocab: 30 | vocab[t] = len(vocab) 31 | embeddings = Embeddings(vocab, np.random.random([len(vocab), 10])) 32 | 33 | vocab = Vocab(vocab=embeddings.vocabulary) 34 | return vocab, embeddings 35 | 36 | 37 | def smoke_test(reader_name): 38 | """Instantiate the reader, train for one epoch, and run inference.""" 39 | 40 | data_set = [ 41 | (QASetting( 42 | question="Which is it?", 43 | support=["While b seems plausible, answer a is correct."], 44 | id="1", 45 | candidates=["a", "b", "c"]), 46 | [Answer("a", (6, 6))]) 47 | ] 48 | questions = [q for q, _ in data_set] 49 | v, e = build_vocab(questions) 50 | shared_resources = SharedResources(v, {"repr_dim": 10, "dropout": 0.5}, e) 51 | tf.reset_default_graph() 52 | reader = readers.readers[reader_name](shared_resources) 53 | if isinstance(reader, TFReader): 54 | reader.train(tf.train.AdamOptimizer(), data_set, batch_size=1, max_epochs=1) 55 | else: 56 | import torch 57 | reader.setup_from_data(data_set, is_training=True) 58 | params = list(reader.model_module.prediction_module.parameters()) 59 | params.extend(reader.model_module.loss_module.parameters()) 60 | optimizer = torch.optim.Adam(params, lr=0.01) 61 | reader.train(optimizer, data_set, batch_size=1, max_epochs=1) 62 | 63 | answers = reader(questions) 64 | 65 | assert answers, "{} should produce answers".format(reader_name) 66 | 67 | 68 | BLACKLIST = ['fastqa_reader_torch', 'modular_qa_reader', 'modular_nli_reader'] 69 | READERS = [r for r in readers.readers.keys() 70 | if r not in BLACKLIST] 71 | 72 | # Dynamically generate one test for each reader 73 | current_module = __import__(__name__) 74 | 75 | for reader_name in READERS: 76 | setattr(current_module, "test_{}".format(reader_name), partial(smoke_test, reader_name)) 77 | -------------------------------------------------------------------------------- /tests/jack/readers/test_serialization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tempfile 4 | 5 | import tensorflow as tf 6 | 7 | from jack.io.embeddings import load_embeddings 8 | from jack.io.load import loaders 9 | from jack.readers.implementations import * 10 | from jack.util.vocab import Vocab 11 | 12 | 13 | def test_serialization(): 14 | all_readers = [ 15 | fastqa_reader, 16 | modular_qa_reader, 17 | # fastqa_reader_torch, 18 | dam_snli_reader, 19 | cbilstm_nli_reader, 20 | modular_nli_reader, 21 | distmult_reader, 22 | complex_reader, 23 | transe_reader, 24 | 
] 25 | 26 | for reader in all_readers: 27 | vocab, config = {}, {} 28 | 29 | data = None 30 | if reader in {distmult_reader, complex_reader, transe_reader}: 31 | data = loaders['jack']('tests/test_data/WN18/wn18-snippet.jack.json') 32 | config['repr_dim'] = 50 33 | elif reader in {cbilstm_nli_reader, dam_snli_reader}: 34 | data = loaders['snli']('tests/test_data/SNLI/1000_samples_snli_1.0_train.jsonl') 35 | 36 | embeddings = load_embeddings("data/GloVe/glove.the.50d.txt", 'glove') 37 | vocab = Vocab(vocab=embeddings.vocabulary) 38 | config['repr_dim'] = 50 39 | elif reader in {fastqa_reader}: 40 | data = loaders['squad']('data/SQuAD/snippet.json') 41 | 42 | embeddings = load_embeddings("data/GloVe/glove.the.50d.txt", 'glove') 43 | vocab = Vocab(vocab=embeddings.vocabulary) 44 | config['repr_dim'] = 50 45 | 46 | if data is not None: 47 | tf.reset_default_graph() 48 | 49 | shared_resources = SharedResources(vocab, config, embeddings) 50 | reader_instance = reader(shared_resources) 51 | reader_instance.setup_from_data(data) 52 | 53 | temp_dir_path = tempfile.mkdtemp() 54 | reader_instance.store(temp_dir_path) 55 | 56 | reader_instance.load(temp_dir_path) 57 | 58 | assert reader_instance is not None 59 | -------------------------------------------------------------------------------- /tests/jack/test_core.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from jack.core import SharedResources 6 | from jack.io.embeddings import load_embeddings 7 | from jack.util.vocab import Vocab 8 | 9 | 10 | def test_shared_resources_store(): 11 | embeddings_file = "data/GloVe/glove.the.50d.txt" 12 | embeddings = load_embeddings(embeddings_file, 'glove') 13 | config = { 14 | "embedding_file": embeddings_file, 15 | "embedding_format": "glove" 16 | } 17 | some_vocab = Vocab(vocab=embeddings.vocabulary) 18 | some_vocab('foo') 19 | shared_resources = SharedResources(some_vocab, config, embeddings) 20 | 21 | import tempfile 22 | with tempfile.TemporaryDirectory() as tmp_dir: 23 | path = tmp_dir + "_resources" 24 | shared_resources.store(path) 25 | 26 | new_shared_resources = SharedResources() 27 | new_shared_resources.load(path) 28 | 29 | type_a, type_b = type(new_shared_resources.vocab), type(shared_resources.vocab) 30 | assert type_a == type_b 31 | 32 | for k in new_shared_resources.vocab.__dict__: 33 | assert new_shared_resources.vocab.__dict__[k] == shared_resources.vocab.__dict__[k] 34 | assert new_shared_resources.config == shared_resources.config 35 | assert new_shared_resources.embeddings.lookup.shape == embeddings.lookup.shape 36 | assert np.array_equal(new_shared_resources.embeddings.get(b"the"), embeddings.get(b"the")) 37 | -------------------------------------------------------------------------------- /tests/jack/test_embeddings.py: -------------------------------------------------------------------------------- 1 | from jack.io.embeddings import load_embeddings 2 | import numpy as np 3 | 4 | 5 | def test_memory_map_dir(): 6 | import tempfile 7 | from jack.io.embeddings.memory_map import save_as_memory_map_dir, load_memory_map_dir 8 | embeddings_file = "data/GloVe/glove.the.50d.txt" 9 | embeddings = load_embeddings(embeddings_file, 'glove') 10 | with tempfile.TemporaryDirectory() as tmp_dir: 11 | mem_map_dir = tmp_dir + "/glove.the.50d.memmap" 12 | save_as_memory_map_dir(mem_map_dir, embeddings) 13 | loaded_embeddings = load_memory_map_dir(mem_map_dir) 14 | assert loaded_embeddings.shape == 
embeddings.shape 15 | assert len(loaded_embeddings.vocabulary) == 1 16 | assert loaded_embeddings.vocabulary["the"] == 0 17 | assert "foo" not in loaded_embeddings.vocabulary 18 | assert np.isclose(loaded_embeddings.get("the"), embeddings.get("the"), 1.e-5).all() 19 | -------------------------------------------------------------------------------- /tests/test_conf/dam_test.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | A configuration inheriting from the default jack.yaml 3 | 4 | parent_config: './conf/jack.yaml' 5 | 6 | name: "dam_nli" 7 | 8 | seed: 1337 9 | 10 | # where to store the reader 11 | save_dir: './tests/test_results/dam_reader_test' 12 | 13 | # jack training file 14 | train: 'tests/test_data/SNLI/train.json' 15 | 16 | # jack dev file 17 | dev: 'tests/test_data/SNLI/dev.json' 18 | 19 | # jack test file 20 | test: 'tests/test_data/SNLI/test.json' 21 | 22 | # Reading model to use 23 | reader: 'dam_snli_reader' 24 | 25 | repr_dim: 50 26 | 27 | repr_dim_task_embedding: 50 28 | 29 | max_epochs: 10 30 | 31 | dropout: 0 32 | 33 | batch_size: 64 34 | -------------------------------------------------------------------------------- /tests/test_conf/fastqa_test.yaml: -------------------------------------------------------------------------------- 1 | description: > 2 | A configuration inheriting from the default jack.yaml 3 | 4 | parent_config: './conf/jack.yaml' 5 | 6 | name: "fastqa_reader" 7 | 8 | seed: 1337 9 | 10 | # where to store the reader 11 | save_dir: './tests/test_results/fastqa_reader_test' 12 | 13 | # How large the support should be. Can be used for cutting or filtering QA examples 14 | max_support_length: -1 15 | 16 | # Use also character based embeddings in readers which support it 17 | with_char_embeddings: True 18 | 19 | # jack training file 20 | train: 'data/SQuAD/snippet.jtr.json' 21 | 22 | # jack dev file 23 | dev: 'data/SQuAD/snippet.jtr.json' 24 | 25 | # jack test file 26 | test: null 27 | 28 | # Reading model to use 29 | reader: 'fastqa_reader' 30 | 31 | # [word2vec] or [glove] format of embeddings to be loaded 32 | embedding_format: 'glove' 33 | 34 | # format of embeddings to be loaded 35 | embedding_file: 'tests/test_data/glove.840B.300d_top256.txt' 36 | 37 | # Use fixed vocab of pretrained embeddings 38 | vocab_from_embeddings: True 39 | 40 | repr_dim: 300 41 | 42 | max_epochs: 20 43 | 44 | dropout: 0 45 | 46 | batch_size: 64 47 | -------------------------------------------------------------------------------- /tests/test_conf/snli_small_adagrad_test.yaml: -------------------------------------------------------------------------------- 1 | parent_config: 'conf/jack.yaml' 2 | 3 | reader: 'dam_snli_reader' 4 | 5 | seed: 1337 6 | train: 'data/SNLI/snippet.jtr_v1.json' 7 | dev: 'data/SNLI/snippet.jtr_v1.json' 8 | test: 'data/SNLI/snippet.jtr_v1.json' 9 | 10 | epochs: 20 11 | optimizer: 'adagrad' 12 | learning_rate: 0.001 13 | batch_size: 32 14 | learning_rate_decay: 0.99 15 | l2: 0.0 16 | dev_batch_size: 32 17 | -------------------------------------------------------------------------------- /tests/test_data/MultiNLI/2000_samples_train_jtr.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [], 3 | "globals": { 4 | "candidates": [ 5 | { 6 | "text": "entailment" 7 | }, 8 | { 9 | "text": "neutral" 10 | }, 11 | { 12 | "text": "contradiction" 13 | } 14 | ] 15 | }, 16 | "meta": "MultiSNLI" 17 | } 
-------------------------------------------------------------------------------- /tests/test_data/MultiNLI/overfit.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [], 3 | "globals": { 4 | "candidates": [ 5 | { 6 | "text": "entailment" 7 | }, 8 | { 9 | "text": "neutral" 10 | }, 11 | { 12 | "text": "contradiction" 13 | } 14 | ] 15 | }, 16 | "meta": "MultiSNLI" 17 | } -------------------------------------------------------------------------------- /tests/test_data/wiki.json: -------------------------------------------------------------------------------- 1 | ["Who is this?", "Born and raised in a Hindu merchant caste family in coastal Gujarat, western India, and trained in law at the Inner Temple, London, Gandhi first employed nonviolent civil disobedience as an expatriate lawyer in South Africa, in the resident Indian community's struggle for civil rights. After his return to India in 1915, he set about organising peasants, farmers, and urban labourers to protest against excessive land-tax and discrimination. Assuming leadership of the Indian National Congress in 1921, Gandhi led nationwide campaigns for easing poverty, expanding women's rights, building religious and ethnic amity, ending untouchability, but above all for achieving Swaraj or self-rule.\n", "Gandhi"] 2 | ["Who is this?", "Near the beginning of his career, Einstein thought that Newtonian mechanics was no longer enough to reconcile the laws of classical mechanics with the laws of the electromagnetic field. This led him to develop his special theory of relativity. He realized, however, that the principle of relativity could also be extended to gravitational fields, and with his subsequent theory of gravitation in 1916, he published a paper on general relativity. He continued to deal with problems of statistical mechanics and quantum theory, which led to his explanations of particle theory and the motion of molecules. He also investigated the thermal properties of light which laid the foundation of the photon theory of light. In 1917, Einstein applied the general theory of relativity to model the large-scale structure of the universe.\n", "Albert Einstein"] 3 | ["Who is this?", "He was a pioneer of the application of operator theory to quantum mechanics, in the development of functional analysis, and a key figure in the development of game theory and the concepts of cellular automata, the universal constructor and the digital computer. He published over 150 papers in his life: about 60 in pure mathematics, 20 in physics, and 60 in applied mathematics, the remainder being on special mathematical subjects or non-mathematical ones. 
His last work, an unfinished manuscript written while in the hospital, was later published in book form as The Computer and the Brain.", "John von Neumann"] 4 | -------------------------------------------------------------------------------- /tests/test_readme.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import subprocess 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | from jack import readers 9 | from jack.core.data_structures import QASetting 10 | 11 | 12 | def test_readme_fastqa(): 13 | args = ['python3', './bin/jack-train.py', 'with', 'config=tests/test_conf/fastqa_test.yaml'] 14 | p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 15 | out, err = p.communicate() 16 | 17 | tf.reset_default_graph() 18 | 19 | fastqa_reader = readers.fastqa_reader() 20 | fastqa_reader.load_and_setup("tests/test_results/fastqa_reader_test") 21 | 22 | support = """"Architecturally, the school has a Catholic character. 23 | Atop the Main Building's gold dome is a golden statue of the Virgin Mary. 24 | Immediately in front of the Main Building and facing it, is a copper statue of 25 | Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the 26 | Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, 27 | a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, 28 | France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. 29 | At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), 30 | is a simple, modern stone statue of Mary.""" 31 | 32 | answers = fastqa_reader([QASetting( 33 | question="To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?", 34 | support=[support] 35 | )]) 36 | 37 | assert answers[0][0].text is not None 38 | 39 | 40 | def test_readme_dam(): 41 | args = ['python3', './bin/jack-train.py', 'with', 'config=tests/test_conf/dam_test.yaml'] 42 | p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 43 | out, err = p.communicate() 44 | 45 | tf.reset_default_graph() 46 | 47 | dam_reader = readers.dam_snli_reader() 48 | dam_reader.load_and_setup("tests/test_results/dam_reader_test") 49 | 50 | atomic_candidates = ['entailment', 'neutral', 'contradiction'] 51 | answers = dam_reader([QASetting( 52 | question="The boy plays with the ball.", 53 | support=["The boy plays with the ball."], 54 | candidates=atomic_candidates 55 | )]) 56 | 57 | assert answers[0] is not None 58 | assert isinstance(answers[0][0].score, np.float32) 59 | assert answers[0][0].text in atomic_candidates 60 | -------------------------------------------------------------------------------- /tests/test_results/dam_test/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "model_module" 2 | all_model_checkpoint_paths: "model_module" 3 | -------------------------------------------------------------------------------- /tests/test_results/dam_test/model_module.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/dam_test/model_module.data-00000-of-00001 -------------------------------------------------------------------------------- /tests/test_results/dam_test/model_module.index: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/dam_test/model_module.index -------------------------------------------------------------------------------- /tests/test_results/dam_test/model_module.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/dam_test/model_module.meta -------------------------------------------------------------------------------- /tests/test_results/dam_test/shared_resources/answer_vocab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/dam_test/shared_resources/answer_vocab -------------------------------------------------------------------------------- /tests/test_results/dam_test/shared_resources/config.yaml: -------------------------------------------------------------------------------- 1 | {batch_size: 64, clip_value: 0.0, config: tests/test_conf/dam_test.yaml, debug: false, 2 | debug_examples: 10, description: 'A configuration inheriting from the default jack.yaml 3 | 4 | ', dev: tests/test_data/SNLI/dev.json, dev_batch_size: null, dropout: 0, embedding_file: null, 5 | embedding_format: null, epochs: 5, file_cache: false, l2: 0.0, learning_rate: 0.001, 6 | learning_rate_decay: 1.0, load_dir: null, loader: jack, log_interval: 100, lowercase: true, 7 | max_epochs: 10, max_num_support: null, min_learning_rate: 0.0001, name: dam_nli, 8 | num_dev_examples: null, num_train_examples: null, optimizer: adam, output_dir: ./out/, 9 | parent_config: ./conf/jack.yaml, reader: dam_snli_reader, repr_dim: 50, repr_dim_task_embedding: 50, 10 | save_dir: ./tests/test_results/dam_reader_test, seed: 1337, tensorboard_folder: null, 11 | test: tests/test_data/SNLI/test.json, train: tests/test_data/SNLI/train.json, validation_interval: null, 12 | vocab_from_embeddings: false, with_char_embeddings: true, write_metrics_to: null} 13 | -------------------------------------------------------------------------------- /tests/test_results/dam_test/shared_resources/remainder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/dam_test/shared_resources/remainder -------------------------------------------------------------------------------- /tests/test_results/dam_test/shared_resources/vocab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/dam_test/shared_resources/vocab -------------------------------------------------------------------------------- /tests/test_results/fastqa_test/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "model_module" 2 | all_model_checkpoint_paths: "model_module" 3 | -------------------------------------------------------------------------------- /tests/test_results/fastqa_test/model_module.data-00000-of-00001: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/fastqa_test/model_module.data-00000-of-00001 -------------------------------------------------------------------------------- /tests/test_results/fastqa_test/model_module.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/fastqa_test/model_module.index -------------------------------------------------------------------------------- /tests/test_results/fastqa_test/model_module.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/fastqa_test/model_module.meta -------------------------------------------------------------------------------- /tests/test_results/fastqa_test/shared_resources/config.yaml: -------------------------------------------------------------------------------- 1 | {batch_size: 64, clip_value: 0.0, config: tests/test_conf/fastqa_test.yaml, debug: false, 2 | debug_examples: 10, description: 'A configuration inheriting from the default jack.yaml 3 | 4 | ', dev: data/SQuAD/snippet.jtr.json, dev_batch_size: null, dropout: 0, embedding_file: tests/test_data/glove.840B.300d_top256.txt, 5 | embedding_format: glove, epochs: 5, file_cache: false, l2: 0.0, learning_rate: 0.001, 6 | learning_rate_decay: 1.0, load_dir: null, loader: jack, log_interval: 100, lowercase: true, 7 | max_epochs: 20, max_num_support: null, max_support_length: -1, min_learning_rate: 0.0001, 8 | name: fastqa_reader, num_dev_examples: null, num_train_examples: null, optimizer: adam, 9 | output_dir: ./out/, parent_config: ./conf/jack.yaml, reader: fastqa_reader, repr_dim: 300, 10 | repr_dim_task_embedding: 0, save_dir: ./tests/test_results/fastqa_reader_test, seed: 1337, 11 | tensorboard_folder: null, test: null, train: data/SQuAD/snippet.jtr.json, validation_interval: null, 12 | vocab_from_embeddings: true, with_char_embeddings: true, write_metrics_to: null} 13 | -------------------------------------------------------------------------------- /tests/test_results/fastqa_test/shared_resources/embeddings/config.yaml: -------------------------------------------------------------------------------- 1 | {emb_format: glove, embedding_file: tests/test_data/glove.840B.300d_top256.txt} 2 | -------------------------------------------------------------------------------- /tests/test_results/fastqa_test/shared_resources/remainder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/fastqa_test/shared_resources/remainder -------------------------------------------------------------------------------- /tests/test_results/fastqa_test/shared_resources/vocab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uclnlp/jack/9e5ffbd4fb2b0bd6b816fe6e14b9045ac776bb8e/tests/test_results/fastqa_test/shared_resources/vocab -------------------------------------------------------------------------------- /tests/test_results/overfit_test/SNLI/dam/expected_results.txt: -------------------------------------------------------------------------------- 1 | 2018-06-10 14:40:49.637297 ClassificationEvalHook_Accuracy 0.35 2 | 2018-06-10 14:40:49.842983 
ClassificationEvalHook_Accuracy 0.35 3 | 2018-06-10 14:40:50.056031 ClassificationEvalHook_Accuracy 0.35 4 | 2018-06-10 14:40:50.247261 ClassificationEvalHook_Accuracy 0.35 5 | 2018-06-10 14:40:50.431764 ClassificationEvalHook_Accuracy 0.35 6 | 2018-06-10 14:40:50.621095 ClassificationEvalHook_Accuracy 0.35 7 | 2018-06-10 14:40:50.810825 ClassificationEvalHook_Accuracy 0.35 8 | 2018-06-10 14:40:50.997291 ClassificationEvalHook_Accuracy 0.34 9 | 2018-06-10 14:40:51.269345 ClassificationEvalHook_Accuracy 0.39 10 | 2018-06-10 14:40:51.475754 ClassificationEvalHook_Accuracy 0.36 11 | 2018-06-10 14:40:51.682479 ClassificationEvalHook_Accuracy 0.34 12 | 2018-06-10 14:40:51.868328 ClassificationEvalHook_Accuracy 0.37 13 | 2018-06-10 14:40:52.053837 ClassificationEvalHook_Accuracy 0.39 14 | 2018-06-10 14:40:52.244374 ClassificationEvalHook_Accuracy 0.4 15 | 2018-06-10 14:40:52.450705 ClassificationEvalHook_Accuracy 0.41 16 | -------------------------------------------------------------------------------- /tests/test_results/overfit_test/SNLI/esim/expected_results.txt: -------------------------------------------------------------------------------- 1 | 2018-06-29 10:12:25.512333 ClassificationEvalHook_Accuracy 0.35 2 | 2018-06-29 10:12:25.841608 ClassificationEvalHook_Accuracy 0.34 3 | 2018-06-29 10:12:26.446310 ClassificationEvalHook_Accuracy 0.47 4 | 2018-06-29 10:12:27.306957 ClassificationEvalHook_Accuracy 0.38 5 | 2018-06-29 10:12:27.892721 ClassificationEvalHook_Accuracy 0.45 6 | 2018-06-29 10:12:28.400163 ClassificationEvalHook_Accuracy 0.52 7 | 2018-06-29 10:12:28.761276 ClassificationEvalHook_Accuracy 0.49 8 | 2018-06-29 10:12:29.185469 ClassificationEvalHook_Accuracy 0.45 9 | 2018-06-29 10:12:29.592369 ClassificationEvalHook_Accuracy 0.47 10 | 2018-06-29 10:12:29.962783 ClassificationEvalHook_Accuracy 0.5 11 | 2018-06-29 10:12:30.432859 ClassificationEvalHook_Accuracy 0.52 12 | 2018-06-29 10:12:30.866125 ClassificationEvalHook_Accuracy 0.61 13 | 2018-06-29 10:12:31.676597 ClassificationEvalHook_Accuracy 0.55 14 | 2018-06-29 10:12:32.151324 ClassificationEvalHook_Accuracy 0.62 15 | 2018-06-29 10:12:32.631887 ClassificationEvalHook_Accuracy 0.58 -------------------------------------------------------------------------------- /tests/test_results/overfit_test/squad/fastqa/expected_results.txt: -------------------------------------------------------------------------------- 1 | 2018-04-18 15:12:57.105613 XQAEvalHook_exact 0.01852 2 | 2018-04-18 15:12:57.105753 XQAEvalHook_f1 0.11447 3 | 2018-04-18 15:12:58.238815 XQAEvalHook_exact 0.0463 4 | 2018-04-18 15:12:58.238969 XQAEvalHook_f1 0.11974 5 | 2018-04-18 15:12:59.478085 XQAEvalHook_exact 0.06481 6 | 2018-04-18 15:12:59.478308 XQAEvalHook_f1 0.14797 7 | 2018-04-18 15:13:00.865977 XQAEvalHook_exact 0.0463 8 | 2018-04-18 15:13:00.866217 XQAEvalHook_f1 0.13511 9 | 2018-04-18 15:13:02.067794 XQAEvalHook_exact 0.12963 10 | 2018-04-18 15:13:02.068049 XQAEvalHook_f1 0.19646 11 | 2018-04-18 15:13:03.339741 XQAEvalHook_exact 0.12037 12 | 2018-04-18 15:13:03.339996 XQAEvalHook_f1 0.2186 13 | 2018-04-18 15:13:04.727538 XQAEvalHook_exact 0.17593 14 | 2018-04-18 15:13:04.727785 XQAEvalHook_f1 0.298 15 | 2018-04-18 15:13:05.944528 XQAEvalHook_exact 0.13889 16 | 2018-04-18 15:13:05.944777 XQAEvalHook_f1 0.27456 17 | 2018-04-18 15:13:07.160099 XQAEvalHook_exact 0.24074 18 | 2018-04-18 15:13:07.160367 XQAEvalHook_f1 0.32948 19 | 2018-04-18 15:13:08.417764 XQAEvalHook_exact 0.25926 20 | 2018-04-18 15:13:08.418021 XQAEvalHook_f1 0.39285 21 | 2018-04-18 
15:13:09.710954 XQAEvalHook_exact 0.2963 22 | 2018-04-18 15:13:09.711209 XQAEvalHook_f1 0.42949 23 | 2018-04-18 15:13:10.970876 XQAEvalHook_exact 0.33333 24 | 2018-04-18 15:13:10.971064 XQAEvalHook_f1 0.42985 25 | 2018-04-18 15:13:12.242883 XQAEvalHook_exact 0.33333 26 | 2018-04-18 15:13:12.243865 XQAEvalHook_f1 0.42266 27 | 2018-04-18 15:13:13.419180 XQAEvalHook_exact 0.40741 28 | 2018-04-18 15:13:13.419408 XQAEvalHook_f1 0.53319 29 | 2018-04-18 15:13:14.851027 XQAEvalHook_exact 0.4537 30 | 2018-04-18 15:13:14.851192 XQAEvalHook_f1 0.56119 31 | -------------------------------------------------------------------------------- /tests/test_results/rename_recursively.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | path = sys.argv[1] 5 | execute = False 6 | try: 7 | execute = sys.argv[2] == '1' 8 | except: 9 | pass 10 | 11 | root_dir = os.getcwd() 12 | 13 | files = [] 14 | dirs = [] 15 | for root, directories, filenames in os.walk(path): 16 | for filename in filenames: 17 | dirs.append(os.path.join(root_dir, root)) 18 | files.append(os.path.join(root_dir, root, filename)) 19 | 20 | 21 | for f in files: 22 | if 'expected_results.txt' in f: 23 | if execute: 24 | os.remove(f) 25 | 26 | for f, d in zip(files, dirs): 27 | if not 'expected_results.txt' in f: 28 | if execute: 29 | os.rename(f,os.path.join(d,'expected_results.txt')) 30 | else: 31 | print('{0} --> {1}'.format(f, 32 | os.path.join(d,'expected_results.txt'))) 33 | -------------------------------------------------------------------------------- /wercker.yml: -------------------------------------------------------------------------------- 1 | box: python:3.6 2 | 3 | no-response-timeout: 60 4 | command-timeout: 60 5 | build: 6 | steps: 7 | - pip-install 8 | 9 | - script: 10 | name: install 11 | code: | 12 | sudo apt-get update 13 | sudo apt-get -y install libtk8.6 14 | 15 | # pip install --upgrade -r requirements.txt 16 | pip install -e .[tf] --upgrade 17 | 18 | - script: 19 | name: echo python information 20 | code: | 21 | echo "python version $(python --version) running" 22 | echo "pip version $(pip --version) running" 23 | ls 24 | 25 | - script: 26 | name: Run all tests 27 | code: | 28 | pytest tests -v --cov=jack --max-slave-restart=2 29 | 30 | - script: 31 | name: Code coverage upload 32 | code: | 33 | codecov 34 | --------------------------------------------------------------------------------