├── docs ├── .nojekyll ├── public │ ├── .nojekyll │ ├── objects.inv │ ├── _static │ │ ├── up.png │ │ ├── down.png │ │ ├── file.png │ │ ├── plus.png │ │ ├── comment.png │ │ ├── minus.png │ │ ├── ajax-loader.gif │ │ ├── down-pressed.png │ │ ├── up-pressed.png │ │ ├── comment-bright.png │ │ ├── comment-close.png │ │ ├── fonts │ │ │ ├── Lato │ │ │ │ ├── lato-bold.eot │ │ │ │ ├── lato-bold.ttf │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-italic.eot │ │ │ │ ├── lato-italic.ttf │ │ │ │ ├── lato-italic.woff │ │ │ │ ├── lato-italic.woff2 │ │ │ │ ├── lato-regular.eot │ │ │ │ ├── lato-regular.ttf │ │ │ │ ├── lato-regular.woff │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ ├── lato-regular.woff2 │ │ │ │ └── lato-bolditalic.woff2 │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.ttf │ │ │ ├── fontawesome-webfont.woff │ │ │ ├── fontawesome-webfont.woff2 │ │ │ └── RobotoSlab │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ ├── roboto-slab-v7-regular.eot │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ ├── css │ │ │ └── badge_only.css │ │ ├── js │ │ │ └── theme.js │ │ ├── pygments.css │ │ └── doctools.js │ ├── .buildinfo │ ├── _sources │ │ ├── package_reference.rst.txt │ │ ├── submission.rst.txt │ │ ├── index.rst.txt │ │ ├── faq.rst.txt │ │ ├── evaluations.rst.txt │ │ └── datasets.rst.txt │ ├── genindex.html │ ├── search.html │ ├── searchindex.js │ ├── _modules │ │ └── index.html │ ├── py-modindex.html │ ├── submission.html │ └── faq.html ├── requirements.txt ├── Makefile └── source │ ├── submission.rst │ ├── faq.rst │ ├── index.rst │ ├── evaluations.rst │ ├── conf.py │ └── datasets.rst ├── noesis ├── __init__.py ├── util │ ├── __init__.py │ └── checkpoint.py ├── dataset │ ├── __init__.py │ ├── utils.py │ ├── vocabulary.py │ └── dataset.py ├── evaluator │ ├── __init__.py │ └── evaluator.py ├── networks │ ├── __init__.py │ └── dual_encoder.py ├── trainers │ ├── __init__.py │ └── supervised_trainer.py └── sample.py ├── evaluations ├── __init__.py └── metrics.py ├── noesis-tf ├── models │ ├── __init__.py │ ├── helpers.py │ └── dual_encoder.py ├── util │ ├── __init__.py │ └── blocks.py ├── images │ └── architecture.png ├── metrics.py ├── inputs.py ├── train.py ├── hparams.py ├── README.md ├── model.py └── scripts │ └── prepare_data.py ├── requirements.txt ├── LICENSE ├── .gitignore ├── README.md └── setup.py /docs/.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /noesis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/public/.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /noesis/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/noesis-tf/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /noesis-tf/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /noesis/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /noesis/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /noesis/networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /noesis/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | recommonmark -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | dill 3 | ijson 4 | tensorflow 5 | numpy -------------------------------------------------------------------------------- /docs/public/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/objects.inv -------------------------------------------------------------------------------- /docs/public/_static/up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/up.png -------------------------------------------------------------------------------- /docs/public/_static/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/down.png -------------------------------------------------------------------------------- /docs/public/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/file.png -------------------------------------------------------------------------------- /docs/public/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/plus.png -------------------------------------------------------------------------------- /docs/public/_static/comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/comment.png -------------------------------------------------------------------------------- /docs/public/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/minus.png 
-------------------------------------------------------------------------------- /noesis-tf/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/noesis-tf/images/architecture.png -------------------------------------------------------------------------------- /docs/public/_static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/ajax-loader.gif -------------------------------------------------------------------------------- /docs/public/_static/down-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/down-pressed.png -------------------------------------------------------------------------------- /docs/public/_static/up-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/up-pressed.png -------------------------------------------------------------------------------- /docs/public/_static/comment-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/comment-bright.png -------------------------------------------------------------------------------- /docs/public/_static/comment-close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/comment-close.png -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/public/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/public/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/public/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/public/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/public/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot -------------------------------------------------------------------------------- /docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff 
-------------------------------------------------------------------------------- /docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/dstc-noesis/HEAD/docs/public/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /docs/public/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 6c17cb11edf46b86171e238e44a376c7 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /noesis-tf/metrics.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import functools 3 | from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec 4 | 5 | 6 | def create_evaluation_metrics(): 7 | eval_metrics = {} 8 | for k in [1, 2, 5, 10, 50, 100]: 9 | eval_metrics["recall_at_%d" % k] = MetricSpec(metric_fn=functools.partial( 10 | tf.contrib.metrics.streaming_sparse_recall_at_k, 11 | k=k)) 12 | return eval_metrics 13 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = dstc7-noesis 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/public/_sources/package_reference.rst.txt: -------------------------------------------------------------------------------- 1 | Package Reference 2 | ================= 3 | Networks 4 | -------- 5 | .. automodule:: noesis.networks.dual_encoder 6 | :members: 7 | :undoc-members: 8 | 9 | Dataset 10 | ------- 11 | .. automodule:: noesis.dataset.dataset 12 | :members: 13 | :undoc-members: 14 | 15 | Trainers 16 | -------- 17 | .. automodule:: noesis.trainers.supervised_trainer 18 | :members: 19 | :undoc-members: 20 | 21 | Evaluator 22 | --------- 23 | .. automodule:: noesis.evaluator.evaluator 24 | :members: 25 | :undoc-members: 26 | 27 | Vocabulary 28 | ---------- 29 | .. automodule:: noesis.dataset.vocabulary 30 | :members: 31 | :undoc-members: 32 | 33 | Utilities 34 | --------- 35 | .. automodule:: noesis.dataset.utils 36 | :members: 37 | :undoc-members: 38 | 39 | Checkpoint 40 | ---------- 41 | .. 
automodule:: noesis.util.checkpoint 42 | :members: 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 International Business Machines 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .idea 3 | 4 | # Operating system related file 5 | .DS_Store 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # IPython Notebook 73 | .ipynb_checkpoints 74 | 75 | # Vagrant 76 | .vagrant 77 | 78 | -------------------------------------------------------------------------------- /noesis-tf/models/helpers.py: -------------------------------------------------------------------------------- 1 | import array 2 | import numpy as np 3 | import tensorflow as tf 4 | from collections import defaultdict 5 | 6 | 7 | def load_vocab(filename): 8 | vocab = None 9 | with open(filename) as f: 10 | vocab = f.read().splitlines() 11 | dct = defaultdict(int) 12 | vocab = set(vocab) 13 | for idx, word in enumerate(sorted(vocab)):  # sorted, so the word-to-id mapping is deterministic across runs 14 | dct[word] = idx 15 | return [vocab, dct] 16 | 17 | 18 | def load_glove_vectors(filename, vocab): 19 | """ 20 | Load GloVe vectors from a .txt file. 
21 | Optionally limit the vocabulary to save memory. `vocab` should be a set. 22 | """ 23 | dct = {} 24 | vectors = array.array('d') 25 | current_idx = 0 26 | with open(filename, "r", encoding="utf-8") as f: 27 | for line in f: 28 | tokens = line.split(" ") 29 | word = tokens[0] 30 | entries = tokens[1:] 31 | if not vocab or word in vocab: 32 | dct[word] = current_idx 33 | vectors.extend(float(x) for x in entries) 34 | current_idx += 1 35 | word_dim = len(entries) 36 | num_vectors = len(dct) 37 | tf.logging.info("Found {} out of {} vectors in GloVe".format(num_vectors, len(vocab))) 38 | return [np.array(vectors).reshape(num_vectors, word_dim), dct] 39 | 40 | 41 | def build_initial_embedding_matrix(vocab_dict, glove_dict, glove_vectors, embedding_dim): 42 | initial_embeddings = np.random.uniform(-0.25, 0.25, (len(vocab_dict), embedding_dim)).astype("float32") 43 | for word, glove_word_idx in glove_dict.items(): 44 | word_idx = vocab_dict.get(word) 45 | initial_embeddings[word_idx, :] = glove_vectors[glove_word_idx] 46 | return initial_embeddings 47 | -------------------------------------------------------------------------------- /docs/source/submission.rst: -------------------------------------------------------------------------------- 1 | Submission 2 | ========== 3 | 4 | Your submissions should be emailed to chulaka.gunasekara@ibm.com, with the subject line **DSTC7_Track1_Submission**. The results should be submitted from an email address that is registered for Track 1. 5 | 6 | You need to submit a single zipped directory containing the result files for each of the subtasks that you need to be evaluated on. The files should be named in the following format. 7 | ``<dataset>_subtask_<n>.json`` 8 | 9 | The ``<dataset>`` placeholder should be replaced by either ‘Ubuntu’ or ‘Advising’, and ``<n>`` should be replaced by the subtask number (1-5). 10 | For example, the results file for subtask 1 on the Ubuntu dataset should be named Ubuntu_subtask_1.json 11 | 12 | Each results file should follow the JSON format below. 13 | 14 | .. code-block:: json 15 | 16 | [ 17 | { 18 | "example-id": xxxxxxx, 19 | "candidate-ranking":[ 20 | { 21 | "candidate-id": aaaaaa, 22 | "confidence": b.bbb 23 | }, 24 | { 25 | "candidate-id": cccccc, 26 | "confidence": d.ddd 27 | }, 28 | ... 29 | ] 30 | }, 31 | ... 32 | ] 33 | 34 | 35 | The value for the field "example-id" should contain the corresponding example-id of the test dataset. The candidate-ranking field should ONLY include 100 candidates, in decreasing order of confidence. 36 | 37 | For subtask 2, where the selection is made from a global list of candidates, candidate-ranking fields should **only include the top 100 candidates** from the global list. 38 | 39 | For subtask 4, when the correct candidate is not available in the candidate set, return ``"candidate-id": NONE`` with the confidence score as an item in the candidate-ranking list. -------------------------------------------------------------------------------- /docs/public/_sources/submission.rst.txt: -------------------------------------------------------------------------------- 1 | Submission 2 | ========== 3 | 4 | Your submissions should be emailed to chulaka.gunasekara@ibm.com, with the subject line **DSTC7_Track1_Submission**. The results should be submitted from an email address that is registered for Track 1. 5 | 6 | You need to submit a single zipped directory containing the result files for each of the subtasks that you need to be evaluated on. The files should be named in the following format. 
7 | ``<dataset>_subtask_<n>.json`` 8 | 9 | The ``<dataset>`` placeholder should be replaced by either ‘Ubuntu’ or ‘Advising’, and ``<n>`` should be replaced by the subtask number (1-5). 10 | For example, the results file for subtask 1 on the Ubuntu dataset should be named Ubuntu_subtask_1.json 11 | 12 | Each results file should follow the JSON format below. 13 | 14 | .. code-block:: json 15 | 16 | [ 17 | { 18 | "example-id": xxxxxxx, 19 | "candidate-ranking":[ 20 | { 21 | "candidate-id": aaaaaa, 22 | "confidence": b.bbb 23 | }, 24 | { 25 | "candidate-id": cccccc, 26 | "confidence": d.ddd 27 | }, 28 | ... 29 | ] 30 | }, 31 | ... 32 | ] 33 | 34 | 35 | The value for the field "example-id" should contain the corresponding example-id of the test dataset. The candidate-ranking field should ONLY include 100 candidates, in decreasing order of confidence. 36 | 37 | For subtask 2, where the selection is made from a global list of candidates, candidate-ranking fields should **only include the top 100 candidates** from the global list. 38 | 39 | For subtask 4, when the correct candidate is not available in the candidate set, return ``"candidate-id": NONE`` with the confidence score as an item in the candidate-ranking list. -------------------------------------------------------------------------------- /docs/public/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. dstc7-noesis documentation master file, created by 2 | sphinx-quickstart on Wed Jun 6 01:03:45 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Noetic End-to-End Response Selection Challenge 7 | ============================================== 8 | 9 | **Update** - A TensorFlow-based baseline for subtask 1 of the track is available `here `_. 10 | 11 | This challenge is part of the dialog state tracking challenge (DSTC 7) series. It provides a partial conversation, and requires participants to select the correct next utterances from a set of candidates. 12 | Unlike previous similar challenges, this task tries to push towards real-world problems by introducing: 13 | 14 | - A large number of candidates 15 | - Cases where no candidate is correct 16 | - External data 17 | 18 | This challenge is offered with two goal-oriented dialog datasets, used in 5 subtasks. 19 | A participant may participate in one, several, or all the subtasks. 20 | A full description of the track is available `here `_. 21 | 22 | Organizers 23 | ---------- 24 | * `Lazaros Polymenakos `_, `Chulaka Gunasekara `_ – IBM Research AI 25 | * `Walter Lasecki `_, `Jonathan K. Kummerfeld `_ – University of Michigan 26 | 27 | 28 | Maintainers 29 | ----------- 30 | * `Chulaka Gunasekara `_ 31 | 32 | To get guaranteed support, you are kindly requested to open an issue. 33 | 34 | Thank you for understanding! 35 | 36 | .. 
toctree:: 37 | :hidden: 38 | :glob: 39 | 40 | * 41 | -------------------------------------------------------------------------------- /noesis-tf/inputs.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | TEXT_FEATURE_SIZE = 160 4 | 5 | def get_feature_columns(mode): 6 | feature_columns = [] 7 | 8 | feature_columns.append(tf.contrib.layers.real_valued_column( 9 | column_name="context", dimension=TEXT_FEATURE_SIZE, dtype=tf.int64)) 10 | feature_columns.append(tf.contrib.layers.real_valued_column( 11 | column_name="context_len", dimension=1, dtype=tf.int64)) 12 | 13 | feature_columns.append(tf.contrib.layers.real_valued_column( 14 | column_name="target", dimension=1, dtype=tf.int64)) 15 | 16 | for i in range(100): 17 | feature_columns.append(tf.contrib.layers.real_valued_column( 18 | column_name="option_{}".format(i), dimension=TEXT_FEATURE_SIZE, dtype=tf.int64)) 19 | feature_columns.append(tf.contrib.layers.real_valued_column( 20 | column_name="option_{}_len".format(i), dimension=1, dtype=tf.int64)) 21 | 22 | return set(feature_columns) 23 | 24 | 25 | def create_input_fn(mode, input_files, batch_size, num_epochs): 26 | def input_fn(): 27 | features = tf.contrib.layers.create_feature_spec_for_parsing( 28 | get_feature_columns(mode)) 29 | 30 | feature_map = tf.contrib.learn.io.read_batch_features( 31 | file_pattern=input_files, 32 | batch_size=batch_size, 33 | features=features, 34 | reader=tf.TFRecordReader, 35 | randomize_input=True, 36 | num_epochs=num_epochs, 37 | queue_capacity=200000 + batch_size * 10, 38 | name="read_batch_features_{}".format(mode)) 39 | 40 | # This is an ugly hack because of a current bug in tf.learn 41 | # During evaluation TF tries to restore the epoch variable which isn't defined during training 42 | # So we define the variable manually here 43 | if mode == tf.contrib.learn.ModeKeys.TRAIN: 44 | tf.get_variable( 45 | "read_batch_features_eval/file_name_queue/limit_epochs/epochs", 46 | initializer=tf.constant(0, dtype=tf.int64)) 47 | 48 | target = feature_map.pop("target") 49 | 50 | return feature_map, target 51 | 52 | return input_fn 53 | -------------------------------------------------------------------------------- /docs/source/faq.rst: -------------------------------------------------------------------------------- 1 | FAQs 2 | ==== 3 | 4 | 1. **What is the timeline of the competition**? 5 | 6 | +-----------------------------------+-----------------------+ 7 | | Task | Dates | 8 | +===================================+=======================+ 9 | | Development phase (14 weeks) | Jun 1 – Sep 9, 2018 | 10 | +-----------------------------------+-----------------------+ 11 | | Evaluation phase (2 weeks) | Sep 10 – Oct 8, 2018 | 12 | +-----------------------------------+-----------------------+ 13 | | Release of the results | 16th Oct 2018 | 14 | +-----------------------------------+-----------------------+ 15 | | Paper submission deadline | Oct-Nov 2018 | 16 | +-----------------------------------+-----------------------+ 17 | | DSTC7 special session or workshop | Spring 2019 | 18 | +-----------------------------------+-----------------------+ 19 | 20 | | 21 | 22 | 2. **What should we submit**? 23 | 24 | You are required to submit the responses to the test dataset that will be released on the 10th of September. The format of the responses can be found under the `submissions` section. 25 | 26 | 3. **Do we need to work on both datasets**? 27 | 28 | Not necessarily. 
You can select one dataset and work on all or a subset of the subtasks. But submitting results for both datasets and all subtasks will increase your chance of winning the competition. 29 | 30 | 4. **How are we evaluated**? 31 | 32 | For each test instance, we expect you to return a set of 100 choices (candidate ids) from the set of candidates and a probability distribution over those 100 choices. For more details, please check the `evaluations` section. 33 | 34 | 5. **What do you mean by end-to-end models**? 35 | 36 | We don't need the whole system to be end-to-end trainable. You can have separate components, which are not trained with back-propagation. However, we expect the functionality of each of the components in your system to be learned from the given dataset. We discourage the use of hand-coded features for any component in your system, as one of the focus points of the challenge is automation. 37 | 38 | 6. **Can we use pre-trained word embeddings**? 39 | 40 | You can use any pre-trained embeddings that were publicly available before the 1st of June. -------------------------------------------------------------------------------- /docs/public/_sources/faq.rst.txt: -------------------------------------------------------------------------------- 1 | FAQs 2 | ==== 3 | 4 | 1. **What is the timeline of the competition**? 5 | 6 | +-----------------------------------+-----------------------+ 7 | | Task | Dates | 8 | +===================================+=======================+ 9 | | Development phase (14 weeks) | Jun 1 – Sep 9, 2018 | 10 | +-----------------------------------+-----------------------+ 11 | | Evaluation phase (2 weeks) | Sep 10 – Oct 8, 2018 | 12 | +-----------------------------------+-----------------------+ 13 | | Release of the results | 16th Oct 2018 | 14 | +-----------------------------------+-----------------------+ 15 | | Paper submission deadline | Oct-Nov 2018 | 16 | +-----------------------------------+-----------------------+ 17 | | DSTC7 special session or workshop | Spring 2019 | 18 | +-----------------------------------+-----------------------+ 19 | 20 | | 21 | 22 | 2. **What should we submit**? 23 | 24 | You are required to submit the responses to the test dataset that will be released on the 10th of September. The format of the responses can be found under the `submissions` section. 25 | 26 | 3. **Do we need to work on both datasets**? 27 | 28 | Not necessarily. You can select one dataset and work on all or a subset of the subtasks. But submitting results for both datasets and all subtasks will increase your chance of winning the competition. 29 | 30 | 4. **How are we evaluated**? 31 | 32 | For each test instance, we expect you to return a set of 100 choices (candidate ids) from the set of candidates and a probability distribution over those 100 choices. For more details, please check the `evaluations` section. 33 | 34 | 5. **What do you mean by end-to-end models**? 35 | 36 | We don't need the whole system to be end-to-end trainable. You can have separate components, which are not trained with back-propagation. However, we expect the functionality of each of the components in your system to be learned from the given dataset. We discourage the use of hand-coded features for any component in your system, as one of the focus points of the challenge is automation. 37 | 38 | 6. **Can we use pre-trained word embeddings**? 39 | 40 | You can use any pre-trained embeddings that were publicly available before the 1st of June. 
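For question 4, a minimal sketch of turning per-candidate model scores into the required ranked list with a normalized confidence distribution (field names follow the JSON schema in the `submission` section; the scoring model itself is assumed):

.. code-block:: python

    import json
    import numpy as np

    def write_submission(example_ids, candidate_ids, scores, out_path):
        """scores: (num_examples, 100) array of model scores; higher is better."""
        results = []
        for ex_id, cand_ids, row in zip(example_ids, candidate_ids, scores):
            probs = np.exp(row - row.max())
            probs /= probs.sum()               # softmax -> confidence distribution
            order = np.argsort(-probs)[:100]   # best candidate first
            ranking = [{"candidate-id": cand_ids[i], "confidence": float(probs[i])}
                       for i in order]
            results.append({"example-id": ex_id, "candidate-ranking": ranking})
        with open(out_path, "w") as f:
            json.dump(results, f, indent=2)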
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dialog System Technology Challenges 7 (DSTC 7) 2 | 3 | ## [Track 1 - Sentence Selection](http://workshop.colips.org/dstc7/proposals/Track%201%20Merged%20Challenge%20Extended%20Desscription_v2.pdf) 4 | 5 | This challenge provides a partial conversation, and requires participants to select the correct next utterances from a set of candidates. 6 | Unlike previous similar challenges, this task tries to push towards real-world problems by introducing: 7 | 8 | - A large number of candidates 9 | - Cases where no candidate is correct 10 | - External data 11 | 12 | This challenge is offered with two goal-oriented dialog datasets, used in 5 subtasks. 13 | A participant may participate in one, several, or all the subtasks. 14 | 15 | If you use this data or code in your work, please cite the task description paper: 16 | 17 | ``` 18 | @InProceedings{dstc19task1, 19 | title = {DSTC7 Task 1: Noetic End-to-End Response Selection}, 20 | author = {Chulaka Gunasekara, Jonathan K. Kummerfeld, Lazaros Polymenakos, and Walter S. Lasecki}, 21 | year = {2019}, 22 | booktitle = {7th Edition of the Dialog System Technology Challenges at AAAI 2019}, 23 | url = {http://workshop.colips.org/dstc7/papers/dstc7_task1_final_report.pdf}, 24 | month = {January}, 25 | } 26 | ``` 27 | 28 | If you use the Ubuntu data, please also cite the paper in which we describe its creation: 29 | 30 | ``` 31 | @Article{arxiv18disentangle, 32 | author = {Jonathan K. Kummerfeld, Sai R. Gouravajhala, Joseph Peper, Vignesh Athreya, Chulaka Gunasekara, Jatin Ganhotra, Siva Sankalp Patel, Lazaros Polymenakos, and Walter S. Lasecki}, 33 | title = {Analyzing Assumptions in Conversation Disentanglement Research Through the Lens of a New Dataset and Model}, 34 | journal = {ArXiv e-prints}, 35 | archivePrefix = {arXiv}, 36 | eprint = {1810.11118}, 37 | primaryClass = {cs.CL}, 38 | year = {2018}, 39 | month = {October}, 40 | url = {https://arxiv.org/pdf/1810.11118.pdf}, 41 | } 42 | ``` 43 | 44 | **For more details, please visit our [website](https://ibm.github.io/dstc7-noesis/public/index.html)**. 45 | 46 | ### Organizers 47 | 48 | [Lazaros Polymenakos](mailto:lcpolyme@us.ibm.com), [Chulaka Gunasekara](mailto:chulaka.gunasekara@ibm.com) – IBM Research AI
49 | [Walter Lasecki](mailto:wlasecki@umich.edu), [Jonathan K. Kummerfeld](mailto:jkummerf@umich.edu) – University of Michigan 50 | -------------------------------------------------------------------------------- /noesis-tf/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import tensorflow as tf 5 | import model 6 | from hparams import create_hparams 7 | import metrics 8 | import inputs 9 | 10 | from models.dual_encoder import dual_encoder_model 11 | 12 | 13 | tf.flags.DEFINE_string("train_in", None, "Path to input data file") 14 | tf.flags.DEFINE_string("validation_in", None, "Path to validation data file") 15 | 16 | tf.flags.DEFINE_string("model_dir", None, "Directory to store model checkpoints (defaults to ./runs)") 17 | tf.flags.DEFINE_integer("loglevel", 20, "Tensorflow log level") 18 | tf.flags.DEFINE_integer("num_epochs", None, "Number of training Epochs. Defaults to indefinite.") 19 | tf.flags.DEFINE_integer("eval_every", 2000, "Evaluate after this many train steps") 20 | 21 | FLAGS = tf.flags.FLAGS 22 | 23 | TIMESTAMP = int(time.time()) 24 | 25 | if FLAGS.model_dir: 26 | MODEL_DIR = FLAGS.model_dir 27 | else: 28 | MODEL_DIR = os.path.abspath(os.path.join("./runs", str(TIMESTAMP))) 29 | 30 | TRAIN_FILE = os.path.abspath(os.path.join(FLAGS.train_in)) 31 | VALIDATION_FILE = os.path.abspath(os.path.join(FLAGS.validation_in)) 32 | 33 | tf.logging.set_verbosity(FLAGS.loglevel) 34 | 35 | 36 | def main(unused_argv): 37 | config = tf.ConfigProto() 38 | config.gpu_options.allow_growth = True 39 | 40 | hyper_params = create_hparams() 41 | 42 | model_fn = model.create_model_fn( 43 | hyper_params, 44 | model_impl=dual_encoder_model) 45 | 46 | estimator = tf.contrib.learn.Estimator( 47 | model_fn=model_fn, 48 | model_dir=MODEL_DIR, 49 | config=tf.contrib.learn.RunConfig(session_config=config)) 50 | 51 | input_fn_train = inputs.create_input_fn( 52 | mode=tf.contrib.learn.ModeKeys.TRAIN, 53 | input_files=[TRAIN_FILE], 54 | batch_size=hyper_params.batch_size, 55 | num_epochs=FLAGS.num_epochs) 56 | 57 | input_fn_eval = inputs.create_input_fn( 58 | mode=tf.contrib.learn.ModeKeys.EVAL, 59 | input_files=[VALIDATION_FILE], 60 | batch_size=hyper_params.eval_batch_size, 61 | num_epochs=1) 62 | 63 | eval_metrics = metrics.create_evaluation_metrics() 64 | 65 | eval_monitor = tf.contrib.learn.monitors.ValidationMonitor( 66 | input_fn=input_fn_eval, 67 | every_n_steps=FLAGS.eval_every, 68 | metrics=eval_metrics) 69 | 70 | estimator.fit(input_fn=input_fn_train, steps=None, monitors=[eval_monitor]) 71 | 72 | 73 | if __name__ == "__main__": 74 | tf.app.run() 75 | -------------------------------------------------------------------------------- /noesis-tf/hparams.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from collections import namedtuple 3 | 4 | # Model Parameters 5 | tf.flags.DEFINE_integer( 6 | "vocab_size", 7 | 100000, 8 | "The size of the vocabulary. 
Only change this if you changed the preprocessing") 9 | 10 | # Model Parameters 11 | tf.flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of the embeddings") 12 | tf.flags.DEFINE_integer("rnn_dim", 256, "Dimensionality of the RNN cell") 13 | tf.flags.DEFINE_integer("max_context_len", 160, "Truncate contexts to this length") 14 | tf.flags.DEFINE_integer("max_utterance_len", 160, "Truncate utterances to this length") 15 | 16 | # Pre-trained embeddings 17 | tf.flags.DEFINE_string("glove_path", None, "Path to pre-trained GloVe vectors") 18 | tf.flags.DEFINE_string("vocab_path", None, "Path to vocabulary.txt file") 19 | 20 | # Training Parameters 21 | tf.flags.DEFINE_float("learning_rate", 0.001, "Learning rate") 22 | tf.flags.DEFINE_integer("batch_size", 16, "Batch size during training") 23 | tf.flags.DEFINE_integer("eval_batch_size", 20, "Batch size during evaluation") 24 | tf.flags.DEFINE_string("optimizer", "Adam", "Optimizer Name (Adam, Adagrad, etc)") 25 | tf.flags.DEFINE_float("keep_rate", 1.0, "Keep probability for dropout (1.0 disables dropout)") 26 | tf.flags.DEFINE_float("decay_rate", 0.95, "Exponential decay rate") 27 | tf.flags.DEFINE_integer("decay_steps", 5000, "Decay steps") 28 | tf.flags.DEFINE_bool("staircase", False, "Staircase decay") 29 | 30 | FLAGS = tf.flags.FLAGS 31 | 32 | HParams = namedtuple( 33 | "HParams", 34 | [ 35 | "batch_size", 36 | "embedding_dim", 37 | "eval_batch_size", 38 | "learning_rate", 39 | "max_context_len", 40 | "max_utterance_len", 41 | "optimizer", 42 | "rnn_dim", 43 | "vocab_size", 44 | "glove_path", 45 | "vocab_path", 46 | "keep_rate", 47 | "decay_rate", 48 | "decay_steps", 49 | "staircase" 50 | ]) 51 | 52 | 53 | def create_hparams(): 54 | return HParams( 55 | batch_size=FLAGS.batch_size, 56 | eval_batch_size=FLAGS.eval_batch_size, 57 | vocab_size=FLAGS.vocab_size, 58 | optimizer=FLAGS.optimizer, 59 | learning_rate=FLAGS.learning_rate, 60 | embedding_dim=FLAGS.embedding_dim, 61 | max_context_len=FLAGS.max_context_len, 62 | max_utterance_len=FLAGS.max_utterance_len, 63 | glove_path=FLAGS.glove_path, 64 | vocab_path=FLAGS.vocab_path, 65 | rnn_dim=FLAGS.rnn_dim, 66 | keep_rate=FLAGS.keep_rate, 67 | decay_rate=FLAGS.decay_rate, 68 | decay_steps=FLAGS.decay_steps, 69 | staircase=FLAGS.staircase 70 | ) 71 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | # To use a consistent encoding 3 | from codecs import open 4 | from os import path 5 | 6 | here = path.abspath(path.dirname(__file__)) 7 | 8 | # Get the long description from the README file 9 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 10 | long_description = f.read() 11 | 12 | setup( 13 | name='noesis', 14 | 15 | # Versions should comply with PEP440. For a discussion on single-sourcing 16 | # the version across setup.py and the project code, see 17 | # https://packaging.python.org/en/latest/single_source_version.html 18 | version='0.0.1', 19 | 20 | description='Dialog State Tracking Challenge 7 - Noetic End-to-End Response Selection', 21 | long_description=long_description, 22 | 23 | # The project's main homepage. 24 | url='https://github.com/IBM/dstc7-noesis', 25 | 26 | # Choose your license 27 | license='Apache License 2.0', 28 | 29 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 30 | classifiers=[ 31 | # How mature is this project? 
Common values are 32 | # 3 - Alpha 33 | # 4 - Beta 34 | # 5 - Production/Stable 35 | 'Development Status :: 3 - Alpha', 36 | 37 | # Indicate who your project is intended for 38 | 'Intended Audience :: Science/Research', 39 | 'Topic :: Software Development', 40 | 41 | # Pick your license as you wish (should match "license" above) 42 | 'License :: OSI Approved :: Apache Software License', 43 | 44 | # Specify the Python versions you support here. In particular, ensure 45 | # that you indicate whether you support Python 2, Python 3 or both. 46 | 'Programming Language :: Python :: 2.7', 47 | 'Programming Language :: Python :: 3.6' 48 | ], 49 | 50 | # What does your project relate to? 51 | keywords='dual-encoder py-torch development', 52 | 53 | # You can just specify the packages manually here if your project is 54 | # simple. Or you can use find_packages(). 55 | packages=find_packages(exclude=['contrib', 'docs', 'tests']), 56 | 57 | # Alternatively, if you want to distribute just a my_module.py, uncomment 58 | # this: 59 | # py_modules=["my_module"], 60 | 61 | # List run-time dependencies here. These will be installed by pip when 62 | # your project is installed. For an analysis of "install_requires" vs pip's 63 | # requirements files see: 64 | # https://packaging.python.org/en/latest/requirements.html 65 | install_requires=['numpy', 'torch', 'dill'], 66 | 67 | # List additional groups of dependencies here (e.g. development 68 | # dependencies). You can install these using the following syntax, 69 | # for example: 70 | # $ pip install -e .[dev,test] 71 | extras_require={ 72 | 'dev': ['check-manifest'], 73 | 'test': ['coverage'], 74 | } 75 | ) 76 | -------------------------------------------------------------------------------- /noesis-tf/README.md: -------------------------------------------------------------------------------- 1 | ## Response Selection for Conversation Systems in TensorFlow 2 | 3 | #### Overview 4 | This code provides a baseline for subtask 1 of the DSTC-7 [Sentence Selection track](https://ibm.github.io/dstc7-noesis/public/index.html). 5 | 6 | This code extends the work of Denny Britz, which implements the Dual LSTM Encoder model from [The Ubuntu Dialogue Corpus: A Large Dataset for Research in Unstructured Multi-Turn Dialogue Systems](http://arxiv.org/abs/1506.08909). 7 | 8 | [Refer to the original blog post here](http://www.wildml.com/2016/07/deep-learning-for-chatbots-2-retrieval-based-model-tensorflow) 9 | 10 | #### Setup 11 | 12 | This code uses Python 3.6 and TensorFlow-GPU 1.6. Clone the repository and install all required packages. It is recommended to use the [Anaconda package manager](https://www.anaconda.com/download/#macos). After installing Anaconda: 13 | 14 | ``` 15 | cd noesis-tf 16 | conda create --name dstc7 python=3.6 17 | source activate dstc7 18 | pip install -r ../requirements.txt 19 | ``` 20 | 21 | #### Get the data 22 | 23 | Make sure you register for Track 1 of DSTC7 to download the data, and copy it inside the `data` directory. 24 | 25 | #### Prepare the data 26 | 27 | Before training, the data needs to be converted to a format suitable for TensorFlow. The script `prepare_data.py` converts data from JSON format to TFRecords for subtask 1 of both datasets; the record layout it writes is sketched below, followed by the conversion command. 
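Each serialized example mirrors the feature columns that `inputs.py` parses back: a `context`, 100 `option_i` candidates, their true (pre-padding) lengths, and the `target` index of the correct option. The helper below is an illustrative sketch of that layout, not the actual `prepare_data.py` implementation; it assumes token ids are already padded or truncated to the 160-token `TEXT_FEATURE_SIZE` that `inputs.py` expects.

```python
import tensorflow as tf


def _int64(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))


def make_example(context_ids, context_len, options, option_lens, target):
    """Build one tf.train.Example with the field names used in inputs.py."""
    feature = {
        "context": _int64(context_ids),        # padded/truncated to 160 ids
        "context_len": _int64([context_len]),  # true length before padding
        "target": _int64([target]),            # index of the correct option
    }
    for i, (ids, n) in enumerate(zip(options, option_lens)):  # 100 candidates
        feature["option_{}".format(i)] = _int64(ids)
        feature["option_{}_len".format(i)] = _int64([n])
    return tf.train.Example(features=tf.train.Features(feature=feature))
```

The conversion itself is a single command: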
28 | 29 | ``` 30 | python scripts/prepare_data.py --train_in data/ubuntu_train_subtask_1.json --validation_in data/ubuntu_dev_subtask_1.json --train_out data/ubuntu_subtask_1.tfrecords --validation_out data/ubuntu_dev_subtask_1.tfrecords --vocab_path data/ubuntu_subtask_1.txt --vocab_processor data/ubuntu_subtask_1.bin 31 | ``` 32 | 33 | #### Training 34 | 35 | The following command can be used to train the model for Ubuntu subtask 1. A similar command works for subtask 1 of the Advising data as well. 36 | 37 | ``` 38 | python train.py --train_in data/ubuntu_subtask_1.tfrecords --validation_in data/ubuntu_dev_subtask_1.tfrecords --glove_path data/glove.42B.300d.txt --vocab_path data/ubuntu_subtask_1.txt --embedding_dim=300 --batch_size=16 39 | ``` 40 | 41 | The GloVe embeddings can be downloaded from [here](https://nlp.stanford.edu/projects/glove/). 42 | 43 | Check `hparams.py` for all command-line arguments. 44 | 45 | #### Model 46 | 47 | This baseline model extends the dual-encoder model used [here](http://www.wildml.com/2016/07/deep-learning-for-chatbots-2-retrieval-based-model-tensorflow). The architecture of the model is shown in the figure below. ![](images/architecture.png) 48 | 49 | 50 | 51 | #### Dual Encoder Baselines (Recall) 52 | 53 | Baselines are reported on the validation set. 54 | 55 | | Dataset | 1 in 100 R@1 | 1 in 100 R@2 | 1 in 100 R@5 | 1 in 100 R@10 | 1 in 100 R@50 | 56 | | :---------------: | :-------------: | :--------------------: | :----------: | :---------: | :---------: | 57 | | Ubuntu - Subtask 1 | 8.32% | 13.36% | 24.26% | 35.98% | 80.04% | 58 | | Advising - Subtask 1 | 6.20% | 9.80% | 18.40% | 29.60% | 72.80% | 59 | 60 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. dstc7-noesis documentation master file, created by 2 | sphinx-quickstart on Wed Jun 6 01:03:45 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Noetic End-to-End Response Selection Challenge 7 | ============================================== 8 | 9 | **Update** - A TensorFlow-based baseline for subtask 1 of the track is available `here `_. 10 | 11 | This challenge is part of the dialog state tracking challenge (DSTC 7) series. It provides a partial conversation, and requires participants to select the correct next utterances from a set of candidates. 12 | Unlike previous similar challenges, this task tries to push towards real-world problems by introducing: 13 | 14 | - A large number of candidates 15 | - Cases where no candidate is correct 16 | - External data 17 | 18 | This challenge is offered with two goal-oriented dialog datasets, used in 5 subtasks. 19 | A participant may participate in one, several, or all the subtasks. 20 | A full description of the track is available `here `_. 21 | 22 | If you use this data or code in your work, please cite the task description paper:: 23 | 24 | @InProceedings{dstc19task1, 25 | title = {DSTC7 Task 1: Noetic End-to-End Response Selection}, 26 | author = {Chulaka Gunasekara, Jonathan K. Kummerfeld, Lazaros Polymenakos, and Walter S. 
Lasecki}, 27 | year = {2019}, 28 | booktitle = {7th Edition of the Dialog System Technology Challenges at AAAI 2019}, 29 | url = {http://workshop.colips.org/dstc7/papers/dstc7_task1_final_report.pdf}, 30 | month = {January}, 31 | } 32 | 33 | If you use the Ubuntu data, please also cite the paper in which we describe its creation:: 34 | 35 | @Article{arxiv18disentangle, 36 | author = {Jonathan K. Kummerfeld, Sai R. Gouravajhala, Joseph Peper, Vignesh Athreya, Chulaka Gunasekara, Jatin Ganhotra, Siva Sankalp Patel, Lazaros Polymenakos, and Walter S. Lasecki}, 37 | title = {Analyzing Assumptions in Conversation Disentanglement Research Through the Lens of a New Dataset and Model}, 38 | journal = {ArXiv e-prints}, 39 | archivePrefix = {arXiv}, 40 | eprint = {1810.11118}, 41 | primaryClass = {cs.CL}, 42 | year = {2018}, 43 | month = {October}, 44 | url = {https://arxiv.org/pdf/1810.11118.pdf}, 45 | } 46 | 47 | Organizers 48 | ---------- 49 | * `Lazaros Polymenako `_, `Chulaka Gunasekara `_ – IBM Research AI 50 | * `Walter Lasecki `_, `Jonathan K. Kummerfeld `_ – University of Michigan 51 | 52 | 53 | Maintainers 54 | ----------- 55 | * `Chulaka Gunasekara `_ 56 | 57 | To get a guaranteed support you are kindly requested to open an issue. 58 | 59 | Thank you for understanding! 60 | 61 | .. toctree:: 62 | :hidden: 63 | :glob: 64 | 65 | * 66 | -------------------------------------------------------------------------------- /docs/public/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../fonts/fontawesome-webfont.eot");src:url("../fonts/fontawesome-webfont.eot?#iefix") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff") format("woff"),url("../fonts/fontawesome-webfont.ttf") format("truetype"),url("../fonts/fontawesome-webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version 
.fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} 2 | -------------------------------------------------------------------------------- /noesis-tf/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def get_id_feature(features, key, len_key, max_len): 5 | ids = features[key] 6 | ids_len = tf.squeeze(features[len_key], [1]) 7 | ids_len = tf.minimum(ids_len, tf.constant(max_len, dtype=tf.int64)) 8 | return ids, ids_len 9 | 10 | 11 | def create_train_op(loss, hparams): 12 | def exp_decay(learning_rate, global_step): 13 | return tf.train.exponential_decay(learning_rate, global_step, decay_steps=hparams.decay_steps, decay_rate=hparams.decay_rate, 14 | staircase=hparams.staircase, name="lr_decay") 15 | train_op = tf.contrib.layers.optimize_loss( 16 | loss=loss, 17 | global_step=tf.contrib.framework.get_global_step(), 18 | learning_rate=hparams.learning_rate, 19 | clip_gradients=10.0, 20 | optimizer=hparams.optimizer, 21 | learning_rate_decay_fn=exp_decay 22 | ) 23 | return train_op 24 | 25 | 26 | def create_model_fn(hparams, model_impl): 27 | def model_fn(features, targets, mode): 28 | context, context_len = get_id_feature( 29 | features, "context", "context_len", hparams.max_context_len) 30 | 31 | all_utterances = [] 32 | all_utterances_lens = [] 33 | 34 | for i in range(100): 35 | option, option_len = get_id_feature(features, 36 | "option_{}".format(i), 37 | "option_{}_len".format(i), 38 | hparams.max_utterance_len) 39 | all_utterances.append(option) 40 | all_utterances_lens.append(option_len) 41 | 42 | if mode == tf.contrib.learn.ModeKeys.TRAIN: 43 | probs, loss = model_impl( 44 | hparams, 45 | mode, 46 | context, 47 | context_len, 48 | all_utterances, 49 | tf.transpose(tf.stack(all_utterances_lens, axis=0)), 50 | targets, 51 | hparams.batch_size) 52 | train_op = create_train_op(loss, hparams) 53 | return probs, loss, train_op 54 | 55 | if mode == tf.contrib.learn.ModeKeys.INFER: 56 | 57 | probs, loss = model_impl( 58 | hparams, 59 | mode, 60 | tf.concat(0, context), 61 | tf.concat(0, context_len), 62 | tf.concat(0, all_utterances), 63 | tf.concat(0, all_utterances_lens), 64 | None, 65 | hparams.eval_batch_size) 66 
--------------------------------------------------------------------------------
/noesis/evaluator/evaluator.py:
--------------------------------------------------------------------------------
from __future__ import print_function, division

import numpy as np
import torch
from torch.nn import CrossEntropyLoss


class Evaluator(object):
    """ Class to evaluate models with given datasets.

    Args:
        loss_func (torch.nn.CrossEntropyLoss, optional): loss for evaluator (default: torch.nn.CrossEntropyLoss)
        batch_size (int, optional): batch size for evaluator (default: 64)
    """

    def __init__(self, loss_func=CrossEntropyLoss(), batch_size=64):
        self.loss_func = loss_func
        self.batch_size = batch_size

    def evaluate(self, model, data):
        """ Evaluate a model on given dataset and return performance.

        Args:
            model (networks.dual_encoder.DualEncoder): model to evaluate
            data (dataset.dataset.Dataset): dataset to evaluate against

        Returns:
            loss (float): loss of the given model on the given dataset
            accuracy (float): fraction of instances whose correct candidate is ranked first
            recall (dict): recall@k for k in {1, 2, 5, 10, 50, 100}
        """
        model.eval()

        match = 0
        total = 0
        recall = {'@1': 0, '@2': 0, '@5': 0, '@10': 0, '@50': 0, '@100': 0}
        loss = 0

        with torch.no_grad():
            for batch in data.make_batches(self.batch_size):
                context_variable = torch.tensor(batch[0])
                responses_variable = torch.tensor(batch[1])
                target_variable = torch.tensor(batch[2])
                context_lengths_variable = torch.tensor(batch[3])
                responses_lengths_variable = torch.tensor(batch[4])
                if torch.cuda.is_available():
                    context_variable = context_variable.cuda()
                    responses_variable = responses_variable.cuda()
                    target_variable = target_variable.cuda()
                    context_lengths_variable = context_lengths_variable.cuda()
                    responses_lengths_variable = responses_lengths_variable.cuda()

                outputs = model(context_variable, responses_variable,
                                context_lengths_variable, responses_lengths_variable)

                # Get loss
                if len(outputs.size()) == 1:
                    outputs = outputs.unsqueeze(0)
                loss += self.loss_func(outputs, target_variable).item()

                # Rank the candidates by score, best first, and find the
                # 0-based rank of the correct candidate for each instance.
                scores = outputs.cpu().numpy()
                targets = target_variable.cpu().numpy()
                order = np.argsort(-scores, axis=1)
                ranks = np.where(order == targets[:, None])[1]
                num_samples = scores.shape[0]

                match += int(np.sum(ranks == 0))
                recall['@1'] += int(np.sum(ranks < 1))
                recall['@2'] += int(np.sum(ranks < 2))
                recall['@5'] += int(np.sum(ranks < 5))
                recall['@10'] += int(np.sum(ranks < 10))
                recall['@50'] += int(np.sum(ranks < 50))
                recall['@100'] += int(np.sum(ranks < 100))
                total += num_samples

        if total == 0:
            accuracy = float('nan')
            recall = {k: float('nan') for k in recall}
        else:
            accuracy = match / total
            recall = {k: v / total for k, v in recall.items()}

        return loss, accuracy, recall
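The rank bookkeeping above is easy to sanity-check in isolation. A small, self-contained NumPy sketch (toy scores, not project code): a descending `argsort` lists candidate ids best-first, and the position of the correct id in that list is its 0-based rank.

import numpy as np

# Toy scores for 2 instances x 4 candidates; the correct candidate ids are 2 and 0.
scores = np.array([[0.1, 0.3, 0.9, 0.2],
                   [0.8, 0.1, 0.4, 0.2]])
targets = np.array([2, 0])

order = np.argsort(-scores, axis=1)             # candidate ids, best score first
ranks = np.where(order == targets[:, None])[1]  # 0-based rank of the correct id
print(ranks)                                    # [0 0]: both ranked first
print(np.mean(ranks < 2))                       # recall@2 = 1.0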
--------------------------------------------------------------------------------
/noesis-tf/util/blocks.py:
--------------------------------------------------------------------------------
"""
Functions and components that can be slotted into tensorflow models.

TODO: Write functions for various types of attention.
"""

import tensorflow as tf


def length(sequence):
    """
    Get true length of sequences (without padding), and mask for true-length in max-length.

    Input of shape: (batch_size, max_seq_length, hidden_dim)
    Output shapes,
    length: (batch_size)
    mask: (batch_size, max_seq_length, 1)
    """
    # Collapse the hidden dimension first so that length and mask have the
    # shapes documented above.
    populated = tf.sign(tf.reduce_max(tf.abs(sequence), axis=2))
    length = tf.cast(tf.reduce_sum(populated, axis=1), tf.int32)
    mask = tf.cast(tf.expand_dims(populated, -1), tf.float32)
    return length, mask


def biLSTM(inputs, dim, seq_len, name):
    """
    A Bi-Directional LSTM layer. Returns forward and backward hidden states as a tuple, and cell states as a tuple.

    Output of hidden states: [(batch_size, max_seq_length, hidden_dim), (batch_size, max_seq_length, hidden_dim)]
    Same shape for cell states.
    """
    with tf.name_scope(name):
        with tf.variable_scope('forward' + name):
            lstm_fwd = tf.contrib.rnn.LSTMCell(num_units=dim)
        with tf.variable_scope('backward' + name):
            lstm_bwd = tf.contrib.rnn.LSTMCell(num_units=dim)

        hidden_states, cell_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fwd, cell_bw=lstm_bwd,
                                                                     inputs=inputs, sequence_length=seq_len,
                                                                     dtype=tf.float32, scope=name)

    return hidden_states, cell_states


def LSTM(inputs, dim, seq_len, name):
    """
    An LSTM layer. Returns hidden states and cell states as a tuple.

    Output shape of hidden states: (batch_size, max_seq_length, hidden_dim)
    Same shape for cell states.
    """
    with tf.name_scope(name):
        cell = tf.contrib.rnn.LSTMCell(num_units=dim)
        hidden_states, cell_states = tf.nn.dynamic_rnn(cell, inputs=inputs, sequence_length=seq_len,
                                                       dtype=tf.float32, scope=name)

    return hidden_states, cell_states


def last_output(output, true_length):
    """
    Get the last hidden state from a dynamically unrolled RNN.

    Input of shape (batch_size, max_seq_length, hidden_dim).
    true_length: Tensor of shape (batch_size). Such a tensor is given by the length() function.
    Output of shape (batch_size, hidden_dim).
    """
    max_length = int(output.get_shape()[1])
    # One-hot mask that selects, for each sequence, the state at its true last step.
    length_mask = tf.expand_dims(tf.one_hot(true_length - 1, max_length, on_value=1., off_value=0.), -1)
    last_output = tf.reduce_sum(tf.multiply(output, length_mask), 1)
    return last_output


def masked_softmax(scores, mask):
    """
    Calculate a softmax over the true sequence length (without padding), rather than the max sequence length.

    Input shape: (batch_size, max_seq_length, hidden_dim).
    mask parameter: Tensor of shape (batch_size, max_seq_length). Such a mask is given by the length() function.
    """
    # Subtract the per-row max before exponentiating, for numerical stability.
    numerator = tf.exp(tf.subtract(scores, tf.reduce_max(scores, 1, keep_dims=True))) * mask
    denominator = tf.reduce_sum(numerator, 1, keep_dims=True)
    weights = tf.div(numerator, denominator)
    return weights
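As a quick sanity check on the shapes these helpers document, here is a small sketch. It assumes TensorFlow 1.x and that `blocks.py` is importable as `util.blocks`, as in this package; everything else is toy data.

import tensorflow as tf
from util.blocks import length, LSTM, last_output

# Toy batch: 2 sequences, max length 4, embedding size 3; the second
# sequence is zero-padded after its first two steps.
inputs = tf.constant([[[1., 2., 3.], [1., 2., 3.], [1., 2., 3.], [1., 2., 3.]],
                      [[4., 5., 6.], [4., 5., 6.], [0., 0., 0.], [0., 0., 0.]]])

seq_len, mask = length(inputs)             # seq_len: (2,), mask: (2, 4, 1)
outputs, _ = LSTM(inputs, dim=8, seq_len=seq_len, name='demo')
final = last_output(outputs, seq_len)      # (2, 8): state at each true last step

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(seq_len))               # [4 2]
    print(sess.run(final).shape)           # (2, 8)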
75 | """ 76 | numerator = tf.exp(tf.subtract(scores, tf.reduce_max(scores, 1, keep_dims=True))) * mask 77 | denominator = tf.reduce_sum(numerator, 1, keep_dims=True) 78 | weights = tf.div(numerator, denominator) 79 | return weights -------------------------------------------------------------------------------- /docs/source/evaluations.rst: -------------------------------------------------------------------------------- 1 | Evaluations 2 | =========== 3 | 4 | Metrics 5 | ------- 6 | 7 | For each test instance, we will expect you to return a set of 100 choices (candidate ids) from the set of possible follow-up sentences and a probability distribution over those 100 choices. 8 | As competition metrics we will compute range of scores, including recall@k, MRR(mean reciprocal rank) and MAP(mean average precision). 9 | 10 | Following evaluation metrics will be used to evaluate your submissions. 11 | 12 | +----------+------------------------------------------+-----------------------------------------------+ 13 | | Sub-Task | Ubuntu | Advising | 14 | +==========+==========================================+===============================================+ 15 | | 1 | Recall @1, Recall @10, Recall @50, MRR | Recall @1, Recall @10, Recall @50, MRR | 16 | +----------+------------------------------------------+-----------------------------------------------+ 17 | | 2 | Recall @1, Recall @10, Recall @50, MRR | | 18 | +----------+------------------------------------------+-----------------------------------------------+ 19 | | 3 | | Recall @1, Recall @10, Recall @50, MRR, MAP | 20 | +----------+------------------------------------------+-----------------------------------------------+ 21 | | 4 | Recall @1, Recall @10, Recall @50, MRR | Recall @1, Recall @10, Recall @50, MRR | 22 | +----------+------------------------------------------+-----------------------------------------------+ 23 | | 5 | Recall @1, Recall @10, Recall @50, MRR | Recall @1, Recall @10, Recall @50, MRR | 24 | +----------+------------------------------------------+-----------------------------------------------+ 25 | 26 | **Note:** 27 | We will evaluate MAP for sub-task 3 with Advising data as the you are supposed to return the correct response and all the paraphrases associated with it. 28 | 29 | Best Scores 30 | ----------- 31 | 32 | The ranking considers the average of Recall\@10 and MRR. Best Recall\@10 and MRR scores for each subtask is is shown in the below table. 
Best Scores
-----------

The ranking considers the average of Recall\@10 and MRR. The best Recall\@10 and MRR scores for each sub-task are shown in the tables below.

Recall\@10

+----------+---------+-----------------+-----------------+
| Sub-Task | Ubuntu  | Advising-Case-1 | Advising-Case-2 |
+==========+=========+=================+=================+
| 1        | 0.902   | 0.85            | 0.63            |
+----------+---------+-----------------+-----------------+
| 2        | 0.361   | NA              | NA              |
+----------+---------+-----------------+-----------------+
| 3        | NA      | 0.906           | 0.75            |
+----------+---------+-----------------+-----------------+
| 4        | 0.739   | 0.652           | 0.508           |
+----------+---------+-----------------+-----------------+
| 5        | 0.905   | 0.864           | 0.63            |
+----------+---------+-----------------+-----------------+


MRR

+----------+---------+-----------------+-----------------+
| Sub-Task | Ubuntu  | Advising-Case-1 | Advising-Case-2 |
+==========+=========+=================+=================+
| 1        | 0.7350  | 0.6078          | 0.3390          |
+----------+---------+-----------------+-----------------+
| 2        | 0.2528  | NA              | NA              |
+----------+---------+-----------------+-----------------+
| 3        | NA      | 0.6238          | 0.4341          |
+----------+---------+-----------------+-----------------+
| 4        | 0.5891  | 0.3495          | 0.2422          |
+----------+---------+-----------------+-----------------+
| 5        | 0.7399  | 0.6455          | 0.3390          |
+----------+---------+-----------------+-----------------+
--------------------------------------------------------------------------------
/noesis/sample.py:
--------------------------------------------------------------------------------
import os
import argparse
import logging

import torch
from torch.nn import CrossEntropyLoss

from networks.dual_encoder import Encoder, DualEncoder
from trainers.supervised_trainer import SupervisedTrainer
from util.checkpoint import Checkpoint
from dataset.dataset import Dataset
from evaluator.evaluator import Evaluator

# Sample usage:
#     # training
#     python sample.py --train_path $TRAIN_PATH --dev_path $DEV_PATH --expt_dir $EXPT_PATH
#     # resuming from the latest checkpoint of the experiment
#     python sample.py --train_path $TRAIN_PATH --dev_path $DEV_PATH --expt_dir $EXPT_PATH --resume
#     # resuming from a specific checkpoint
#     python sample.py --train_path $TRAIN_PATH --dev_path $DEV_PATH --expt_dir $EXPT_PATH --load_checkpoint $CHECKPOINT_DIR

parser = argparse.ArgumentParser()
parser.add_argument('--train_path', action='store', dest='train_path',
                    help='Path to train data')
parser.add_argument('--dev_path', action='store', dest='dev_path',
                    help='Path to dev data')
parser.add_argument('--test_path', action='store', dest='test_path',
                    help='Path to test data')
parser.add_argument('--expt_dir', action='store', dest='expt_dir', default='./experiment',
                    help='Path to experiment directory. If load_checkpoint is True, '
                         'then path to checkpoint directory has to be provided')
parser.add_argument('--load_checkpoint', action='store', dest='load_checkpoint',
                    help='The name of the checkpoint to load, usually an encoded time string')
parser.add_argument('--resume', action='store_true', dest='resume', default=False,
                    help='Indicates if training has to be resumed from the latest checkpoint')
parser.add_argument('--log-level', dest='log_level', default='info',
                    help='Logging level.')

opt = parser.parse_args()

LOG_FORMAT = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=getattr(logging, opt.log_level.upper()))
logging.info(opt)

if opt.load_checkpoint is not None:
    checkpoint_path = os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)
    logging.info("loading checkpoint from {}".format(checkpoint_path))
    checkpoint = Checkpoint.load(checkpoint_path)
    dual_encoder = checkpoint.model
    vocab = checkpoint.vocab
    # The dev set is still needed for the final evaluation below.
    dev = Dataset.from_file(opt.dev_path, vocab=vocab)
else:
    # Prepare dataset
    train = Dataset.from_file(opt.train_path)
    dev = Dataset.from_file(opt.dev_path, vocab=train.vocab)
    vocab = train.vocab
    max_len = 500

    # Prepare loss
    loss_func = CrossEntropyLoss()
    if torch.cuda.is_available():
        loss_func.cuda()

    optimizer = None
    if not opt.resume:
        # Initialize model
        hidden_size = 128
        bidirectional = True
        context_encoder = Encoder(vocab.get_vocab_size(), max_len, hidden_size,
                                  bidirectional=bidirectional, variable_lengths=True)
        response_encoder = Encoder(vocab.get_vocab_size(), max_len, hidden_size,
                                   bidirectional=bidirectional, variable_lengths=True)

        dual_encoder = DualEncoder(context_encoder, response_encoder)
        if torch.cuda.is_available():
            dual_encoder.cuda()

        for param in dual_encoder.parameters():
            param.data.uniform_(-0.08, 0.08)

    # train
    t = SupervisedTrainer(loss_func=loss_func, batch_size=1,
                          checkpoint_every=30, print_every=100, expt_dir=opt.expt_dir)
    t.train(dual_encoder, train, batch_size=1, num_epochs=20,
            dev_data=dev, optimizer=optimizer, resume=opt.resume)

evaluator = Evaluator(batch_size=1)
# Evaluator.evaluate returns (loss, accuracy, recall@k dict).
loss, accuracy, recall = evaluator.evaluate(dual_encoder, dev)
print("Loss: {}, Accuracy: {}, Recall: {}".format(loss, accuracy, recall))