├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── models
├── __init__.py
├── dual_encoder.py
└── helpers.py
├── notebooks
├── Data Exploration.ipynb
└── TFIDF Baseline Evaluation.ipynb
├── requirements.txt
├── scripts
└── prepare_data.py
├── udc_hparams.py
├── udc_inputs.py
├── udc_metrics.py
├── udc_model.py
├── udc_predict.py
├── udc_test.py
└── udc_train.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | data/test.csv filter=lfs diff=lfs merge=lfs -text
2 | data/train.csv filter=lfs diff=lfs merge=lfs -text
3 | data/valid.csv filter=lfs diff=lfs merge=lfs -text
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
3 | tmp/
4 | data/
5 | runs/
6 |
7 | # Created by https://www.gitignore.io/api/python,ipythonnotebook
8 |
9 | ### Python ###
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 | *$py.class
14 |
15 | # C extensions
16 | *.so
17 |
18 | # Distribution / packaging
19 | .Python
20 | env/
21 | build/
22 | develop-eggs/
23 | dist/
24 | downloads/
25 | eggs/
26 | .eggs/
27 | lib/
28 | lib64/
29 | parts/
30 | sdist/
31 | var/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *,cover
55 | .hypothesis/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 |
65 | # Flask instance folder
66 | instance/
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # IPython Notebook
78 | .ipynb_checkpoints
79 |
80 | # pyenv
81 | .python-version
82 |
83 | # celery beat schedule file
84 | celerybeat-schedule
85 |
86 | # dotenv
87 | .env
88 |
89 | # virtualenv
90 | venv/
91 | ENV/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 |
96 | # Rope project settings
97 | .ropeproject
98 |
99 |
100 | ### IPythonNotebook ###
101 | # Temporary data
102 | .ipynb_checkpoints/
103 |
104 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) [year] [fullname]
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Retrieval-Based Conversational Model in Tensorflow (Ubuntu Dialog Corpus)
2 |
3 | #### [Please read the blog post for this code](http://www.wildml.com/2016/07/deep-learning-for-chatbots-2-retrieval-based-model-tensorflow)
4 |
5 | #### Overview
6 |
7 | The code here implements the Dual LSTM Encoder model from [The Ubuntu Dialogue Corpus: A Large Dataset for Research in Unstructured Multi-Turn Dialogue Systems](http://arxiv.org/abs/1506.08909).
8 |
9 | #### Setup
10 |
11 | This code uses Python 3 and Tensorflow >= 0.9. Clone the repository and install all required packages:
12 |
13 | ```
14 | pip install -U pip
15 | pip install numpy scikit-learn pandas jupyter
16 | ```
17 |
18 | #### Get the Data
19 |
20 |
21 | Download the train/dev/test data [here](https://drive.google.com/open?id=0B_bZck-ksdkpVEtVc1R6Y01HMWM) and extract the archive into `./data`.
22 |
23 |
24 | #### Training
25 |
26 | ```
27 | python udc_train.py
28 | ```
29 |
30 |
31 | #### Evaluation
32 |
33 | ```
34 | python udc_test.py --model_dir=...
35 | ```
36 |
37 |
38 | #### Prediction
39 |
40 | ```
41 | python udc_predict.py --model_dir=...
42 | ```
43 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dennybritz/chatbot-retrieval/aac42717f156665e3b8341f20514254733ad3eb4/models/__init__.py
--------------------------------------------------------------------------------
/models/dual_encoder.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from models import helpers
4 |
5 | FLAGS = tf.flags.FLAGS
6 |
def get_embeddings(hparams):
    """Create the ``word_embeddings`` variable used by the model.

    When both ``hparams.glove_path`` and ``hparams.vocab_path`` are set, the
    initial value is a matrix built by the ``helpers`` module from the
    vocabulary file and the GloVe vectors. Otherwise a uniform random
    initializer over [-0.25, 0.25] is used.

    Args:
        hparams: Object exposing ``glove_path``, ``vocab_path``,
            ``vocab_size`` and ``embedding_dim``.

    Returns:
        The result of ``tf.get_variable("word_embeddings", ...)`` requested
        with shape ``[hparams.vocab_size, hparams.embedding_dim]``.
    """
    use_pretrained = hparams.glove_path and hparams.vocab_path

    if not use_pretrained:
        tf.logging.info("No glove/vocab path specificed, starting with random embeddings.")
        initializer = tf.random_uniform_initializer(-0.25, 0.25)
    else:
        tf.logging.info("Loading Glove embeddings...")
        words, word_to_idx = helpers.load_vocab(hparams.vocab_path)
        # Only keep GloVe rows for words that are part of our vocabulary.
        vectors, glove_index = helpers.load_glove_vectors(
            hparams.glove_path, vocab=set(words))
        initializer = helpers.build_initial_embedding_matrix(
            word_to_idx, glove_index, vectors, hparams.embedding_dim)

    embedding_shape = [hparams.vocab_size, hparams.embedding_dim]
    return tf.get_variable(
        "word_embeddings",
        shape=embedding_shape,
        initializer=initializer)
21 |
22 |
def dual_encoder_model(
    hparams,
    mode,
    context,
    context_len,
    utterance,
    utterance_len,
    targets):
    """Build the dual-encoder graph and return ``(probs, mean_loss)``.

    The context and the utterance are embedded with one shared embedding
    matrix, encoded by a single LSTM cell (both inputs are concatenated along
    axis 0 and run through one ``dynamic_rnn`` call, so they share weights),
    and scored with the bilinear form ``(c * M) * r`` where ``M`` is a
    learned ``[rnn_dim, rnn_dim]`` matrix.

    Args:
        hparams: Hyper-parameter object; ``rnn_dim`` is read here and the
            object is also passed on to ``get_embeddings``.
        mode: Compared against ``tf.contrib.learn.ModeKeys.INFER`` to decide
            whether a loss is built.
        context: Tensor of ids looked up in the embedding matrix
            (presumably [batch, time] word ids -- TODO confirm with caller).
        context_len: Passed to ``dynamic_rnn`` as part of ``sequence_length``.
        utterance: Tensor of ids looked up in the embedding matrix, same
            assumptions as ``context``.
        utterance_len: Passed to ``dynamic_rnn`` as part of
            ``sequence_length``.
        targets: Labels; cast to float and fed to the sigmoid cross-entropy
            loss. Not used when ``mode`` is INFER.

    Returns:
        A tuple ``(probs, mean_loss)``. ``probs`` is the sigmoid of the
        logits. ``mean_loss`` is ``None`` in INFER mode, otherwise the mean
        of the per-example sigmoid cross-entropy losses.
    """

    # Initialize embeddings randomly or with pre-trained vectors if available
    embeddings_W = get_embeddings(hparams)

    # Embed the context and the utterance
    context_embedded = tf.nn.embedding_lookup(
        embeddings_W, context, name="embed_context")
    utterance_embedded = tf.nn.embedding_lookup(
        embeddings_W, utterance, name="embed_utterance")


    # Build the RNN
    with tf.variable_scope("rnn") as vs:
        # We use an LSTM Cell
        cell = tf.nn.rnn_cell.LSTMCell(
            hparams.rnn_dim,
            forget_bias=2.0,
            use_peepholes=True,
            state_is_tuple=True)

        # Run the utterance and context through the RNN.
        # Both are stacked along axis 0 so one RNN call encodes them with the
        # same weights. NOTE(review): `tf.concat(0, ...)` / `tf.split(0, 2, ...)`
        # use the axis-first argument order of the TensorFlow version this
        # project targets -- confirm before upgrading TensorFlow.
        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
            cell,
            tf.concat(0, [context_embedded, utterance_embedded]),
            sequence_length=tf.concat(0, [context_len, utterance_len]),
            dtype=tf.float32)
        # Undo the stacking: first half is the context encoding, second half
        # the utterance encoding (final hidden state `h` of the LSTM).
        encoding_context, encoding_utterance = tf.split(0, 2, rnn_states.h)

    with tf.variable_scope("prediction") as vs:
        M = tf.get_variable("M",
            shape=[hparams.rnn_dim, hparams.rnn_dim],
            initializer=tf.truncated_normal_initializer())

        # "Predict" a response: c * M
        generated_response = tf.matmul(encoding_context, M)
        # Add a trailing axis to both operands so the per-example dot product
        # can be expressed as a batched matrix multiplication.
        generated_response = tf.expand_dims(generated_response, 2)
        encoding_utterance = tf.expand_dims(encoding_utterance, 2)

        # Dot product between generated response and actual response
        # (c * M) * r
        # NOTE(review): the positional `True` is presumably `adj_x`, i.e. the
        # first operand is transposed per example -- confirm against the
        # TensorFlow version in use.
        logits = tf.batch_matmul(generated_response, encoding_utterance, True)
        logits = tf.squeeze(logits, [2])

        # Apply sigmoid to convert logits to probabilities
        probs = tf.sigmoid(logits)

        # No targets are needed at inference time, so skip building the loss.
        if mode == tf.contrib.learn.ModeKeys.INFER:
            return probs, None

        # Calculate the binary cross-entropy loss
        losses = tf.nn.sigmoid_cross_entropy_with_logits(logits, tf.to_float(targets))

    # Mean loss across the batch of examples
    mean_loss = tf.reduce_mean(losses, name="mean_loss")
    return probs, mean_loss
86 |
--------------------------------------------------------------------------------
/models/helpers.py:
--------------------------------------------------------------------------------
1 | import array
2 | import numpy as np
3 | import tensorflow as tf
4 | from collections import defaultdict
5 |
def load_vocab(filename):
    """Load a vocabulary file containing one token per line.

    Args:
        filename: Path to a UTF-8 encoded text file; line ``i`` holds the
            token whose id is ``i``.

    Returns:
        A two-element list ``[vocab, dct]`` where ``vocab`` is the list of
        tokens in file order and ``dct`` is a ``defaultdict(int)`` mapping
        each token to its line index. Because of the ``int`` default factory,
        indexing ``dct`` with an unknown token yields ``0``. If a token
        appears more than once, the last occurrence wins.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default:
    # the GloVe file is read as UTF-8 too, and tokens must compare equal
    # across both files for pre-trained vectors to be matched.
    with open(filename, encoding="utf-8") as f:
        vocab = f.read().splitlines()

    dct = defaultdict(int)
    for idx, word in enumerate(vocab):
        dct[word] = idx
    return [vocab, dct]
14 |
def load_glove_vectors(filename, vocab):
    """
    Load glove vectors from a .txt file.
    Optionally limit the vocabulary to save memory. `vocab` should be a set.

    Each non-blank line is expected to be ``word v1 v2 ... vN`` separated by
    single spaces.

    Args:
        filename: Path to the UTF-8 encoded GloVe text file.
        vocab: Set of words to keep. If it is ``None`` or empty, every word
            in the file is loaded.

    Returns:
        A two-element list ``[vectors, dct]``. ``vectors`` is a float64
        NumPy array with one row per stored vector, and ``dct`` maps each
        word to its row index in ``vectors``. If a word occurs several times
        in the file, ``dct`` points at its last occurrence.

    Raises:
        ValueError: If a vector component cannot be parsed as a float, or if
            the stored vectors do not all have the same length.
    """
    dct = {}
    vectors = array.array('d')
    current_idx = 0
    # Stays 0 for an empty file so the final reshape is still well defined.
    word_dim = 0
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            # Split on single spaces only: some GloVe tokens contain other
            # whitespace characters that a bare split() would break apart.
            tokens = line.split(" ")
            word = tokens[0]
            entries = tokens[1:]
            if not entries:
                # Blank line or a word without components: nothing to store.
                continue
            word_dim = len(entries)
            if not vocab or word in vocab:
                dct[word] = current_idx
                vectors.extend(float(x) for x in entries)
                current_idx += 1
    num_vectors = len(dct)
    # `vocab` is optional, so do not assume it supports len().
    vocab_size = len(vocab) if vocab is not None else 0
    tf.logging.info("Found {} out of {} vectors in Glove".format(num_vectors, vocab_size))
    # Use the number of rows actually stored (not len(dct)) so a repeated
    # word in the file cannot make the reshape fail.
    return [np.array(vectors).reshape(current_idx, word_dim), dct]
36 |
37 |
def build_initial_embedding_matrix(vocab_dict, glove_dict, glove_vectors, embedding_dim):
    """Build the initial embedding matrix, seeded with GloVe where available.

    Every row starts as uniform random noise in [-0.25, 0.25). Rows of words
    that have a GloVe vector are then overwritten with that vector.

    Args:
        vocab_dict: Mapping from word to its row index in the embedding
            matrix; its length determines the number of rows.
        glove_dict: Mapping from word to its row index in ``glove_vectors``.
        glove_vectors: Array-like indexed by the values of ``glove_dict``;
            each row must be assignable to a row of length
            ``embedding_dim``.
        embedding_dim: Number of columns of the embedding matrix.

    Returns:
        A ``float32`` NumPy array of shape
        ``(len(vocab_dict), embedding_dim)``.
    """
    initial_embeddings = np.random.uniform(
        -0.25, 0.25, (len(vocab_dict), embedding_dim)).astype("float32")
    for word, glove_word_idx in glove_dict.items():
        word_idx = vocab_dict.get(word)
        if word_idx is None:
            # A None index means np.newaxis to NumPy, so assigning through it
            # would silently overwrite *every* row. Skip words that are not
            # part of the vocabulary instead.
            continue
        initial_embeddings[word_idx, :] = glove_vectors[glove_word_idx]
    return initial_embeddings
44 |
--------------------------------------------------------------------------------
/notebooks/Data Exploration.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "%matplotlib inline\n",
12 | "\n",
13 | "import pandas as pd\n",
14 | "import matplotlib.pyplot as plt\n",
15 | "import matplotlib\n",
16 | "matplotlib.style.use('ggplot')"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "# Load Data\n",
28 | "train_df = pd.read_csv(\"../data/train.csv\")\n",
29 | "train_df.Label = train_df.Label.astype('category')\n",
30 | "\n",
31 | "test_df = pd.read_csv(\"../data/test.csv\")\n",
32 | "validation_df = pd.read_csv(\"../data/valid.csv\")"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 3,
38 | "metadata": {
39 | "collapsed": false
40 | },
41 | "outputs": [
42 | {
43 | "data": {
44 | "text/html": [
45 | "
\n",
46 | "
\n",
47 | " \n",
48 | " \n",
49 | " | \n",
50 | " Context | \n",
51 | " Utterance | \n",
52 | " Label | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " count | \n",
58 | " 1000000 | \n",
59 | " 1000000 | \n",
60 | " 1000000 | \n",
61 | "
\n",
62 | " \n",
63 | " unique | \n",
64 | " 957097 | \n",
65 | " 736145 | \n",
66 | " 2 | \n",
67 | "
\n",
68 | " \n",
69 | " top | \n",
70 | " ! op __eou__ __eot__ ? __eou__ __eot__ | \n",
71 | " thank __eou__ | \n",
72 | " 0 | \n",
73 | "
\n",
74 | " \n",
75 | " freq | \n",
76 | " 15 | \n",
77 | " 12426 | \n",
78 | " 500127 | \n",
79 | "
\n",
80 | " \n",
81 | "
\n",
82 | "
"
83 | ],
84 | "text/plain": [
85 | " Context Utterance Label\n",
86 | "count 1000000 1000000 1000000\n",
87 | "unique 957097 736145 2\n",
88 | "top ! op __eou__ __eot__ ? __eou__ __eot__ thank __eou__ 0\n",
89 | "freq 15 12426 500127"
90 | ]
91 | },
92 | "execution_count": 3,
93 | "metadata": {},
94 | "output_type": "execute_result"
95 | }
96 | ],
97 | "source": [
98 | "train_df.describe()"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 4,
104 | "metadata": {
105 | "collapsed": false
106 | },
107 | "outputs": [
108 | {
109 | "data": {
110 | "text/plain": [
111 | ""
112 | ]
113 | },
114 | "execution_count": 4,
115 | "metadata": {},
116 | "output_type": "execute_result"
117 | },
118 | {
119 | "data": {
120 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEPCAYAAACHuClZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9YlHW+//HnzJi6w4xOg5CYaSl6alHQI16pndT8UVdZ\n53LVpV+nc/DHubLW1ebql7Tt5betZPslomi6i53tLKda04On67SnTiXoWpSQokBZIuVJk5SZIEYw\ngbm/f3hxI2ExcIOD9XpcFxfMPffnnve8wfs1n/u+Z7QZhmEgIiJigT3SBYiIyIVPYSIiIpYpTERE\nxDKFiYiIWKYwERERyxQmIiJimcJEutwnn3yC3W5nz549HRoXFxfHqlWruqmqrrVhwwbcbneP2U6z\n22+/nX/8x3/ssu2dbePGjfTr18+83dW1f1daWhqJiYndtn3pWgqTnyC73Y7D4cBut5/za9iwYZa2\nP3LkSCorKxkzZkyHxpWWlnLvvfdaeuxwWQ0um82GzWazXEc427ntttvM31nv3r2JjY1lypQpZGRk\ncOrUqVbr/uEPfyAnJyesx25qasJut7N58+aw1k9NTaWioqJDtYfjnXfewW63c/z48VbLf/vb37Jj\nxw7L25fzo1ekC5Dzr7Ky0vz53XffZd68eezdu5eBAwcC4HA4zjmuoaGBiy66qN3t22w2YmNjO1xX\ndHR0h8f8FNhsNmbOnElOTg5NTU1UVVWxc+dOnnzySTZt2sTf/vY3Lr74YoAOzRQ68n7lpqYm+vTp\nQ58+fTpcfzh12Gy2NvU4nU6cTmeXP550D81MfoJiY2PNL6/XC8CAAQPMZc079bi4OH73u99x9913\nEx0dzfXXXw/Ac889R1JSEi6Xi0svvZS77rqLEydOmNv/7mGu5tu5ubncdNNNREVFMWLECF5++eVW\ndX13thAXF8fKlStZsmQJF198MXFxcSxfvrzVmLq6OhYsWED//v2JiYnh/vvv58EHH2T06NGWevTQ\nQw9x1VVXERUVxdChQ1m6dCknT55ss97//M//8POf/5yf/exnXHPNNXz00Uet7v/ggw+YMWMGLpeL\ngQMHcuutt3L06NEO19OnTx9iYmIYOHAgo0aN4t5776WgoIAvvviCRx991Fzvu4e59u/fz8yZM/F4\nPLhcLkaNGsWrr74KwGWXXYbNZuP222/HbrebO+7mw1f/+7//y5gxY+jTpw+7du1i48aN5wyrH+rB\nuQ6FHTp0CLvdzu7du/nkk0/Mv6u4uDjsdjs33XQTAMuXL2/ze8zOzubKK6+kT58+DBkyhMcee6zV\n/RMnTmTJkiWsWLGCSy65hAEDBrBo0aI2MzjpegoT+UGrVq3iiiuuYPfu3WzcuBE4c5gsMzOTsrIy\ntmzZwsGDB/nnf/7nVuPOdfhj+fLl3H333ZSUlDB79mxSU1P5v//7v3Yff/jw4RQVFZGRkcGzzz7b\nKoTuu+8+3nrrLTZv3sy7776LzWYjOzvb8uEXt9vNCy+8wMcff8ymTZt44403eOCBB1qtc+rUKVas\nWMGmTZvYvXs3LpeLm2++mcbGRgCKi4uZNm0aM2bMoLi4mLfeeouGhgZuuOEGmpqaLNUHMGTIEG69\n9Va2bNnyvev88pe/ZMiQIezevZvS0lKeeeYZ87zH3r17MQyDjRs3UllZyeHDh4Ezv7vm55aVlcWB\nAwfMcxff7Wt7Pfi+Q2HNy0aOHGkeZistLaWystL8/X537NatW7nnnntYvHgxH330EU8//TQZGRmk\np6e32vZLL73E6dOn2bVrFzk5Obz66qtkZGSE31jpHEN+0vLz8w273W4cPXq0zX0DBw40brnllna3\n8d577xl2u90IBAKGYRjGgQMHDJvNZnz44Yetbm/YsMEcc/r0aaNPnz7Gv//7v7d6vOeee67V7dtu\nu63VY1133XXGggULDMMwjK+//tq46K
KLjJdffrnVOmPGjDFGjx79gzV/97Ha8/LLLxv9+vUzb2/Y\nsMGw2+1GQUGBuezEiRNG3759jZdeeskwDMO47bbbjPnz57faTjAYNC666CLjzTffNLfjdrt/8LFv\nu+227/09rF692rDb7UZtbe051+3bt6/xl7/85ZxjGxsbDZvN1ub+5ufW/Ps7e/nZtYbTg3M9v/Ly\ncsNmsxkffPCBYRiG8fbbbxt2u9346quvWq23fPnyVr/H8ePHG6mpqa3Weeqpp1r9XiZMmGBcffXV\nrdaZP3++MW3atHP2QLqOZibyg8aPH99m2dtvv83111/PkCFD6NevHzNnzgQwX9l+n6SkJPPniy66\niAEDBvDVV1+FPQZg0KBB5phPP/2UpqYmrr766lbrTJw48Qe3GY6//OUvXHvttQwaNAi3282CBQsI\nBoNUV1eb69jt9lb9GTBgACNGjKCsrAyAwsJCXn75Zdxut/k1cOBAQqEQBw8etFwjtJz3+L6Z2AMP\nPMA//dM/MX36dB5//HH2798f1nYdDgdjx45td732etCVPvroI6699tpWy6ZMmUIwGGz1t/fdCz/O\n/puR7qMwkR8UFRXV6vahQ4e45ZZbuOqqq9i8eTMffviheQz+9OnTP7it3r17t7pts9kIhUKWx3TF\nFUVn27lzJ3feeSc33HADr732Gnv37mXNmjVA2+f4Q48dCoVYtGgR+/fvZ9++febXp59+yl133dUl\ntZaWlhITE9Pm99Ts8ccf58CBA8ydO5fi4mLGjx/Pk08+2e52+/btG3Zff2g9u93e5sR6Q0NDWNvt\nrM78nYl1ChPpkA8++IDGxkYyMjKYMGECI0aM4NixY23W++4Opqt3+HDmeLvD4aCgoKDV8vfff9/S\ndnft2sVll13Go48+SnJyMvHx8ec8txMKhdi9e7d5+8SJExw8eJCEhAQAkpOT2b9/P1dccQXDhg1r\n9XX2+zU66/Dhw7z66qv88pe//MH1hg0bxr333svWrVt55JFH2LBhA3Bm9uFwOCydv2mvB7GxsdTV\n1VFbW2uu8+GHH7b6e2je+bdXx89//nN27tzZall+fj5ut5uhQ4d2+jlI11CYSIcuER05ciShUIhV\nq1bx+eefs3XrVn7/+9+3u82OPEa4PB4P8+fP5+GHH+aNN97g008/5aGHHqKioiKs8Pryyy9bzRj2\n7dvHl19+yd/93d9x9OhRcnJy+Oyzz3jhhRfYtGlTm/EOh4P77ruP9957j/3793PXXXdxySWXMG/e\nPAAeffRR9uzZw4IFC/jwww/57LPPeOedd/j1r3/Nl19+2aHn+u233/LVV19x7NgxSktLWbduHZMm\nTeLyyy/nd7/73TnHfP311yxbtoz8/HwOHz7Mhx9+yFtvvWXu6AGGDh3K9u3bqaysJBAIdKim7+vB\nwIEDzR5MmjSJn/3sZzz88MMcOnSI119/vc0J88svvxyA119/nRMnTrQKnrOlpaXx0ksvsWrVKsrL\ny3nppZdIT09vc4WfRIbCRL53x3uu5cnJyaxatYo1a9aQkJDAunXrWL16dbtjf+iKno6M+a7Vq1cz\nc+ZMbr31ViZNmsTp06e544476Nu3b7tjMzIy+Pu///tWX88++yxz587l/vvv5/777ycxMZHXXnuN\np59+us34vn37smLFChYsWMDVV19NMBjk9ddfN9+Lk5iYyK5du6iqqmLmzJnmJb1NTU0dnpm8/fbb\nDBo0iKFDhzJt2jReffVVHnzwQXbv3m2+x+S7evfuzfHjx1mwYAFXXnklN998M8OGDePFF18018nM\nzGTXrl0MHTqUwYMHd6im7+vBf//3f5s9iImJ4aWXXiIvL4/ExESeeeYZnnnmmVbbuOyyy3j88cf5\nf//v/xEXF8ett956zsf6xS9+wYYNG/jjH//IqFGjSEtL4/77728VJt0xA5bw2IwwXjLW1dWxYcMG\nvv
jiC2w2G/fccw9xcXGsXr2aEydOEBsbi8/nM69Tz83NJS8vD4fDQWpqqnkStaKigvXr19PQ0MDY\nsWNJTU0FoLGxkaysLCoqKnC73fh8PgYMGACcmcbm5uYCMGfOHKZMmdIdfZAfkWuuuYZhw4bx5z//\nOdKliPx0hHPJV1ZWlrF9+3bDMM5cTnjy5Enjz3/+s7Ft2zbDMAwjNzfXyMnJMQzDML744gvjwQcf\nNBobG42vvvrKWLJkiREKhQzDMIy0tDTj4MGDhmEYxsqVK429e/cahmEYb775pvHHP/7RMAzDePfd\nd42MjAzDMAyjtrbWWLJkiXHy5EkjGAyaP7entLQ0nKf1k/Bj78XevXuNnJwc4+DBg8b+/fuN++67\nz7Db7caOHTvarPtj70VHqBct1IsWVnrR7mGuuro6Dhw4wHXXXQecOUbqdDopKioyZwlTp06lsLAQ\ngKKiIiZNmoTD4SA2Npa4uDjKy8uprq6mvr6e+Ph4ACZPnmyOKSwsNLc1YcIESktLAdi3bx+JiYk4\nnU6ioqJITEykuLi43YDsjssSL1Q/hV6sWbOG5ORkrr32Wt5//33++te/Mnny5Dbr/RR6ES71ooV6\n0cJKL9r9bK7jx4/jdrtZv349hw8fZtiwYaSmplJTU4PH4wHOnAitqakBIBAIMHLkSHO81+slEAjg\ncDhaffZSdHS0ecIvEAiY9zV/rEMwGGy1/OxtiTQbM2YMH3zwQaTLEPnJa3dmEgqF+Oyzz7jhhht4\n6qmn6NOnD9u2bWuzXlee+DK64cofERHpPu3OTLxeL9HR0QwfPhw4cxhq27ZteDweqqurze/9+/c3\n16+qqjLH+/1+vF4vXq8Xv9/fZnnzmObboVCI+vp6XC4XXq+31bTL7/czatSoNjWWlZW1Wi8lJaWj\nffjRUi9aqBct1IsW6kWLlJSUVv8lQUJCQqtLyX9Iu2Hi8XiIjo7myy+/ZNCgQZSUlDB48GAGDx5M\nfn4+s2fPJj8/n+TkZODMpaNr1qzh5ptvJhAIUFlZSXx8PDabDafTSXl5OcOHD2fnzp3ceOON5pgd\nO3YwYsQICgoKzMBISkrilVdeoa6ujlAoRElJCXfeeWebGs/1hDt6Hf+Pldvt/t7r9n9q1IsW6kUL\n9aLFoEGDOh2uYV0a/Pnnn7Nx40YaGxu55JJLuPfeewmFQmRkZFBVVUVMTAw+n8/8SIfc3Fy2b99O\nr1692lwavG7dOvPS4Pnz5wNnPl5h7dq1fP7557jdbpYtW2b+fxj5+fn853/+JzabrUOXBitMztA/\nlBbqRQv1ooV60WLQoEGdHhtWmFyIFCZn6B9KC/WihXrRQr1oYSVM9A54ERGxTGEiIiKWKUxERMQy\nhYmIiFimMBEREcsUJiIiYpnCRERELFOYiIiIZQoTERGxTGEiIiKWKUxERMQyhYmIiFimMBEREcsU\nJiIiYpnCRERELFOYiIiIZQoTERGxTGEiIiKWKUxERMQyhYmIiFimMBEREcsUJiIiYpnCRERELFOY\niIiIZQoTERGxTGEiIiKWKUxERMQyhYmIiFjWK5yVfvWrX+F0OrHZbDgcDtLT0wkGg6xevZoTJ04Q\nGxuLz+fD6XQCkJubS15eHg6Hg9TUVJKSkgCoqKhg/fr1NDQ0MHbsWFJTUwFobGwkKyuLiooK3G43\nPp+PAQMGAJCfn09ubi4Ac+bMYcqUKV3dAxERsSismYnNZmPFihU8/fTTpKenA7Bt2zZGjx5NZmYm\nCQkJ5g7/yJEjFBQUkJGRQVpaGtnZ2RiGAUB2djaLFy8mMzOTY8eOUVxcDMD27dtxuVysWbOGWbNm\nkZOTA0AwGGTr1q2kp6ezcuVKtmzZQl1dXZc3QURErAkrTAzDMAOhWVFRkTlLmDp1KoWFhebySZMm\n4XA4iI2NJS4ujvLycqqrq6mvryc+Ph6AyZMnm2MKCwvNbU2YMIHS
0lIA9u3bR2JiIk6nk6ioKBIT\nE80AEhGRniOsw1w2m40nnngCu93OjBkzmD59OjU1NXg8HgA8Hg81NTUABAIBRo4caY71er0EAgEc\nDgfR0dHm8ujoaAKBgDmm+T673Y7T6SQYDLZafva2RESkZwkrTB5//HEuvvhivvnmG5544gkGDRrU\nZh2bzdZlRX13FtQZjuoIh47dRlO/iyNbg4jIeRJWmFx88ZmdYr9+/Rg/fjzl5eV4PB6qq6vN7/37\n9wfOzB6qqqrMsX6/H6/Xi9frxe/3t1nePKb5digUor6+HpfLhdfrpaysrNWYUaNGtamvrKys1Xop\nKSmcfjC1A23oer3+YSauX6V1ach2Ru/evXG73RGtoadQL1qoFy16Si8ajh0hVHU8skUMGsTmzZvN\nmwkJCSQkJIQ1tN0w+fbbbzEMg759+3Lq1Cn279/PvHnzGDduHPn5+cyePZv8/HySk5MBSE5OZs2a\nNdx8880EAgEqKyuJj4/HZrPhdDopLy9n+PDh7Ny5kxtvvNEcs2PHDkaMGEFBQYEZGElJSbzyyivU\n1dURCoUoKSnhzjvvbFNjR57w+WIYBidPnuySWZYVbreb2traiNbQU6gXLdSLFj2lF46vvuT07x+O\nbBGvF5GSktKpoe2GSU1NDc888ww2m42mpiauvfZakpKSGD58OBkZGeTl5RETE4PP5wNg8ODBTJw4\nEZ/PR69evVi0aJH56nzhwoWsW7fOvDR4zJgxAEybNo21a9eydOlS3G43y5YtA8DlcjF37lyWL1+O\nzWZj3rx5REVFdeqJiohI97EZkX7p3E2+mJUc0cd3XDMD2/xlmpn0IOpFC/WiRU/phePQxxGfmVz2\nelGnx+od8CIiYpnCRERELFOYiIiIZQoTERGxTGEiIiKWKUxERMQyhYmIiFimMBEREcsUJiIiYpnC\nRERELFOYiIiIZQoTERGxTGEiIiKWKUxERMQyhYmIiFimMBEREcsUJiIiYpnCRERELFOYiIiIZQoT\nERGxTGEiIiKWKUxERMQyhYmIiFimMBEREcsUJiIiYpnCRERELFOYiIiIZQoTERGxTGEiIiKW9Qp3\nxVAoRFpaGl6vl4cffphgMMjq1as5ceIEsbGx+Hw+nE4nALm5ueTl5eFwOEhNTSUpKQmAiooK1q9f\nT0NDA2PHjiU1NRWAxsZGsrKyqKiowO124/P5GDBgAAD5+fnk5uYCMGfOHKZMmdKVz19ERLpA2DOT\nv/71r1x66aXm7W3btjF69GgyMzNJSEgwd/hHjhyhoKCAjIwM0tLSyM7OxjAMALKzs1m8eDGZmZkc\nO3aM4uJiALZv347L5WLNmjXMmjWLnJwcAILBIFu3biU9PZ2VK1eyZcsW6urquuzJi4hI1wgrTPx+\nP3v37mX69OnmsqKiInOWMHXqVAoLC83lkyZNwuFwEBsbS1xcHOXl5VRXV1NfX098fDwAkydPNscU\nFhaa25owYQKlpaUA7Nu3j8TERJxOJ1FRUSQmJpoBJCIiPUdYYfLiiy9y1113YbPZzGU1NTV4PB4A\nPB4PNTU1AAQCAfMQFYDX6yUQCBAIBIiOjjaXR0dHEwgEzDHN99ntdpxOJ8FgsM2Y5m2JiEjP0u45\nkz179tC/f38uv/xyysrKvne9s4PGqubDYuEqKytrVVtKSkqX1dJZNhtERUV1aV86o3fv3rjd7ojW\n0FOoFy3UixY9pRffOsI+hd2tNm/ebP6ckJBAQkJCWOParf7AgQMUFRWxd+9eTp8+TX19PWvXrsXj\n8VBdXW1+79+/P3Bm9lBVVWWO9/v9eL1evF4vfr+/zfLmMc23Q6EQ9fX1uFwuvF5vq5Dw+/2MGjWq\nTY0decLni2HAyZMnOxyMXc3tdlNbWxvRGnoK9aKFetGip/TC0dQY6RKAzr8Yb/cw1x133MHzzz9P\nVlYW9913H6NGjeLXv/4148aN
Iz8/HzhzxVVycjIAycnJvPfeezQ2NnL8+HEqKyuJj4/H4/HgdDop\nLy/HMAx27tzJ+PHjzTE7duwAoKCgwAyMpKQkSkpKqKurIxgMUlJSYl4ZJiIiPUen51WzZ88mIyOD\nvLw8YmJi8Pl8AAwePJiJEyfi8/no1asXixYtMg/1LFy4kHXr1pmXBo8ZMwaAadOmsXbtWpYuXYrb\n7WbZsmUAuFwu5s6dy/Lly7HZbMybN4+oqCirz1lERLqYzYj0cZhu8sWs5Ig+vuOaGdjmL9Nhrh5E\nvWihXrToKb1wHPqY079/OKI1XPZ6UafH6h3wIiJimcJEREQsU5iIiIhlChMREbFMYSIiIpYpTERE\nxDKFiYiIWKYwERERyxQmIiJimcJEREQsU5iIiIhlChMREbFMYSIiIpYpTERExDKFiYiIWKYwERER\nyxQmIiJimcJEREQsU5iIiIhlChMREbFMYSIiIpYpTERExDKFiYiIWKYwERERyxQmIiJimcJEREQs\nU5iIiIhlChMREbGsV3srNDQ0sGLFChobG2lsbCQ5OZk77riDYDDI6tWrOXHiBLGxsfh8PpxOJwC5\nubnk5eXhcDhITU0lKSkJgIqKCtavX09DQwNjx44lNTUVgMbGRrKysqioqMDtduPz+RgwYAAA+fn5\n5ObmAjBnzhymTJnSHX0QEREL2p2ZXHTRRaxYsYKnn36aZ599lrKyMg4cOMC2bdsYPXo0mZmZJCQk\nmDv8I0eOUFBQQEZGBmlpaWRnZ2MYBgDZ2dksXryYzMxMjh07RnFxMQDbt2/H5XKxZs0aZs2aRU5O\nDgDBYJCtW7eSnp7OypUr2bJlC3V1dd3VCxER6aSwDnP16dMHODNLCYVCuFwuioqKzFnC1KlTKSws\nBKCoqIhJkybhcDiIjY0lLi6O8vJyqqurqa+vJz4+HoDJkyebYwoLC81tTZgwgdLSUgD27dtHYmIi\nTqeTqKgoEhMTzQASEZGeo93DXAChUIjly5fz1VdfMXPmTAYPHkxNTQ0ejwcAj8dDTU0NAIFAgJEj\nR5pjvV4vgUAAh8NBdHS0uTw6OppAIGCOab7PbrfjdDoJBoOtlp+9LRER6VnCChO73c7TTz9NXV0d\nTz75JGVlZW3WsdlsXVZU82GxcJWVlbWqKSUlpctq6SybDaKiorq0L53Ru3dv3G53RGvoKdSLFupF\ni57Si28dYe2Ou93mzZvNnxMSEkhISAhrXIeqdzqdjB07lkOHDuHxeKiurja/9+/fHzgze6iqqjLH\n+P1+vF4vXq8Xv9/fZnnzmObboVCI+vp6XC4XXq+3VUj4/X5GjRrVpq6OPOHzxTDg5MmTHQ7GruZ2\nu6mtrY1oDT2FetFCvWjRU3rhaGqMdAlA51+Mt3vO5JtvvjFPep8+fZqSkhKuuOIKxo0bR35+PnDm\niqvk5GQAkpOTee+992hsbOT48eNUVlYSHx+Px+PB6XRSXl6OYRjs3LmT8ePHm2N27NgBQEFBgRkY\nSUlJlJSUUFdXRzAYpKSkxLwyTEREeo52ZybV1dWsW7cOwzAwDINrr72W0aNHc8UVV5CRkUFeXh4x\nMTH4fD4ABg8ezMSJE/H5fPTq1YtFixaZh3oWLlzIunXrzEuDx4wZA8C0adNYu3YtS5cuxe12s2zZ\nMgBcLhdz585l+fLl2Gw25s2bR1RUVHf1QkREOslmRPo4TDf5YlZyRB/fcc0MbPOX6TBXD6JetFAv\nWvSUXjgOfczp3z8c0Roue72o02P1DngREbFMYSIiIpYpTERExDKFiYiIWKYwERERyxQmIiJimcJE\nREQsU5iIiIhlChMREbFMYSIiIpYpTERExDKFiYiIWKYwERERyxQmIiJimcJEREQsU5iIiIhlChMR\nEbFMYSIiIpYpTERExDKFiYiIWKYwERERyxQmIiJimcJEREQsU5iIiIhlChMREbFMYSIiIpYpTE
RE\nxDKFiYiIWNarvRX8fj9ZWVnU1NRgs9mYPn06N910E8FgkNWrV3PixAliY2Px+Xw4nU4AcnNzycvL\nw+FwkJqaSlJSEgAVFRWsX7+ehoYGxo4dS2pqKgCNjY1kZWVRUVGB2+3G5/MxYMAAAPLz88nNzQVg\nzpw5TJkypTv6ICIiFrQ7M3E4HPzLv/wLq1at4sknn+TNN9/k6NGjbNu2jdGjR5OZmUlCQoK5wz9y\n5AgFBQVkZGSQlpZGdnY2hmEAkJ2dzeLFi8nMzOTYsWMUFxcDsH37dlwuF2vWrGHWrFnk5OQAEAwG\n2bp1K+np6axcuZItW7ZQV1fXXb0QEZFOajdMPB4Pl19+OQB9+/bl0ksvxe/3U1RUZM4Spk6dSmFh\nIQBFRUVMmjQJh8NBbGwscXFxlJeXU11dTX19PfHx8QBMnjzZHFNYWGhua8KECZSWlgKwb98+EhMT\ncTqdREVFkZiYaAaQiIj0HB06Z3L8+HEOHz7MyJEjqampwePxAGcCp6amBoBAIGAeogLwer0EAgEC\ngQDR0dHm8ujoaAKBgDmm+T673Y7T6SQYDLYZ07wtERHpWdo9Z9Ls1KlTrFq1itTUVPr27dvmfpvN\n1mVFNR8WC1dZWRllZWXm7ZSUlC6rpbNsNoiKiurSvnRG7969cbvdEa2hp1AvWqgXLXpKL751hL07\n7labN282f05ISCAhISGscWFV39TUxHPPPcfkyZMZP348cGY2Ul1dbX7v378/cGb2UFVVZY71+/14\nvV68Xi9+v7/N8uYxzbdDoRD19fW4XC68Xm+rkPD7/YwaNapNfR15wueLYcDJkyc7HIxdze12U1tb\nG9Eaegr1ooV60aKn9MLR1BjpEoDOvxgP6zDX888/z+DBg7npppvMZePGjSM/Px84c8VVcnIyAMnJ\nybz33ns0NjZy/PhxKisriY+Px+Px4HQ6KS8vxzAMdu7caQZTcnIyO3bsAKCgoMAMjKSkJEpKSqir\nqyMYDFJSUmJeGSYiIj1HuzOTAwcO8Le//Y0hQ4bw0EMPYbPZuP3225k9ezYZGRnk5eURExODz+cD\nYPDgwUycOBGfz0evXr1YtGiReahn4cKFrFu3zrw0eMyYMQBMmzaNtWvXsnTpUtxuN8uWLQPA5XIx\nd+5cli9fjs1mY968eURFRXVXL0REpJNsRqSPw3STL2YlR/TxHdfMwDZ/mQ5z9SDqRQv1okVP6YXj\n0Mec/v3DEa3hsteLOj1W74AXERHLFCYiImKZwkRERCxTmIiIiGUKExERsUxhIiIililMRETEMoWJ\niIhYpjAkPJnEAAAKIUlEQVQRERHLFCYiImKZwkRERCxTmIiIiGUKExERsUxhIiIililMRETEMoWJ\niIhYpjARERHLFCYiImKZwkRERCxTmIiIiGUKExERsUxhIiIililMRETEMoWJiIhYpjARERHLFCYi\nImKZwkRERCxTmIiIiGW92lvh+eefZ8+ePfTv359nn30WgGAwyOrVqzlx4gSxsbH4fD6cTicAubm5\n5OXl4XA4SE1NJSkpCYCKigrWr19PQ0MDY8eOJTU1FYDGxkaysrKoqKjA7Xbj8/kYMGAAAPn5+eTm\n5gIwZ84cpkyZ0uUNEBER69qdmVx33XX85je/abVs27ZtjB49mszMTBISEswd/pEjRygoKCAjI4O0\ntDSys7MxDAOA7OxsFi9eTGZmJseOHaO4uBiA7du343K5WLNmDbNmzSInJwc4E1hbt24lPT2dlStX\nsmXLFurq6rr0yYuISNdoN0yuvPJKoqKiWi0rKioyZwlTp06lsLDQXD5p0iQcDgexsbHExcVRXl5O\ndXU19fX1xMfHAzB58mRzTGFhobmtCRMmUFpaCsC+fftITEzE6XQSFRVFYmKiGUAiItKzdOqcSU1N\nDR6PBwCPx0NNTQ0AgUDAPEQF4PV6CQQCBAIBoqOjzeXR0d
EEAgFzTPN9drsdp9NJMBhsM6Z5WyIi\n0vN0yQl4m83WFZsBMA+LiYjIhaPdE/Dn4vF4qK6uNr/3798fODN7qKqqMtfz+/14vV68Xi9+v7/N\n8uYxzbdDoRD19fW4XC68Xi9lZWWtxowaNeqc9ZSVlbVaNyUlpTNPq0vZbBAVFdWlQdsZvXv3xu12\nR7SGnkK9aKFetOgpvfjW0andcZfbvHmz+XNCQgIJCQlhjQuresMwWs0Yxo0bR35+PrNnzyY/P5/k\n5GQAkpOTWbNmDTfffDOBQIDKykri4+Ox2Ww4nU7Ky8sZPnw4O3fu5MYbbzTH7NixgxEjRlBQUGAG\nRlJSEq+88gp1dXWEQiFKSkq48847z1lfR57w+WIYcPLkyYjPtNxuN7W1tRGtoadQL1qoFy16Si8c\nTY2RLgHo/IvxdsMkMzOTjz76iNraWu655x5SUlKYPXs2GRkZ5OXlERMTg8/nA2Dw4MFMnDgRn89H\nr169WLRokfnKfOHChaxbt868NHjMmDEATJs2jbVr17J06VLcbjfLli0DwOVyMXfuXJYvX47NZmPe\nvHltLgQQEZGewWZE+qVzN/liVnJEH99xzQxs85dpZtKDqBct1IsWPaUXjkMfc/r3D0e0hsteL+r0\nWL0DXkRELFOYiIiIZQoTERGxTGEiIiKWKUxERMQyhYmIiFimMBEREcsUJiIiYpnCRERELFOYiIiI\nZQoTERGxTGEiIiKWKUxERMQyhYmIiFimMBEREcsUJiIiYpnCRERELFOYiIiIZQoTERGxTGEiIiKW\nKUxERMQyhYmIiFimMBEREcsUJiIiYpnCRERELFOYiIiIZQoTERGxTGEiIiKW9Yp0AeEoLi7mT3/6\nE4ZhcN111zF79uxIlyQiImfp8TOTUCjEpk2b+M1vfsNzzz3Hu+++y9GjRyNdloiInKXHh0l5eTlx\ncXHExMTQq1cvrrnmGgoLCyNdloiInKXHh0kgECA6Otq87fV6CQQCEaxIRES+64I4Z9IZF91xd0Qf\n337pUJoiWoGIyPnT48PE6/VSVVVl3g4EAni93lbrlJWVUVZWZt5OSUlh4J3/et5q7OncbnekS+gx\n1IsW6kWLHtGLQYPg2qJIV8HmzZvNnxMSEkhISAhrXI8/zBUfH09lZSUnTpygsbGRd999l+Tk5Fbr\nJCQkkJKSYn6d3YyfOvWihXrRQr1ooV602Lx5c6t9abhBAhfAzMRut7Nw4UKeeOIJDMNg2rRpDB48\nONJliYjIWXp8mACMGTOGzMzMSJchIiLfo8cf5uqMjkzNfuzUixbqRQv1ooV60cJKL2yGYRhdWIuI\niPwE/ShnJiIicn4pTERExLIL4gT89wnnAyBfeOEFiouL6dOnD7/61a+4/PLLz3+h50F7vdi1axf/\n9V//BUDfvn3513/9V4YMGRKJUrtduB8MWl5ezm9/+1vuu+8+rr766vNc5fkRTi/Kysp48cUXaWpq\nol+/fqxYsSIClXa/9npRW1vL2rVr+frrrwmFQtxyyy1MnTo1MsV2o+eff549e/bQv39/nn322XOu\n06n9pnGBampqMpYsWWIcP37caGhoMB544AHjyJEjrdbZs2ePsXLlSsMwDOPTTz81HnnkkUiU2u3C\n6cUnn3xinDx50jAMw9i7d+9PuhfN6z322GNGenq68f7770eg0u4XTi9Onjxp+Hw+w+/3G4ZhGDU1\nNZEotduF04vNmzcb//Ef/2EYxpk+zJ8/32hsbIxEud3q448/Nj777DPj/vvvP+f9nd1vXrCHucL5\nAMjCwkKmTJkCwIgRI6irq6O6ujoS5XarcHoxcuRInE4ncKYXP9bPNwv3g0HfeOMNJkyYQL9+/SJQ\n5fkRTi927drF1VdfbX6qxI+1H+H0wuPxUF9fD8CpU6dwu904HI5IlNutrrzySqKior73/s7uNy/Y\nMAnnAyB/Kh8S2dHn+c
477zBmzJjzUdp5F+7fRWFhIddff/35Lu+8CqcXX375JcFgkMcee4y0tDR2\n7tx5vss8L8LpxfTp0zly5Ah33303Dz74IKmpqee5yp6hs/vNCzZMpHNKS0vJz8/nzjvvjHQpEfOn\nP/2p1fM3fsJXx4dCIT777DPS0tJ45JFH2Lp1K5WVlZEuKyK2bdvG0KFD2bhxI0899RSbNm3i1KlT\nkS7rgnHBnoAP5wMgvV4vfr/fvO33+9us82MQTi8ADh8+zB/+8AceeeQRXC7X+SzxvAmnFxUVFaxe\nvRrDMKitrWXv3r306tWrzWe+XejC/Tfidrvp3bs3vXv35qqrruLzzz9n4MCB57vcbhVOLz755BN+\n8YtfADBw4EBiY2M5evQow4cPP6+1Rlpn95sX7MwknA+ATE5OZseOHQB8+umnREVF4fF4IlFutwqn\nF1VVVTz33HMsWbLkR7ejOFs4vcjKyiIrK4t169YxYcIEFi1a9KMLEgivF+PHj+fAgQOEQiG+/fZb\nDh48+KP87LtwenHppZdSUlICQHV1NceOHeOSSy6JRLndzjCM752Rd3a/eUG/A764uJh/+7d/Mz8A\ncvbs2bz11lvYbDZmzJgBwKZNmyguLqZv377cc889DBs2LMJVd4/2erFhwwZ2795NTEwMhmHgcDhI\nT0+PdNndIpy/i2br169n3LhxP+pLg9vrxWuvvUZ+fj52u53p06dz4403Rrjq7tFeL7755hvWr1+P\n3+/HMAxmz57NP/zDP0S67C6XmZnJRx99RG1tLf379yclJYXGxkbL+80LOkxERKRnuGAPc4mISM+h\nMBEREcsUJiIiYpnCRERELFOYiIiIZQoTERGxTGEiIiKWKUxERMSy/w8Px6MffL6O8wAAAABJRU5E\nrkJggg==\n",
121 | "text/plain": [
122 | ""
123 | ]
124 | },
125 | "metadata": {},
126 | "output_type": "display_data"
127 | }
128 | ],
129 | "source": [
130 | "train_df.Label.hist()\n",
131 | "plt.title(\"Training Label Distribution\")"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 5,
137 | "metadata": {
138 | "collapsed": false
139 | },
140 | "outputs": [
141 | {
142 | "data": {
143 | "text/html": [
144 | "\n",
145 | "
\n",
146 | " \n",
147 | " \n",
148 | " | \n",
149 | " Context | \n",
150 | " Utterance | \n",
151 | " Label | \n",
152 | "
\n",
153 | " \n",
154 | " \n",
155 | " \n",
156 | " 0 | \n",
157 | " i think we could import the old comment via rsync , but from there we need to go via email . i think it be easier than cach the status on each bug and than import bite here and there __eou__ __eot__ it would be veri easi to keep a hash db of message-id __eou__ sound good __eou__ __eot__ ok __eou__ perhap we can ship an ad-hoc apt_preferec __eou__ __eot__ version ? __eou__ __eot__ thank __eou__ __eot__ not yet __eou__ it be cover by your insur ? __eou__ __eot__ yes __eou__ but it 's realli no... | \n",
158 | " basic each xfree86 upload will not forc user to upgrad 100mb of font for noth __eou__ no someth i do in my spare time . __eou__ | \n",
159 | " 1 | \n",
160 | "
\n",
161 | " \n",
162 | " 1 | \n",
163 | " i 'm not suggest all - onli the one you modifi . __eou__ __eot__ ok , it sound like you re agre with me , then __eou__ though rather than `` the one we modifi '' , my idea be `` the one we need to merg '' __eou__ __eot__ | \n",
164 | " sorri __eou__ i think it be ubuntu relat . __eou__ | \n",
165 | " 0 | \n",
166 | "
\n",
167 | " \n",
168 | " 2 | \n",
169 | " afternoon all __eou__ not entir relat to warti , but if grub-instal take 5 minut to instal , be this a sign that i should just retri the instal : ) __eou__ __eot__ here __eou__ __eot__ you might want to know that thinic in warti be buggi compar to that in sid __eou__ __eot__ and appar gnome be suddent almost perfect ( out of the thinic problem ) , nobodi report bug : -p __eou__ i do n't get your question , where do you want to past ? __eou__ __eot__ can i file the panel not link to ed ? : ) ... | \n",
170 | " yep . __eou__ oh , okay . i wonder what happen to you __eou__ what distro do you need ? __eou__ yes __eou__ | \n",
171 | " 0 | \n",
172 | "
\n",
173 | " \n",
174 | " 3 | \n",
175 | " interest __eou__ grub-instal work with / be ext3 , fail when it be xfs __eou__ i think d-i instal the relev kernel for your machin . i have a p4 and it instal the 386 kernel __eou__ holi crap a lot of stuff get instal by default : ) __eou__ you be instal vim on a box of mine __eou__ ; ) __eou__ __eot__ more like osx than debian ; ) __eou__ we have a select of python modul avail for great justic ( and python develop ) __eou__ __eot__ 2.8 be fix them iirc __eou__ __eot__ pong __eou__ vino will... | \n",
176 | " that the one __eou__ | \n",
177 | " 1 | \n",
178 | "
\n",
179 | " \n",
180 | " 4 | \n",
181 | " and becaus python give mark a woodi __eou__ __eot__ i 'm not sure if we re mean to talk about that public yet . __eou__ __eot__ and i think we be a `` pant off '' kind of compani ... : p __eou__ you need new glass __eou__ __eot__ mono 1.0 ? dude , that 's go to be a barrel of laugh for total non-releas relat reason dure hoari __eou__ read bryan clark 's entri about networkmanag ? __eou__ __eot__ there be an accompani irc convers to that one < g > __eou__ explain ? __eou__ i guess you could s... | \n",
182 | " ( i think someon be go to make a joke about .au bandwidth ... ) __eou__ especi not if you re use screen ; ) __eou__ | \n",
183 | " 1 | \n",
184 | "
\n",
185 | " \n",
186 | "
\n",
187 | "
"
188 | ],
189 | "text/plain": [
190 | " Context \\\n",
191 | "0 i think we could import the old comment via rsync , but from there we need to go via email . i think it be easier than cach the status on each bug and than import bite here and there __eou__ __eot__ it would be veri easi to keep a hash db of message-id __eou__ sound good __eou__ __eot__ ok __eou__ perhap we can ship an ad-hoc apt_preferec __eou__ __eot__ version ? __eou__ __eot__ thank __eou__ __eot__ not yet __eou__ it be cover by your insur ? __eou__ __eot__ yes __eou__ but it 's realli no... \n",
192 | "1 i 'm not suggest all - onli the one you modifi . __eou__ __eot__ ok , it sound like you re agre with me , then __eou__ though rather than `` the one we modifi '' , my idea be `` the one we need to merg '' __eou__ __eot__ \n",
193 | "2 afternoon all __eou__ not entir relat to warti , but if grub-instal take 5 minut to instal , be this a sign that i should just retri the instal : ) __eou__ __eot__ here __eou__ __eot__ you might want to know that thinic in warti be buggi compar to that in sid __eou__ __eot__ and appar gnome be suddent almost perfect ( out of the thinic problem ) , nobodi report bug : -p __eou__ i do n't get your question , where do you want to past ? __eou__ __eot__ can i file the panel not link to ed ? : ) ... \n",
194 | "3 interest __eou__ grub-instal work with / be ext3 , fail when it be xfs __eou__ i think d-i instal the relev kernel for your machin . i have a p4 and it instal the 386 kernel __eou__ holi crap a lot of stuff get instal by default : ) __eou__ you be instal vim on a box of mine __eou__ ; ) __eou__ __eot__ more like osx than debian ; ) __eou__ we have a select of python modul avail for great justic ( and python develop ) __eou__ __eot__ 2.8 be fix them iirc __eou__ __eot__ pong __eou__ vino will... \n",
195 | "4 and becaus python give mark a woodi __eou__ __eot__ i 'm not sure if we re mean to talk about that public yet . __eou__ __eot__ and i think we be a `` pant off '' kind of compani ... : p __eou__ you need new glass __eou__ __eot__ mono 1.0 ? dude , that 's go to be a barrel of laugh for total non-releas relat reason dure hoari __eou__ read bryan clark 's entri about networkmanag ? __eou__ __eot__ there be an accompani irc convers to that one < g > __eou__ explain ? __eou__ i guess you could s... \n",
196 | "\n",
197 | " Utterance \\\n",
198 | "0 basic each xfree86 upload will not forc user to upgrad 100mb of font for noth __eou__ no someth i do in my spare time . __eou__ \n",
199 | "1 sorri __eou__ i think it be ubuntu relat . __eou__ \n",
200 | "2 yep . __eou__ oh , okay . i wonder what happen to you __eou__ what distro do you need ? __eou__ yes __eou__ \n",
201 | "3 that the one __eou__ \n",
202 | "4 ( i think someon be go to make a joke about .au bandwidth ... ) __eou__ especi not if you re use screen ; ) __eou__ \n",
203 | "\n",
204 | " Label \n",
205 | "0 1 \n",
206 | "1 0 \n",
207 | "2 0 \n",
208 | "3 1 \n",
209 | "4 1 "
210 | ]
211 | },
212 | "execution_count": 5,
213 | "metadata": {},
214 | "output_type": "execute_result"
215 | }
216 | ],
217 | "source": [
218 | "pd.options.display.max_colwidth = 500\n",
219 | "train_df.head()"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 13,
225 | "metadata": {
226 | "collapsed": false
227 | },
228 | "outputs": [
229 | {
230 | "name": "stdout",
231 | "output_type": "stream",
232 | "text": [
233 | "count 1000000.000000\n",
234 | "mean 86.339195\n",
235 | "std 74.929713\n",
236 | "min 5.000000\n",
237 | "25% 37.000000\n",
238 | "50% 63.000000\n",
239 | "75% 108.000000\n",
240 | "max 1879.000000\n",
241 | "Name: Context, dtype: float64\n",
242 | "count 1000000.000000\n",
243 | "mean 17.246392\n",
244 | "std 16.422901\n",
245 | "min 1.000000\n",
246 | "25% 7.000000\n",
247 | "50% 13.000000\n",
248 | "75% 22.000000\n",
249 | "max 653.000000\n",
250 | "Name: Utterance, dtype: float64\n"
251 | ]
252 | },
253 | {
254 | "data": {
255 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEPCAYAAAB/WNKuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XtUVOe9PvBnzyDCXGCYgWlANF6QmgwCRmjQHFEhaeIl\nZ3mh9GhuJBhrYqOZ5oZNGlcbo9FEEQRNGkx6UntiMAbXMTW6ThUkCWghBgSMNiONp6QKOhOQKagM\n8/7+8Mc+InecrYjPZy2XM3vv9513f9nO47v3nkESQggQERF5mOpGD4CIiAYnBgwRESmCAUNERIpg\nwBARkSIYMEREpAgGDBERKYIBc4s4ceIEVCoVjhw50qd2wcHB2LBhg0KjooFu3759UKlUcDgcN3oo\nPdq3bx/UavU1j5XHvOcwYAYIlUoFtVoNlUrV6Z/Ro0dfU//h4eE4c+YMoqOj+9SusrISTz/99DW9\ndl8cPXoUDz30EIYNGwYfHx+MHj0aP/vZz/DFF1949HVOnjwJlUqFv/71rx7tt699T5o0CcuWLfP4\nGPpj+PDhWLduXYflkiT1q7/f//73uOuuu+Dn5wd/f39ERETgmWeekddv3boVvr6+fe63tbUVKpUK\nubm57ZYnJibi9OnTMBqNvernkUcewcyZMzssv97H/GDmdaMHQJedOXNGfvzll18iKSkJX3/9NW67\n7TYAgFqt7rRdS0sLhgwZ0mP/kiTBbDb3eVwmk6nPbfpr9+7dSE5OxvTp0/H+++8jPDwc58+fx6ef\nfoqnn34aR48e9dhrCSH6/cZ5I/u+Wbz99tt47rnnsHHjRkyfPh1CCBw7dgyfffaZvE1/69TVZ8O9\nvLz6dYxf7Xoe84OeoAGnoKBAqFQq8f3333dYd9ttt4nf/va3YvHixcJoNIpp06YJIYR46623RGRk\npNBqtSIkJEQ8/PDDoq6uTm53/PhxIUmS+Oqrr9o9/+STT8SMGTOERqMRYWFh4r/+6786vN769evb\nPX/99dfF0qVLhcFgELfddpt46aWX2rX517/+JR5//HHh5+cnAgMDxa9+9Svx/PPPi4iIiC73ubGx\nUZhMJjF//vxO19fX18uPa2pqRFJSkvD39xcajUYkJiaK8vJyef3evXuFJEkiPz9f3HPPPcLX11dE\nRESIv/zlL0IIIS5cuCAkSRIqlUpIkiQkSRJ33HGH3P7Pf/6ziIuLE76+viI0NFQ8+eST4ocffhBC\nCNHc3CwsFov4+c9/Lm/vdDpFeHi4SE1N7bHvq8XFxYlnnnmmy/Xnz58XTz/9tAgODhZarVbExsaK\n3bt3y+t7+3O02WwiISFB+Pj4iNGjR4v33nuv3WvHxcW1G7dKpRK1tbVi7969QqVSdVnLrjzwwAPi\nkUce6XJ928/oytd76qmnhBBC7NmzR8THxwuj0SgMBoNISEgQR44ckdvedtttQqVSyW19fX2FEEJ8\n9tlnQpIkYbfbhRBCXLx4UTzzzDNi2LBhYujQoSIkJESkpKQIIYRIS0vr8PofffSR3P+Vx/ylS5fE\nK6+8IkaNGiW8vb3F8OHDxQsvvCCvz87OFj/+8Y+Fj4+PMJlMIiEhod2/vVsZA2YA6ilg/P39xZo1\na4TNZhMnTpwQQgixYcMGkZ+fL7777jtRVFQk7r77bvHAAw/I7Y4fPy5UKlWHgAkPDxe7du0SJ0+e\nFM8//7zw9vYWp06davd6VweMyWQSGzZsEDabTXz44YdCrVa3e0N78sknRWhoqNi7d684ceKEeO65\n54TBYBDjx4/vcp8//PBDoVKp2r2RdMbtdouoqCgRGxsrDh8+LCoqKsTcuXNFUFCQaGhoEEL835tX\nTEyM2L9/v/j222/FQw89JEwmk3A6nUIIIQ4dOiQkSRJ79+4VtbW18pvSnj17hFarFe+8846orq4W\nf/3rX8WUKVPE/fffL4+hsrJSaLVasX
XrViGEEI888oi48847RXNzc7d9d6a7gHG73WLy5Mnipz/9\nqTh06JCorq4WmzdvFt7e3uLLL78UQvTu59ja2iruuOMOER8fL44cOSLKysrEfffdJ/z9/eXXdjgc\nIiQkRPzmN78RtbW1ora2tte17Mzjjz8uwsPDxcmTJztd39LSIjZs2CB8fX1FXV2dqK2tFY2NjUII\nIXbs2CE++eQTYbPZRFVVlXjssceE2WwW58+fF0IIcfr0aSFJknj33XdFbW2t/GbeFoZt9X799dfF\n6NGjxRdffCH+8Y9/iJKSEpGVlSWEuPyfgqSkJJGYmCi//sWLF4UQHY/55ORkERwcLD766CP5mGjr\n54svvhDe3t7io48+Ev/7v/8rKisrxe9//3sGzP/HgBmAegqYBx98sMc+ioqKhEqlEg6HQwjR9Qzm\n7bfflttcunRJDB06VHzwwQftXu/qgPmP//iPdq81ffp08cQTTwghhPjhhx/EkCFDxIcffthum+jo\n6G4D5ne/+51QqVTym3RXPv30U6FWq0V1dbW8rKmpSQQGBoo333xTCPF/bzR79+6Vtzl16pSQJEkU\nFhYKIS7/j16SJHH48OF2/cfFxYnf/va37ZadOHFCSJIkh7kQQrzzzjtCp9OJV155Rfj6+oqKigp5\nXVd9d6a7gPnss8+ETqcTTU1N7ZYvXLhQLFiwQAjRu5/jf//3fwsvLy9RU1Mjb1NbWyuGDh3a7rVD\nQ0PF2rVr271Wb2rZmZqaGjFp0iShUqnE6NGjxYIFC8TWrVvb/XxzcnLk2Ud3WlpahFarFZ988okQ\nQgiXyyUkSZJnHFePtS1gfvGLX4iZM2d22e/DDz8sZsyY0WH5lcd8ZWWlkCRJ7Nmzp9M+PvzwQxEU\nFNThZ0SX8SL/TSg2NrbDsr/85S/46U9/ihEjRsDPzw/33XcfAODUqVPd9hUVFSU/HjJkCAIDA1Fb\nW9vrNgAQEhIit/nb3/6G1tZW3H333e22mTRpUrd9il5+5+qxY8cQEhKCUaNGyct8fX0RExODqqqq\nLscZEhICAD3u21dffYU33ngDer1e/jNx4kRIkoRvv/1W3m7x4sVITEzE66+/jnXr1iEiIqJX4++L\n0tJSNDU1wWw2txvPzp07240F6P7n+M033yAkJATDhg2TtzGbzX26caSvtRw2bBiKiopQWVmJl156\nCT4+PrBarYiMjER9fX23r2Wz2bBw4UKEhYXB398fAQEBuHDhQo/H8tUWLVqEw4cPIzw8HEuXLsWu\nXbvgcrn61MeRI0egUqmQmJjY6fqZM2fiRz/6EW6//XY89NBD2Lp1K3744Yc+vcZgxoC5CWm12nbP\nT548iQcffBB33HEHcnNz8dVXX2HHjh0AgEuXLnXbl7e3d7vnkiTB7XZfc5u+Xrz98Y9/DOBygHjK\nleNsG093+yaEgNvtxquvvory8vJ2f7799lskJCTI2zY0NKC8vBxeXl44ceKEx8Z8JbfbjR/96Ec4\nevRou7EcO3YMu3btardtTz+Ta73poK+1bHPHHXdg8eLFeO+99/DVV1/h5MmTePfdd7tt88ADD+Ds\n2bN45513cPjwYZSXl8PPz6/HY/lqMTExOHXqFNauXQu1Wo2lS5ciJiYGzc3NfepHkqQu6+fn54fy\n8nLs2LEDY8aMwaZNmxAWFtbhPzu3KgbMIHD48GG4XC6kp6cjLi4OY8eOxenTpztsd/U/EiXudAoP\nD4darUZxcXG75YcOHeq23axZs2AwGLBq1apO17f9r9diseCf//wnqqur5XVNTU0oLS3F+PHjez3O\ntjfM1tZWeZkkSbjrrrtw7NgxjB49usOfK2+pffLJJxEQEIB9+/bhnXfewe7du7vtuz9iYmJQV1cH\nt9vdYSxXzkZ6+jneeeed+P7771FTUyMvq6ura1fDtnFf65i7M3LkSPj4+KCurq7L12v72b7yyitI\nTE
zEuHHjAKDdrEetVkOtVvdqrFqtFnPnzkVmZiaKiopw9OhRFBUVdfn6V5s4cSLcbjf+53/+p8tt\nVCoVpk6dit/97nf4+uuvERAQgO3bt/c4tlsBb1MeoHp7ygi4/KbudruxYcMGJCUlyad5euqzL6/R\nWwaDAY8//jheeuklBAQEYPTo0cjJyUF1dTVuv/32LtvpdDq8//77+PnPf44ZM2bAarVi7NixcDqd\n2LNnD/70pz/h6NGjmDFjBsaPH48FCxYgIyMDWq0Wr776KlQqFZ588slej/O2226Dj48P9u3bhzFj\nxmDo0KHw9/fHqlWrMHv2bISEhGDhwoXQarU4ceIEcnNz8f7770OSJLz77rv47LPPcOTIEYwdOxav\nvvoqnnjiCZSXlyMkJKTLvrty7tw5lJeXt1vm7++PGTNm4J577sG///u/44033sD48eNht9vxxRdf\nICAgAI8++iiAnn+OM2fOxNixY/Hwww9jw4YNkCQJK1asgI+PT7twGjVqFD7//HM8+uij8PHxuabb\ndRctWoRRo0Zh2rRpGD58OOrq6rB+/XpcvHgRc+fOlV/P5XLhs88+w09+8hP4+vrCbDbDYDDgnXfe\nwbBhw1BbW4sXX3yxw+dlbr/9dhw4cADTp0+Ht7e3/NmXK2vxxhtvYOTIkYiKioKPjw/+8Ic/wNvb\nG2FhYfLr79u3D8ePH0dQUBD8/Pw63PJ/5513Yt68eXjyySexfv163H333Th79ixKSkqwdOlS7Ny5\nE//85z/xb//2bwgMDMShQ4dw+vRpWCyWftduULmB13+oC91d5A8ODm530b3Nxo0bxfDhw4VGoxHT\np08Xf/7zn4VKpZIvNHd2F9mVz9sMHz683YXeq1+vs9e/+mJpU1OTeOKJJ4Sfn58wmUxi+fLl4qmn\nnhI/+clPetz3srIysWDBAhESEiKGDh0qRo4cKebMmSMOHDggb/P999+Ln/3sZ8JgMAitVtvpbcpX\nXuwV4vKF4StvRRVCiK1bt4qRI0eKIUOGtLuVOD8/XyQkJAi9Xi/0er2IiIgQzz33nBBCiG+++Ubo\ndDrx3nvvydu73W6RkJAgpk+f3mPfV4uLi5Nvub3yz9y5c+VavvDCC2LkyJFi6NChIjg4WMyaNUt8\n/vnnQoje/xxtNptITEwUvr6+YuTIkeLdd98VkZGR4sUXX5S3KS4uFhMmTBA+Pj4dblPuqZZXy83N\nFQ8++KAIDQ0VPj4+Ijg4WDzwwANi//797bZbunSpMJvN7W5T3r9/vxg/frzw9fUVFotF7N69u8P+\nfPrpp2LcuHHC29tbvlHg6rFu2rRJ3HXXXcLPz0/4+fmJuLi4djcr1NXViQceeED4+fm125+rj/FL\nly6JX//61+L2228XQ4cOFSNGjJBvzd+/f7+YNm2aCAwMFL6+vmLcuHEiPT29y7rcaiQhevffWLfb\njRUrVsBoNOKll16C0+nExo0bcfbsWZjNZlitVmg0GgBAXl4e8vPzoVarkZKSIl8grK6uxubNm9HS\n0oIJEyYgJSUFAOByuZCVlYXq6mro9XpYrVYEBgYCAAoKCpCXlwcAmDdvHqZOnerpjKXr4J577sHo\n0aPxxz/+8UYPhXD5GtKwYcOQkZGB1NTUGz0cGqR6fQ1mz5497c777tq1C+PHj0dGRgYsFoscAjU1\nNSguLkZ6ejpWrFiBnJwcedqak5ODJUuWICMjA6dPn0ZZWRkA4MCBA9DpdMjMzMSsWbOwbds2AIDT\n6cTOnTuxZs0arF69Gh9//DGampp6HCsvsHlWX+tZVlaGP/3pT7DZbKioqIDVasWhQ4f6dAprMLsR\nx2deXh727duHU6dOobi4GElJSfD19cW8efOu+1g8jf/ePcfTtexVwNjtdnz99dftbtUrLS2VZxPT\npk1DSUmJvHzy5MlQq9Uwm80IDg6GzWZDfX09mpub5fOf8fHxcpuS
khK5r7i4OFRWVgIAysvLERkZ\nCY1GA61Wi8jISDmUusMDzrP6U8/MzEzExMRgypQpOHToEPbs2YP4+HgFRnfzuRHH57/+9S88++yz\n8jUFHx8ffPnllwgICLjuY/E0/nv3HE/XslcX+f/zP/8TjzzySLvZQ0NDAwwGA4DLF3YbGhoAAA6H\nA+Hh4fJ2RqMRDocDarW63UVDk8kkf+upw+GQ16lUKmg0GjidznbLr+yLBrbo6GgcPnz4Rg+DrvDw\nww/j4YcfvtHDoFtMjzOYI0eOwN/fHyNHjuz2bhVP3vLay8tCREQ0gPU4gzl+/DhKS0vx9ddf49Kl\nS2hubsamTZtgMBhQX18v/912G6bRaMS5c+fk9na7HUajEUajEXa7vcPytjZtz91uN5qbm6HT6WA0\nGttN2ex2e6efmK6qqmq3XXJycj9KQV1hPT2L9fQs1tNzkpOT2/0aBIvFck23XPcYMAsXLsTChQsB\nXP6U9e7du/HMM89g27ZtKCgowJw5c1BQUICYmBgAlz8clpmZidmzZ8PhcODMmTMICwuDJEnQaDSw\n2WwYM2YMCgsLMWPGDLnNwYMHMXbsWBQXF8shEhUVhe3bt6OpqQlutxsVFRV46KGHOoyxsyLU7s7F\npbc7/m4LAJBMZni9uhFuja4Ppbp16fV6NDY23uhhDBqsp2exnp4TEhLi0cDu9wct58yZg/T0dOTn\n5yMoKAhWqxUAEBoaikmTJsFqtcLLywuLFi2ST5+lpqYiOztbvk257ZdfJSQkYNOmTVi2bBn0ej2W\nL18O4PKH7+bPn4+0tDRIkoSkpKQOX5NCREQDU68/B3Oz4QzGc/g/RM9iPT2L9fScti8y9RR+FxkR\nESmCAUNERIpgwBARkSIYMEREpAgGDBERKYIBQ0REimDAEBGRIhgwRESkCAYMEREpggFDRESKYMAQ\nEZEiGDBERKQIBgwRESmCAUNERIpgwBARkSIYMEREpAgGDBERKYIBQ0REimDAEBGRIrx62qClpQUr\nV66Ey+WCy+VCTEwMFi5ciB07dmD//v3w9/cHACxYsADR0dEAgLy8POTn50OtViMlJQVRUVEAgOrq\namzevBktLS2YMGECUlJSAAAulwtZWVmorq6GXq+H1WpFYGAgAKCgoAB5eXkAgHnz5mHq1KkeLwIR\nEXlejwEzZMgQrFy5EkOHDoXb7cZvfvMbHD9+HAAwe/ZszJ49u932NTU1KC4uRnp6Oux2O1577TVk\nZmZCkiTk5ORgyZIlCAsLw5o1a1BWVobo6GgcOHAAOp0OmZmZKCoqwrZt2/Dss8/C6XRi586dWLt2\nLYQQSEtLQ2xsLDQajTLVICIij+nVKbKhQ4cCuDybcbvd0Ol0AAAhRIdtS0tLMXnyZKjVapjNZgQH\nB8Nms6G+vh7Nzc0ICwsDAMTHx6OkpAQAUFJSIs9M4uLiUFlZCQAoLy9HZGQkNBoNtFotIiMjUVZW\ndo27TERE10OPMxgAcLvdSEtLQ21tLe677z6EhoYCAPbu3YvCwkKMGTMGjz76KDQaDRwOB8LDw+W2\nRqMRDocDarUaJpNJXm4ymeBwOAAADodDXqdSqaDRaOB0Otstv7IvIiIa+Ho1g1GpVFi3bh22bNmC\nb775BseOHcP999+PrKwsvPnmmzAYDPjggw88NqjOZkZERHRz6dUMpo1Go8GECRNw8uRJ3HnnnfLy\nxMRErF27FsDlWca5c+fkdXa7HUajEUajEXa7vcPytjZtz91uN5qbm6HT6WA0GlFVVdWuTURERIdx\nVVVVtdsuOTkZKqn77Bw6dCi89Pq+7P4ty9vbG3rWymNYT89iPT0rNzdXfmyxWGCxWPrdV48Bc/78\neXh5eUGj0eDSpUuoqKhAUlIS6uvrYTAYAACHDx/G8OHDAQAxMTHIzMzE7Nmz4XA4cObMGYSFhUGS\nJGg0GthsNowZMwaFhYWYMWOG
3ObgwYMYO3YsiouL5RCJiorC9u3b0dTUBLfbjYqKCjz00EMdxthZ\nEdzC3e1+Xbx4Ec2Njb0oEen1ejSyVh7DenoW6+k5er0eycnJHuuvx4Cpr69HdnY2hBAQQmDKlCkY\nP348srKy8N1330GSJAQFBWHx4sUAgNDQUEyaNAlWqxVeXl5YtGgRJEkCAKSmpiI7O1u+TbnttuaE\nhARs2rQJy5Ytg16vx/LlywEAOp0O8+fPR1paGiRJQlJSErRarcd2noiIlCOJQXrBo3Z3Li69va7T\ndZLJDK9XN8Kt0V3nUd2c+D9Ez2I9PYv19JyQkBCP9sdP8hMRkSIYMEREpAgGDBERKYIBQ0REimDA\nEBGRIhgwRESkCAYMEREpggFDRESKYMAQEZEiGDBERKQIBgwRESmCAUNERIpgwBARkSIYMEREpAgG\nDBERKYIBQ0REiujxN1oOVqp/NUI6/Y+uNzAGoTUg8PoNiIhokLllAwb1dlxa9+suV3unrQUYMERE\n/cZTZEREpIgeZzAtLS1YuXIlXC4XXC4XYmJisHDhQjidTmzcuBFnz56F2WyG1WqFRqMBAOTl5SE/\nPx9qtRopKSmIiooCAFRXV2Pz5s1oaWnBhAkTkJKSAgBwuVzIyspCdXU19Ho9rFYrAgMvzx4KCgqQ\nl5cHAJg3bx6mTp2qRB2IiMjDepzBDBkyBCtXrsS6devw1ltvoaqqCsePH8euXbswfvx4ZGRkwGKx\nyCFQU1OD4uJipKenY8WKFcjJyYEQAgCQk5ODJUuWICMjA6dPn0ZZWRkA4MCBA9DpdMjMzMSsWbOw\nbds2AIDT6cTOnTuxZs0arF69Gh9//DGampqUqgUREXlQr06RDR06FMDl2Yzb7YZOp0Npaak8m5g2\nbRpKSkoAAKWlpZg8eTLUajXMZjOCg4Nhs9lQX1+P5uZmhIWFAQDi4+PlNiUlJXJfcXFxqKysBACU\nl5cjMjISGo0GWq0WkZGRcigREdHA1quL/G63G2lpaaitrcV9992H0NBQNDQ0wGAwAAAMBgMaGhoA\nAA6HA+Hh4XJbo9EIh8MBtVoNk8kkLzeZTHA4HHKbtnUqlQoajQZOp7Pd8iv7IiKiga9XAaNSqbBu\n3To0NTXh9ddfR1VVVYdtJEny2KDaTqn1VlVVVbsxJScnQyV1PzmTelivVntBo9f3aRyDlbe3N/Ss\nhcewnp7FenpWbm6u/NhiscBisfS7rz7dpqzRaDBhwgScPHkSBoMB9fX18t/+/v4ALs8yzp07J7ex\n2+0wGo0wGo2w2+0dlre1aXvudrvR3NwMnU4Ho9HYLjjsdjsiIiI6jKuzIriFu9t9ET2sb211obGx\nsdttbhV6vZ618CDW07NYT8/R6/VITk72WH89XoM5f/68fGH90qVLqKiowKhRozBx4kQUFBQAuHyn\nV0xMDAAgJiYGRUVFcLlcqKurw5kzZxAWFgaDwQCNRgObzQYhBAoLCxEbGyu3OXjwIACguLhYDpGo\nqChUVFSgqakJTqcTFRUV8h1pREQ0sPU4g6mvr0d2djaEEBBCYMqUKRg/fjxGjRqF9PR05OfnIygo\nCFarFQAQGhqKSZMmwWq1wsvLC4sWLZJPn6WmpiI7O1u+TTk6OhoAkJCQgE2bNmHZsmXQ6/VYvnw5\nAECn02H+/PlIS0uDJElISkqCVqtVqhZERORBkujrBY+bRO3uXFx6e12n6ySTGd6pz+JiD5/kbx1z\nh1LDu6nwFIRnsZ6exXp6TkhIiEf74yf5iYhIEQwYIiJSBAOGiIgUwYAhIiJFMGCIiEgRDBgiIlIE\nA4aIiBTBgCEiIkUwYIiISBEMGCIiUgQDhoiIFMGAISIiRTBgiIhIEQwYIiJSBAOGiIgUwYAhIiJF\nMGCIiEgRDBgiIlIEA4aIiBTh1dMGdrsdWVlZaGhogCRJuPfeezFjxgzs2LED+/fvh7+/PwBgwY
IF\niI6OBgDk5eUhPz8farUaKSkpiIqKAgBUV1dj8+bNaGlpwYQJE5CSkgIAcLlcyMrKQnV1NfR6PaxW\nKwIDAwEABQUFyMvLAwDMmzcPU6dO9XgRiIjI83oMGLVajcceewwjR47EhQsX8NJLLyEyMhIAMHv2\nbMyePbvd9jU1NSguLkZ6ejrsdjtee+01ZGZmQpIk5OTkYMmSJQgLC8OaNWtQVlaG6OhoHDhwADqd\nDpmZmSgqKsK2bdvw7LPPwul0YufOnVi7di2EEEhLS0NsbCw0Go0y1SAiIo/p8RSZwWDAyJEjAQA+\nPj4YNmwYHA4HAEAI0WH70tJSTJ48GWq1GmazGcHBwbDZbKivr0dzczPCwsIAAPHx8SgpKQEAlJSU\nyDOTuLg4VFZWAgDKy8sRGRkJjUYDrVaLyMhIlJWVXfteExGR4vp0Daaurg6nTp3C2LFjAQB79+7F\nCy+8gLfffhtNTU0AAIfDIZ/eAgCj0QiHwwGHwwGTySQvN5lMclBduU6lUkGj0cDpdHZo09YXEREN\nfD2eImtz4cIFbNiwASkpKfDx8cH999+PpKQkSJKE7du344MPPsCSJUs8MqjOZkbdqaqqQlVVlfw8\nOTkZKqn77JR6WK9We0Gj1/dpHIOVt7c39KyFx7CensV6elZubq782GKxwGKx9LuvXgVMa2sr1q9f\nj/j4eMTGxgIA/Pz85PWJiYlYu3YtgMuzjHPnzsnr7HY7jEYjjEYj7HZ7h+Vtbdqeu91uNDc3Q6fT\nwWg0tgsOu92OiIiIDuPrrAhu4e52n0QP61tbXWhsbOx2m1uFXq9nLTyI9fQs1tNz9Ho9kpOTPdZf\nr06RbdmyBaGhoZg5c6a8rL6+Xn58+PBhDB8+HAAQExODoqIiuFwu1NXV4cyZMwgLC4PBYIBGo4HN\nZoMQAoWFhXJYxcTE4ODBgwCA4uJiOUSioqJQUVGBpqYmOJ1OVFRUyHekERHRwNbjDOb48eP4/PPP\nMWLECLz44ouQJAkLFizAF198ge+++w6SJCEoKAiLFy8GAISGhmLSpEmwWq3w8vLCokWLIEkSACA1\nNRXZ2dnybcpttzUnJCRg06ZNWLZsGfR6PZYvXw4A0Ol0mD9/PtLS0iBJEpKSkqDVapWqBREReZAk\n+nrB4yZRuzsXl95e1+k6yWSGd+qzuLju1122905bi9Yxdyg1vJsKT0F4FuvpWayn54SEhHi0P36S\nn4iIFMGAISIiRTBgiIhIEQwYIiJSBAOGiIgUwYAhIiJFMGCIiEgRDBgiIlIEA4aIiBTBgCEiIkUw\nYIiISBEpdxWRAAAVs0lEQVQMGCIiUgQDhoiIFMGAISIiRTBgiIhIEQwYIiJSBAOGiIgUwYAhIiJF\nMGCIiEgRXj1tYLfbkZWVhYaGBkiShMTERMycORNOpxMbN27E2bNnYTabYbVaodFoAAB5eXnIz8+H\nWq1GSkoKoqKiAADV1dXYvHkzWlpaMGHCBKSkpAAAXC4XsrKyUF1dDb1eD6vVisDAQABAQUEB8vLy\nAADz5s3D1KlTlagDERF5WI8zGLVajcceewwbNmzA66+/jn379uH777/Hrl27MH78eGRkZMBiscgh\nUFNTg+LiYqSnp2PFihXIycmBEAIAkJOTgyVLliAjIwOnT59GWVkZAODAgQPQ6XTIzMzErFmzsG3b\nNgCA0+nEzp07sWbNGqxevRoff/wxmpqalKoFERF5UI8BYzAYMHLkSACAj48Phg0bBrvdjtLSUnk2\nMW3aNJSUlAAASktLMXnyZKjVapjNZgQHB8Nms6G+vh7Nzc0ICwsDAMTHx8ttSkpK5L7i4uJQWVkJ\nACgvL0dkZCQ0Gg20Wi0iIyPlUCIiooGtT9dg6urqcOrUKYSHh6OhoQEGgwHA5RBqaGgAADgcDvn0\nFgAYjUY4HA44HA6YTCZ5uclkgsPhkNu0rVOpVNBoNHA6nR
3atPVFREQDX4/XYNpcuHABGzZsQEpK\nCnx8fDqslyTJY4NqO6XWW1VVVaiqqpKfJycnQyV1n51SD+vVai9o9Po+jWOw8vb2hp618BjW07NY\nT8/Kzc2VH1ssFlgsln731auAaW1txfr16xEfH4/Y2FgAl2ct9fX18t/+/v4ALs8yzp07J7e12+0w\nGo0wGo2w2+0dlre1aXvudrvR3NwMnU4Ho9HYLjjsdjsiIiI6jK+zIriFu9t9Ej2sb211obGxsdtt\nbhV6vZ618CDW07NYT8/R6/VITk72WH+9OkW2ZcsWhIaGYubMmfKyiRMnoqCgAMDlO71iYmIAADEx\nMSgqKoLL5UJdXR3OnDmDsLAwGAwGaDQa2Gw2CCFQWFgoh1VMTAwOHjwIACguLpZDJCoqChUVFWhq\naoLT6URFRYV8RxoREQ1sPc5gjh8/js8//xwjRozAiy++CEmSsGDBAsyZMwfp6enIz89HUFAQrFYr\nACA0NBSTJk2C1WqFl5cXFi1aJJ8+S01NRXZ2tnybcnR0NAAgISEBmzZtwrJly6DX67F8+XIAgE6n\nw/z585GWlgZJkpCUlAStVqtULYiIyIMk0dcLHjeJ2t25uPT2uk7XSSYzvFOfxcV1v+6yvXfaWrSO\nuUOp4d1UeArCs1hPz2I9PSckJMSj/fGT/EREpAgGDBERKYIBQ0REimDAEBGRIhgwRESkCAYMEREp\nggFDRESKYMAQEZEiGDBERKQIBgwRESmCAUNERIpgwBARkSIYMEREpAgGDBERKYIBQ0REimDAEBGR\nIhgwRESkCAYMEREpggFDRESK8Oppgy1btuDIkSPw9/fHW2+9BQDYsWMH9u/fD39/fwDAggULEB0d\nDQDIy8tDfn4+1Go1UlJSEBUVBQCorq7G5s2b0dLSggkTJiAlJQUA4HK5kJWVherqauj1elitVgQG\nBgIACgoKkJeXBwCYN28epk6d6tm9JyIixfQ4g5k+fTpefvnlDstnz56NtWvXYu3atXK41NTUoLi4\nGOnp6VixYgVycnIghAAA5OTkYMmSJcjIyMDp06dRVlYGADhw4AB0Oh0yMzMxa9YsbNu2DQDgdDqx\nc+dOrFmzBqtXr8bHH3+MpqYmj+04EREpq8eAGTduHLRabYflbcFxpdLSUkyePBlqtRpmsxnBwcGw\n2Wyor69Hc3MzwsLCAADx8fEoKSkBAJSUlMgzk7i4OFRWVgIAysvLERkZCY1GA61Wi8jISDmUiIho\n4OvxFFlX9u7di8LCQowZMwaPPvooNBoNHA4HwsPD5W2MRiMcDgfUajVMJpO83GQyweFwAAAcDoe8\nTqVSQaPRwOl0tlt+ZV9ERHRz6FfA3H///UhKSoIkSdi+fTs++OADLFmyxCMD6mxm1JOqqipUVVXJ\nz5OTk6GSup+cST2sV6u9oNHr+zyWwcjb2xt61sJjWE/PYj09Kzc3V35ssVhgsVj63Ve/AsbPz09+\nnJiYiLVr1wK4PMs4d+6cvM5ut8NoNMJoNMJut3dY3tam7bnb7UZzczN0Oh2MRmO70LDb7YiIiOh0\nPJ0VwS3c3e6D6GF9a6sLjY2N3W5zq9Dr9ayFB7GensV6eo5er0dycrLH+uvVbcpCiHYzi/r6evnx\n4cOHMXz4cABATEwMioqK4HK5UFdXhzNnziAsLAwGgwEajQY2mw1CCBQWFiI2NlZuc/DgQQBAcXGx\nHCJRUVGoqKhAU1MTnE4nKioq5DvSiIho4OtxBpORkYFjx46hsbERTz31FJKTk1FVVYXvvvsOkiQh\nKCgIixcvBgCEhoZi0qRJsFqt8PLywqJFiyBJEgAgNTUV2dnZ8m3KbXeeJSQkYNOmTVi2bBn0ej2W\nL18OANDpdJg/fz7S0tIgSRKSkpI6vdmAiIgGJkn056LHTaB2dy4uvb2u03WSyQzv1Gdxcd2vu2zv\nnbYWrWPuUGp4NxWegv
As1tOzWE/PCQkJ8Wh//CQ/EREpggFDRESKYMAQEZEi+v1By8FO8vKC+uQ3\nXW9gDEJrQOD1GxAR0U2GAdOVxvO4lPHbLld7p60FGDBERF3iKTIiIlIEA4aIiBTBgCEiIkUwYIiI\nSBEMGCIiUgQDhoiIFMGAISIiRTBgiIhIEQwYIiJSBAOGiIgUwYAhIiJFMGCIiEgRDBgiIlIEA4aI\niBTR49f1b9myBUeOHIG/vz/eeustAIDT6cTGjRtx9uxZmM1mWK1WaDQaAEBeXh7y8/OhVquRkpKC\nqKgoAEB1dTU2b96MlpYWTJgwASkpKQAAl8uFrKwsVFdXQ6/Xw2q1IjDw8tfgFxQUIC8vDwAwb948\nTJ061eMFICIiZfQ4g5k+fTpefvnldst27dqF8ePHIyMjAxaLRQ6BmpoaFBcXIz09HStWrEBOTg6E\nEACAnJwcLFmyBBkZGTh9+jTKysoAAAcOHIBOp0NmZiZmzZqFbdu2AbgcYjt37sSaNWuwevVqfPzx\nx2hqavLozhMRkXJ6DJhx48ZBq9W2W1ZaWirPJqZNm4aSkhJ5+eTJk6FWq2E2mxEcHAybzYb6+no0\nNzcjLCwMABAfHy+3KSkpkfuKi4tDZWUlAKC8vByRkZHQaDTQarWIjIyUQ4mIiAa+fl2DaWhogMFg\nAAAYDAY0NDQAABwOh3x6CwCMRiMcDgccDgdMJpO83GQyweFwyG3a1qlUKmg0Gjidzg5t2voiIqKb\ng0d+ZbIkSZ7oBgDkU2p9UVVVhaqqKvl5cnIyVFL32Sn1uL77fVKrvaDR63s/yJuYt7c39LfIvl4P\nrKdnsZ6elZubKz+2WCywWCz97qtfAWMwGFBfXy//7e/vD+DyLOPcuXPydna7HUajEUajEXa7vcPy\ntjZtz91uN5qbm6HT6WA0GtuFht1uR0RERKfj6awIbuHudh9Ej+u7D7rWVhcaGxu73Waw0Ov1t8y+\nXg+sp2exnp6j1+uRnJzssf56dYpMCNHuDXfixIkoKCgAcPlOr5iYGABATEwMioqK4HK5UFdXhzNn\nziAsLAwGgwEajQY2mw1CCBQWFiI2NlZuc/DgQQBAcXGxHCJRUVGoqKhAU1MTnE4nKioq5DvSiIho\n4OtxBpORkYFjx46hsbERTz31FJKTkzFnzhykp6cjPz8fQUFBsFqtAIDQ0FBMmjQJVqsVXl5eWLRo\nkXyqKTU1FdnZ2fJtytHR0QCAhIQEbNq0CcuWLYNer8fy5csBADqdDvPnz0daWhokSUJSUlKHmw2I\niGjgkkR/LnrcBGp35+LS2+s6XSeZzPBOfRYX1/26y/ZDl6/ExYzfdrneO20tWsfccc3jvBnwFIRn\nsZ6exXp6TkhIiEf74yf5iYhIEQwYIiJSBAOGiIgUwYAhIiJFMGCIiEgRDBgiIlIEA4aIiBTBgCEi\nIkUwYIiISBEMGCIiUgQDhoiIFMGAISIiRTBgiIhIEQwYIiJShEd+ZfKtSPLygvrkN11vYAxCa0Dg\n9RsQEdEAw4Dpr8bzuNTD74sBA4aIbmE8RUZERIpgwBARkSIYMEREpIhrugazdOlSaDQaSJIEtVqN\nNWvWwOl0YuPGjTh79izMZjOsVis0Gg0AIC8vD/n5+VCr1UhJSUFUVBQAoLq6Gps3b0ZLSwsmTJiA\nlJQUAIDL5UJWVhaqq6uh1+thtVoRGMjrGkREN4NrmsFIkoSVK1di3bp1WLNmDQBg165dGD9+PDIy\nMmCxWJCXlwcAqKmpQXFxMdLT07FixQrk5ORACAEAyMnJwZIlS5CRkYHTp0+jrKwMAHDgwAHodDpk\nZmZi1qxZ2LZt27UMl4iIrqNrChghhBwSbUpLSzF16lQAwLRp01BSUiIvnzx5MtRqNcxmM4KDg2Gz\n2VBfX4/m5maEhYUBAOLj4+U2JSUlcl9xcXGoqKi4luESEdF1dE2nyCRJwqpVq6BSqXDv
vfciMTER\nDQ0NMBgMAACDwYCGhgYAgMPhQHh4uNzWaDTC4XBArVbDZDLJy00mExwOh9ymbZ1KpYJWq4XT6YRO\np7uWYRMR0XVwTQHz2muvISAgAOfPn8eqVasQEhLSYRtJkq7lJdq5erbUpqqqClVVVfLz5ORkqKTu\nJ2dSj+u7H3dP69VqL2j0+m63uVl4e3tDP0j2ZSBgPT2L9fSs3Nxc+bHFYoHFYul3X9cUMAEBAQAA\nPz8/xMbGwmazwWAwoL6+Xv7b398fwOUZy7lz5+S2drsdRqMRRqMRdru9w/K2Nm3P3W43mpubO529\ndFYEt3B3O3bR4/rOw6y361tbXWhsbOx2m5uFXq8fNPsyELCensV6eo5er0dycrLH+uv3NZiLFy/i\nwoULAIALFy7g6NGjGDFiBCZOnIiCggIAQEFBAWJiYgAAMTExKCoqgsvlQl1dHc6cOYOwsDAYDAZo\nNBrYbDYIIVBYWIjY2Fi5zcGDBwEAxcXFiIiIuJZ9JSKi66jfM5iGhga8+eabkCQJra2tmDJlCqKi\nojBmzBikp6cjPz8fQUFBsFqtAIDQ0FBMmjQJVqsVXl5eWLRokXyaKTU1FdnZ2fJtytHR0QCAhIQE\nbNq0CcuWLYNer8fy5cs9sMtERHQ99DtgzGYz3nzzzQ7LdTodfvOb33TaZu7cuZg7d26H5aNHj8b6\n9es7LB8yZAh+9atf9XeIRER0A/GT/EREpAgGDBERKYIBQ0REiuDvg1EIfyEZEd3qGDBK4S8kI6Jb\nHE+RERGRIhgwRESkCAYMEREpggFDRESKYMAQEZEiGDBERKQI3qZ8g3T7ORl+RoaIBgEGzI3Szedk\n+BkZIhoMeIqMiIgUwYAhIiJFMGCIiEgRvAYzAPGLMoloMGDADET8okwiGgRuioApKyvDH/7wBwgh\nMH36dMyZM+dGD4mIiHow4APG7XZj69atePXVVxEQEIAVK1YgNjYWw4YNu9FDu2F4Co2IbgYDPmBs\nNhuCg4MRFBQEALjnnntQUlJySwcMT6ER0c1gwAeMw+GAyWSSnxuNRthsths4ooGPMxwiGggGfMD0\nl2r4aAxZ+ItO10m+WgDS9R3Q9dTDDGfoK+uhdpztcr2k1UP8q1F+flHtBXWrq8v17TC8iOj/k4QQ\n4kYPojt/+9vfsGPHDrz88ssAgF27dgFAuwv9VVVVqKqqkp8nJydf30ESEQ0Subm58mOLxQKLxdLv\nvgb8DCYsLAxnzpzB2bNnERAQgC+//BLLly9vt83VRcjNzWXIeBDr6Vmsp2exnp7j6VoO+IBRqVRI\nTU3FqlWrIIRAQkICQkNDb/SwiIioBwM+YAAgOjoaGRkZN3oYRETUB4Pyu8iu5ZwhdcR6ehbr6Vms\np+d4upYD/iI/ERHdnAblDIaIiG48BgwRESniprjI3xf8Ysy+W7p0KTQaDSRJglqtxpo1a+B0OrFx\n40acPXsWZrMZVqsVGo0GAJCXl4f8/Hyo1WqkpKQgKirqBu/BjbVlyxYcOXIE/v7+eOuttwCgX/Wr\nrq7G5s2b0dLSggkTJiAlJeVG7dIN1Vk9d+zYgf3798Pf3x8AsGDBAkRHRwNgPbtjt9uRlZWFhoYG\nSJKExMREzJw58/odn2IQaW1tFb/85S9FXV2daGlpEc8//7yoqam50cMa8JYuXSoaGxvbLfvjH/8o\ndu3aJYQQIi8vT2zbtk0IIcQ//vEP8cILLwiXyyVqa2vFL3/5S+F2u6/7mAeSb775Rvz9738Xzz33\nnLysP/VbsWKF+Pbbb4UQQqxevVp8/fXX13lPBobO6pmbmyt2797dYVvWs3s//PCD+Pvf/y6EEKK5\nuVksW7ZM1NTUXLfjc1CdIrvyizG9vLzkL8ak7gkhIK6616O0tBRTp04FAEybNk2uY2lpKSZPngy1\nWg2z2Yzg4OBb/rvhxo0bB61W225ZX+tXX1+P5uZm
hIWFAQDi4+Nv2WO3s3oC6HCMAqxnTwwGA0aO\nHAkA8PHxwbBhw2C326/b8TmoTpHxizH7R5IkrFq1CiqVCvfeey8SExPR0NAAg8EA4PJB2tDQAOBy\njcPDw+W2RqMRDofjhox7IOtr/dRqdbtj12Qysa5X2bt3LwoLCzFmzBg8+uij0Gg0rGcf1NXV4dSp\nUwgPD79ux+egChjqn9deew0BAQE4f/48Vq1ahZCQkA7bSNIg/nLQ64D1uzb3338/kpKSIEkStm/f\njg8++ABLliy50cO6aVy4cAEbNmxASkoKfHx8OqxX6vgcVKfIjEYjzp07Jz93OBwwGo03cEQ3h4CA\nAACAn58fYmNjYbPZYDAYUF9fDwCor6+XL65eXWO73c4ad6Kv9TMajbDb7R2W02V+fn7ym2BiYqJ8\nZoL17FlrayvWr1+P+Ph4xMbGArh+x+egCpgrvxjT5XLhyy+/RExMzI0e1oB28eJFXLhwAcDl/+Uc\nPXoUI0aMwMSJE1FQUAAAKCgokOsYExODoqIiuFwu1NXV4cyZM/J52VvZ1dex+lo/g8EAjUYDm80G\nIQQKCwvlN4Nb0dX1bHszBIDDhw9j+PDhAFjP3tiyZQtCQ0Mxc+ZMedn1Oj4H3Sf5y8rK8P7778tf\njMnblLtXV1eHN998E5IkobW1FVOmTMGcOXPgdDqRnp6Oc+fOISgoCFarVb7wmpeXhwMHDsDLy4u3\nKQPIyMjAsWPH0NjYCH9/fyQnJyM2NrbP9auurkZ2drZ8G+jjjz9+I3frhumsnlVVVfjuu+8gSRKC\ngoKwePFi+RoC69m148ePY+XKlRgxYgQkSYIkSViwYAHCwsKuy/E56AKGiIgGhkF1ioyIiAYOBgwR\nESmCAUNERIpgwBARkSIYMEREpAgGDBERKYIBQ0REimDAEBGRIv4fkWdvajvJJbIAAAAASUVORK5C\nYII=\n",
256 | "text/plain": [
257 | ""
258 | ]
259 | },
260 | "metadata": {},
261 | "output_type": "display_data"
262 | },
263 | {
264 | "data": {
265 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEPCAYAAACKplkeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XtcVHX+P/DXmSGkYUbGGUBRdL2wrNsoyFcotETU2ktW\na+rS/RGm3zItbSoLt9q2zctmKaJ4qaVv2+Z3/a5l0LZb9m0TtIRaSFHENAl0vXFxJpBxMB3m/f2D\nH+fnCArosWHs9Xw8fMCccz5nXudweXEuMyoiIiAiItKAzt8BiIjoysFSISIizbBUiIhIMywVIiLS\nDEuFiIg0w1IhIiLNsFSuIPv27YNOp8P27du7NC4qKgrLli27TKnoSnbXXXfhtttu83eMTtEi60cf\nfQS9Xg+n06lRqisPS+V7pNPpoNfrodPp2v03ePDgS1p/bGwsqqurMWLEiC6N2717N2bNmnVJz90Z\n3333HXQ6Hd59990289auXQuTyaQ+fu655/DTn/60zXL9+/fHkiVLLmvO7uJi/0i4HD755BPodDrU\n1tZqsr66ujrMmjULgwYNQkhICHr37o3U1FSf740xY8Zc1Pfl66+/jquvvrrN9Ndeew3r1q3r1Dqa\nm5uh0+mwYcMGn+kTJkzAsWPHYLFYupzrhyLI3wF+SKqrq9XPt23bhqlTp2LHjh3o06cPAECv17c7\n7syZM7jqqqs6XL+iKIiMjOxyLqvV2uUxWlMUBYqitJl2OTU3N593n3cXl3sfdJaIQFEUaPVa6Vtv\nvRVerxdvvPEGhgwZgrq6OhQVFcHhcFzyuluznuvsP1o6s472BAUFXdTP2A+KkF8UFBSITqeTI0eO\ntJnXp08feeGFF+TBBx8Ui8UiqampIiLyyiuvSFxcnISGhkrfvn3l3nvvldraWnXc3r17RVEU+fLL\nL30ev/vuu/LLX/5SDAaDxMTEyF/+8pc2z7d06VKfxwsXLpTZs2eL2WyWPn36yNNPP+0z5uTJkzJt\n2jTp2bOnhIeHy+OPPy5PPvmkDBs27LzbfOrUKVEURTZu3Nhm3tq1a8VkMqmfK4oiOp1O/fjSSy9J\ncnJym+k1NTUiIvLVV1/Jr371KwkLCxOLxSK/+MUvZM+ePT7rNxqN8tFHH0l8fLwEBwdLQUGB7N+/\nXyZNmiR9+vQRg8Eg8fHx8te//tUnW3JyssyePVt++9vfSmRkpFitVpk+fbo0NTX5LPfWW2/JiBEj\nJCQkRMLDw+XWW2+VkydPqvOXLl0qsbGxEhISIkOHDpUlS5ZIc3PzeffXuV/P9nz++ecyYcIECQ0N\nld69e0taWpocPnxYnZ+RkSHDhg2Tt99+W2JjY8VoNMqECRPkwIEDPut58803ZdCgQXL11VdLamqq\n/O1vf1OfuzVH635XFEV++ctfiojInXfeKbfeequsWrVKBgwYIGFhYTJ58mRxOBznzVxdXS2Kosgn\nn3xy3mXuvPPONl/rL774QkRE5s2bJ0OHDhWDwSADBgyQRx99VN3PmzZtajPu4YcfFhGRO+64Q269\n9Vb1OXbu3Ck33nijhIWFSWhoqNhsNtmwYYOItPwM6HQ6dT1XX321iIh8+OGHoiiKz/bt27dPJk2a\nJL169RKDwSAJCQny8ccfi4jIt99+K/fee6/07t1bevToIQMHDpTf/OY3593uKwFLxU86KpWwsDBZ\nvHixVFRUyL59+0REZNmyZZKfny8HDhyQwsJCue666+QXv/iFOm7v3r2i0+nalEpsbKzk5eXJN998\nI08++aQEBwfLwYMHfZ7v3FKxWq2ybNkyqaiokPXr14ter/cpo//8z/+U6Oho2bRpk+zbt0+eeOIJ\nMZvNMnz48PNuc2dLpampSex2u8TGxkptba3U1NSI2+0Wp9Mpffv2leeee05qamrUQjly5IiEh4eL\n3W6XPXv2yL59+2TmzJnSp08fqa+vV9
cfFBQkycnJ8umnn0plZaU4nU7Zvn27rF27Vnbv3i2VlZWS\nmZkpQUFBUlRUpGZLTk6WXr16SUZGhnz99dfy4YcfSs+ePWXRokXqMqtXr5bg4GBZsmSJ7N27V8rL\nyyUrK0tOnDghIiJPP/20DBkyRP7+97/LgQMH5O9//7v069fPZx3n6qhUduzYIQaDQRYvXiz79++X\nXbt2ye233y42m008Ho+ItJSKyWSSW265RXbu3CmlpaUSFxcnP/vZz9T1bNu2TXQ6nSxYsED2798v\n7777rgwePFj9XvJ6vfL222+LTqeT8vJyqampUffrnXfeKWazWdLT02XPnj2ybds26d+/vzz44IMX\n/D4wGAzyyCOPiNvtbneZhoYGue666yQ9PV39Hmjdpt///vdSWFgoBw8elI8//lh+/OMfy8yZM0VE\n5PTp07Js2TK5+uqr1XGNjY1q1rNLJTY2Vh544AHZt2+fVFVVyQcffCCbNm0SEZFjx46Joijyxz/+\nUWpqatQ/3jZt2iQ6nU4tlcOHD0t4eLhMnDhRvvjiC6mqqpL3339f/vnPf4pIy89JUlKSfPnll/Lv\nf/9btm3bJm+88cZ5982VgKXiJx2Vytnf/OdTWFgoOp1OnE6niJz/SGXt2rXqmNOnT0uPHj3kz3/+\ns8/znVsqd955p89zjRs3Th544AERafnr66qrrpL169f7LDNixAhNSkVE5Nlnn5Wf/vSnbZaLjo6W\nl156yWdaRkaGjBs3zmdac3OzREdHy6uvvqqu/+zCvZCf//znMmfOHPVxcnKyXHfddT7LTJs2TcaP\nH68+7t27tzz11FPtrq+hoUF69OghW7Zs8Zn+2muvSZ8+fc6bo6NSufPOO2XatGk+01wul1x11VXy\n0UcfiUjLvgkJCZGGhgZ1mTfffFOCg4PF6/WKiMiUKVN8SkZEZPny5T7765///KfPkeHZGaKjo9Vf\n+CIiL7zwggwePPi82yUi8vbbb4vVapUePXrItddeK3a7vc3+ueGGG9SjjAtZv3699OzZU32ck5Oj\nHlmcm/Xsn6uQkJA2R6WtPB6PKIrSZv65pfLkk0/KgAED5PTp0+2u5+c//3mntuFKwgv13VRSUlKb\naf/85z/xs5/9DAMGDEDPnj1x0003AQAOHjx4wXXFx8ern1911VUIDw9HTU1Np8cAQN++fdUxX3/9\nNZqbm3Hdddf5LDNq1KgLrvNyKS4uxrZt22AymdR/YWFhqK6uxv79+9Xl9Ho9EhISfMaePHkS8+bN\ng81mg8VigclkQn5+fpt9eu7ND2fvj0OHDqG2tlb9epxr165dOH36NCZOnOiTce7cuairq8PJkycv\nervXr1/vs84+ffrA6/X6bPePfvQj9OzZ0ye7x+NRr1/s2bMHycnJPuvuytfSZrP5XJs6e9+cz9Sp\nU3H06FF88MEHmDRpEnbt2oXU1FTMmzevw+f761//ijFjxqBv374wmUx44IEH4HK5UF9f3+nMAPDk\nk0/i3nvvxYQJE/Diiy9i165dXRoPANu3b8eYMWPOe83zkUcewZtvvomEhAQ88cQT+Pjjj7v8HIGG\nF+q7qdDQUJ/H33zzDW699VY8+OCD+P3vfw+r1YqKigrccsstOH369AXXFRwc7PNYURR4vd5LHtPV\ni8g9evRASEgIGhoa2syrr69HSEhIl9bXyuv14uabb8ayZcvaXGA1m83q5yEhIW0yz5kzB/n5+Vi6\ndCliYmIQGhqK2bNnt9mnF7MPz84HAO+//z4GDBjQZv65X+vO8nq9mDFjBh5//PE22x0eHq5+3l72\ns3OdPe1iXOy+CQ4Oxvjx4zF+/HjMnz8fzz33HBYtWoR58+ad92L41q1bcc899+B3v/sdfvGLX8Bs\nNqOgoAAPPfRQhz8H53rxxRcxbdo0bNq0CZ988gkWLFiA3/72t3jmmWe6tJ4LueWWW3Do0CF89NFH\nyM
/Pxx133IFrr70WmzZt0uw5uhuWSoD44osv4PF4kJmZCZ2u5QDz008/bbPc93EHVWxsLPR6PYqK\nijBw4EB1+ueff96psf/6178wbdo0n+lffPEFhg4dqj4ODg5Gc3Nzm/HtTU9MTERubi769++PoKCu\nfUt/+umnuP/++3H77bcDaLkjbP/+/YiNje30Ovr374/IyEj87//+L2688cY28+Pi4nDVVVehsrIS\nqampXcp3oa9fYmIidu3ahUGDBnVpnee65pprUFRU5DPt3MetxdHe10QrrV//uro6REZGtvu1/uyz\nz9C/f388++yz6rQ///nPbbJ2NufgwYMxa9YszJo1Cy+88ALWrl2LZ555Bnq9Hnq9vsP1jBw5En/9\n619x+vTpNuXaymKx4K677sJdd92Fe+65B+PGjcM333yDIUOGdCpjoOHpLz8696/LC4mNjYXX68Wy\nZctw4MABbNy4EX/4wx86XGdXnqOzzGYzpk2bhqeffhqbNm3C119/jaeeegqVlZUdlti8efPwxhtv\nIDMzE/v27cOePXuwYMEC/O1vf8Pjjz+uLjdo0CAcOnQIX375JRwOB06dOqVO//TTT3HkyBH19M1j\njz0Gl8uF22+/HYWFhTh48CA+/fRTzJ8/v8PXePzkJz/Bu+++i+3bt6O8vBwPPPAAjh8/3uV98txz\nz2HFihV46aWXsG/fPuzevRsrVqxAY2MjzGYz5s2bhyeffBKvvvoq9u/fj/Lycqxfv97nl2N7RAT7\n9u3Dzp07ff6dOHECzz77LLZv344HHngAX375JaqqqvDJJ5/g0UcfxdGjRztcb6snnngCn3zyCRYu\nXIiKigq8++67WLlyJYD/X2qtfzz84x//QF1dHRobG7u8j1odO3YMN910E/77v/8bu3btwoEDB/De\ne+/ht7/9LYYOHYprrrkGQMvXuri4GFVVVXA4HGhubsZPfvITHDlyBOvWrUNVVRX+67/+C6+//rrP\n+gcNGgSPx4MPP/wQDocDbre7TYZvv/0Wc+fORUFBAQ4ePIgvv/wSH3/8MWw2m7rMj370I2zevBnV\n1dU+L3Y8e9/NmTMHJ0+exO23347PP/8cVVVVeP/99/HJJ58AADIyMvDee+9h//792LdvH/7yl78g\nLCwM/fr1u+j91+3573LOD9uFLtRHRUX5XDhvtXz5cunfv78YDAYZN26c/OMf//C51bK9u7/auzjd\nv39/n4vd5z5fe89/7733qreRioi43W554IEHpGfPnmK1WmXu3Lny8MMPy7XXXtvhtm/YsEGSk5PF\nYrFIeHi4pKSkqBeWW506dUrS0tKkV69e6i3FIiJFRUWSkJAgISEhPheOq6qq5K677pKIiAgJCQmR\nQYMGyf3336/eXnvujQCtqqqq5MYbbxSj0Sj9+vWThQsXyn333eezraNGjZJHH33UZ1x7NxK8+eab\nEhcXJz169JCIiAi57bbbfG4pfvXVVyU+Pl5CQkLEarXK6NGj5fXXXz/vfmr9+rX377333hMRkdLS\nUrn11lvV21ljY2Pl4YcfVu94ysjIaHPzRHsX3f/85z/L4MGD5eqrr5aUlBRZv3696HQ6n9uyFy5c\nKP369RO9Xt/mluKzne9CeSu32y3z58+XpKQksVqt6q3uc+bMkWPHjqnLff3113L99ddLaGioz/d5\nRkaGREZGitFolF/96leybt26Ntsze/ZsiYyM9Lml+OysLpdL7rzzThk0aJCEhIRInz595L777pPq\n6mp1HX//+99l6NChEhwcrG7PuRfqRXxvZzcajfIf//Ef6t1fzz33nAwbNkyMRqP06tVLJkyYIP/6\n17/Ou2+uBIrIhf+UPXr0KJYvX66+8KmmpgZ33HEHUlJSsHz5cvVQ1W63w2AwAAByc3ORn58PvV6P\n9PR09aJvZWUlVq9ejTNnziAhIQHp6ekAAI/Hg+zsbFRWVsJkMsFu
t6vnhAsKCpCbmwsAmDx5MsaO\nHXu5+pUu0fXXX4/Bgwfjrbfe8ncUukSvvfYaHn30UTQ0NFz0tS76gepKAzU3N8uDDz4odXV18tZb\nb0leXp6IiOTm5sq6detEROTQoUMyb9488Xg8UlNTI4888oh66+L8+fNl//79IiKyaNEi2bFjh4iI\nfPTRR/LHP/5RRFrumc/MzBQRkcbGRnnkkUfk5MmT4nK51M87snv37q5sVrcTCPl37Ngh69atU18f\n8dhjj4lOp5MtW7YERP4L+SHmX7JkiWzfvl0qKyvlL3/5i0RERKi3kH/fAnn/B3J2EW3yd+maSllZ\nGXr37o3w8HCUlJSoRw2pqakoLi4GAJSUlGD06NHQ6/WIjIxEVFQUKioqUF9fj6amJsTExAAAUlJS\n1DHFxcXqupKTk7F7924AwM6dOxEXFweDwYDQ0FDExcWhtLS0w5zl5eVd2axuJ1Dyr1ixAomJiRgz\nZgw+//xzfPDBB0hJSQmY/OfzQ8y/fft2TJw4Eddccw1+97vf4cEHH8SaNWsuQ7qOBfL+D+TsgDb5\nu3SrTGFhIW644QYAQENDg3q7ptlsVm8TdTqdPnfOWCwWOJ1O6PV6n/eYslqt6sUvp9OpztPpdDAY\nDHC5XD7Tz14X+d+IESPwxRdf+DsGaWT9+vX+jkBXiE4fqXg8HpSUlLR5kVQrLW9dlctwxxIREV1+\nnT5SKS0txeDBg9VX5prNZtTX16sfw8LCALQcTZx9S6bD4YDFYoHFYvF5B9LW6a1jWh97vV40NTXB\naDTCYrH4HI45HA4MGzasTbby8nKf5dLS0jq7Wd0S8/sX8/tXIOcP5OxAS/6z3+7fZrP53GbdGZ0u\nlc8++wzXX3+9+njkyJEoKCjApEmTUFBQgMTERAAtL8hasWIFbrnlFjidTlRXVyMmJgaKosBgMKCi\nogJDhgzB1q1b8ctf/lIds2XLFvz4xz9GUVGRWhzx8fH4n//5H7jdbni9XpSVleGee+5pk629De/o\nPv3uzGQyXdLrAPyN+f2L+f0nkLMDLW+xc6nF2KlS+e6771BWVoaHHnpInTZp0iRkZmYiPz8fERER\nsNvtAIDo6GiMGjUKdrsdQUFBmDFjhnpqbPr06Vi1apV6S3Hr+ymNHz8eK1euxJw5c9T3RAIAo9GI\nKVOmICMjA4qiYOrUqRf9lhZERHT5dfg6lUDFIxX/YX7/Yn7/CeTsQMuRyqXi27QQEZFmWCpERKQZ\nlgoREWmGpUJERJphqRARkWZYKkREpBmWChERaYalQkREmmGpEBGRZlgqRESkGZYKERFphqVCRESa\nYakQEZFmuvTfCQcSffFWyImG9ucNH4kzkZf+bpxEROTrii0Vz4cb4T1U1e48JawXwFIhItIcT38R\nEZFmWCpERKQZlgoREWmGpUJERJphqRARkWZYKkREpBmWChERaYalQkREmunUix/dbjfWrl2LQ4cO\nQVEUPPzww4iKisLy5ctRV1eHyMhI2O12GAwGAEBubi7y8/Oh1+uRnp6O+Ph4AEBlZSVWr16NM2fO\nICEhAenp6QAAj8eD7OxsVFZWwmQywW63Izw8HABQUFCA3NxcAMDkyZMxduxYrfcBERFppFNHKm+8\n8QYSEhKQmZmJl19+Gf369UNeXh6GDx+OrKws2Gw29Rf/4cOHUVRUhMzMTMyfPx85OTkQEQBATk4O\nZs6ciaysLBw7dgylpaUAgM2bN8NoNGLFihWYOHEi1q1bBwBwuVzYuHEjFi9ejEWLFuGdd96B2+2+\nHPuBiIg00GGpuN1u7N27F+PGjQMA6PV6GAwGlJSUqEcNqampKC4uBgCUlJRg9OjR0Ov1iIyMRFRU\nFCoqKlBfX4+mpibExMQAAFJSUtQxxcXF6rqSk5Oxe/duAMDOnTsRFxcHg8GA0NBQxMXFqUVERETd\nT4env2pra2EymbB69WocPHgQ
gwcPRnp6OhoaGmA2mwEAZrMZDQ0tb97odDoRGxurjrdYLHA6ndDr\n9bBarep0q9UKp9Opjmmdp9PpYDAY4HK5fKafvS4iIuqeOiwVr9eLqqoqTJ8+HUOGDMGf/vQn5OXl\ntVlOURTNQrWeLuus8vJylJeXq4/T0tIuuLxOp8BgMl1Utu9DcHAwTN04X0eY37+Y338COXurDRs2\nqJ/bbDbYbLYuje+wVCwWC6xWK4YMGQKg5fRUXl4ezGYz6uvr1Y9hYWHq8sePH1fHOxwOWCwWWCwW\nOByONtNbx7Q+9nq9aGpqgtFohMVi8SkLh8OBYcOGtcnY1Q33egWNjY2dXv77ZjKZunW+jjC/fzG/\n/wRydqAlf0d/lHekw2sqZrMZVqsVR48eBQCUlZUhOjoaI0eOREFBAYCWO7QSExMBAImJiSgsLITH\n40FtbS2qq6sRExMDs9kMg8GAiooKiAi2bt2KpKQkdcyWLVsAAEVFRWpxxMfHo6ysDG63Gy6XC2Vl\nZeqdZERE1P106pbiadOmYeXKlfB4POjduzdmzZoFr9eLzMxM5OfnIyIiAna7HQAQHR2NUaNGwW63\nIygoCDNmzFBPjU2fPh2rVq1SbykeMWIEAGD8+PFYuXIl5syZA5PJhLlz5wIAjEYjpkyZgoyMDCiK\ngqlTpyI0NPRy7AciItKAIl29gBEgjsz89Xn/k67gh55Cc+IN33OizrsSDqGZ33+Y338COTsA9O17\n6f95IV9RT0REmmGpEBGRZlgqRESkGZYKERFphqVCRESaYakQEZFmWCpERKQZlgoREWmGpUJERJph\nqRARkWZYKkREpBmWChERaYalQkREmmGpEBGRZlgqRESkGZYKERFphqVCRESaYakQEZFmWCpERKQZ\nlgoREWmGpUJERJphqRARkWZYKkREpJmgziw0e/ZsGAwGKIoCvV6PxYsXw+VyYfny5airq0NkZCTs\ndjsMBgMAIDc3F/n5+dDr9UhPT0d8fDwAoLKyEqtXr8aZM2eQkJCA9PR0AIDH40F2djYqKythMplg\nt9sRHh4OACgoKEBubi4AYPLkyRg7dqzW+4CIiDTSqSMVRVHw/PPPY8mSJVi8eDEAIC8vD8OHD0dW\nVhZsNpv6i//w4cMoKipCZmYm5s+fj5ycHIgIACAnJwczZ85EVlYWjh07htLSUgDA5s2bYTQasWLF\nCkycOBHr1q0DALhcLmzcuBGLFy/GokWL8M4778Dtdmu+E4iISBudKhURUYuhVUlJiXrUkJqaiuLi\nYnX66NGjodfrERkZiaioKFRUVKC+vh5NTU2IiYkBAKSkpKhjiouL1XUlJydj9+7dAICdO3ciLi4O\nBoMBoaGhiIuLU4uIiIi6n06d/lIUBQsWLIBOp8ONN96ICRMmoKGhAWazGQBgNpvR0NAAAHA6nYiN\njVXHWiwWOJ1O6PV6WK1WdbrVaoXT6VTHtM7T6XQwGAxwuVw+089eFxERdU+dKpUXX3wRvXr1wokT\nJ7BgwQL07du3zTKKomgW6tyjoo6Ul5ejvLxcfZyWlnbB5XU6BQaT6aKyfR+Cg4Nh6sb5OsL8/sX8\n/hPI2Vtt2LBB/dxms8Fms3VpfKdKpVevXgCAnj17IikpCRUVFTCbzaivr1c/hoWFAWg5mjh+/Lg6\n1uFwwGKxwGKxwOFwtJneOqb1sdfrRVNTE4xGIywWi09ZOBwODBs2rE2+rm641ytobGzs9PLfN5PJ\n1K3zdYT5/Yv5/SeQswMt+Tv6o7wjHV5T+e6773Dq1CkAwKlTp7Br1y4MGDAAI0eOREFBAYCWO7QS\nExMBAImJiSgsLITH40FtbS2qq6sRExMDs9kMg8GAiooKiAi2bt2KpKQkdcyWLVsAAEVFRWpxxMfH\no6ysDG63Gy6XC2VlZeqdZERE1P10eKTS0NCAl19+GYqioLm5GWPGjEF8fDyGDBmCzMxM5OfnIy
Ii\nAna7HQAQHR2NUaNGwW63IygoCDNmzFBPjU2fPh2rVq1SbykeMWIEAGD8+PFYuXIl5syZA5PJhLlz\n5wIAjEYjpkyZgoyMDCiKgqlTpyI0NPRy7QsiIrpEinT1AkaAODLz1/Aeqmp3XvBDT6E58YbvOVHn\nXQmH0MzvP8zvP4GcHUC718u7iq+oJyIizbBUiIhIMywVIiLSDEuFiIg0w1IhIiLNsFSIiEgzLBUi\nItIMS4WIiDTDUiEiIs2wVIiISDMsFSIi0gxLhYiINMNSISIizbBUiIhIMywVIiLSDEuFiIg0w1Ih\nIiLNsFSIiEgzLBUiItIMS4WIiDTDUiEiIs2wVIiISDMsFSIi0kxQZxf0er2YP38+LBYLnn76abhc\nLixfvhx1dXWIjIyE3W6HwWAAAOTm5iI/Px96vR7p6emIj48HAFRWVmL16tU4c+YMEhISkJ6eDgDw\neDzIzs5GZWUlTCYT7HY7wsPDAQAFBQXIzc0FAEyePBljx47VcvuJiEhDnT5S+eCDD9CvXz/1cV5e\nHoYPH46srCzYbDb1F//hw4dRVFSEzMxMzJ8/Hzk5ORARAEBOTg5mzpyJrKwsHDt2DKWlpQCAzZs3\nw2g0YsWKFZg4cSLWrVsHAHC5XNi4cSMWL16MRYsW4Z133oHb7dZs44mISFudKhWHw4EdO3ZgwoQJ\n6rSSkhL1qCE1NRXFxcXq9NGjR0Ov1yMyMhJRUVGoqKhAfX09mpqaEBMTAwBISUlRxxQXF6vrSk5O\nxu7duwEAO3fuRFxcHAwGA0JDQxEXF6cWERERdT+dKpU333wT9913HxRFUac1NDTAbDYDAMxmMxoa\nGgAATqdTPXUFABaLBU6nE06nE1arVZ1utVrhdDrVMa3zdDodDAYDXC5XmzGt6yIiou6pw2sq27dv\nR1hYGAYOHIjy8vLzLnd24Vyq1tNlnVVeXu6TLS0t7YLL63QKDCbTRWX7PgQHB8PUjfN1hPn9i/n9\nJ5Czt9qwYYP6uc1mg81m69L4Dktl7969KCkpwY4dO3D69Gk0NTVh5cqVMJvNqK+vVz+GhYUBaDma\nOH78uDre4XDAYrHAYrHA4XC0md46pvWx1+tFU1MTjEYjLBaLT1k4HA4MGzasTcaubrjXK2hsbOz0\n8t83k8nUrfN1hPn9i/n9J5CzAy35O/qjvCMdnv66++67sWbNGmRnZ+Oxxx7DsGHD8Oijj2LkyJEo\nKCgA0HKHVmJiIgAgMTERhYWF8Hg8qK2tRXV1NWJiYmA2m2EwGFBRUQERwdatW5GUlKSO2bJlCwCg\nqKhILY72fupPAAAUv0lEQVT4+HiUlZXB7XbD5XKhrKxMvZOMiIi6n07fUnyuSZMmITMzE/n5+YiI\niIDdbgcAREdHY9SoUbDb7QgKCsKMGTPUU2PTp0/HqlWr1FuKR4wYAQAYP348Vq5ciTlz5sBkMmHu\n3LkAAKPRiClTpiAjIwOKomDq1KkIDQ291G0mIqLLRJGuXsAIEEdm/hreQ1Xtzgt+6Ck0J97wPSfq\nvCvhEJr5/Yf5/SeQswNA3759L3kdfEU9ERFphqVCRESaYakQEZFmWCpERKQZlgoREWmGpUJERJph\nqRARkWZYKkREpBmWChERaYalQkREmmGpEBGRZlgqRESkGZYKERFphqVCRESaYakQEZFmWCpERKQZ\nlgoREWmGpUJERJphqRARkWZYKkREpBmWChERaYalQkREmmGpEBGRZoI6WuDMmTN4/vnn4fF44PF4\nkJiYiLvvvhsulwvLly9HXV0dIiMjYbfbYTAYAAC5ubnIz8+HXq9Heno64uPjAQCVlZVYvXo1zpw5\ng4SEBKSnpwMAPB4PsrOzUVlZCZPJBLvdjvDwcABAQUEBcnNzAQCTJ0/G2LFjL8d+ICIiDXR4pHLV\nVVfh+eefx5IlS/DKK6+gvLwce/fuRV5eHoYPH46srCzYbD
b1F//hw4dRVFSEzMxMzJ8/Hzk5ORAR\nAEBOTg5mzpyJrKwsHDt2DKWlpQCAzZs3w2g0YsWKFZg4cSLWrVsHAHC5XNi4cSMWL16MRYsW4Z13\n3oHb7b5c+4KIiC5Rp05/9ejRA0DLUYvX64XRaERJSYl61JCamori4mIAQElJCUaPHg29Xo/IyEhE\nRUWhoqIC9fX1aGpqQkxMDAAgJSVFHVNcXKyuKzk5Gbt37wYA7Ny5E3FxcTAYDAgNDUVcXJxaRERE\n1P10ePoLALxeLzIyMlBTU4ObbroJ0dHRaGhogNlsBgCYzWY0NDQAAJxOJ2JjY9WxFosFTqcTer0e\nVqtVnW61WuF0OtUxrfN0Oh0MBgNcLpfP9LPXRURE3VOnSkWn02HJkiVwu91YuHAhysvL2yyjKIpm\noVpPl3VWeXm5T6a0tLQLLq/TKTCYTBeV7fsQHBwMUzfO1xHm9y/m959Azt5qw4YN6uc2mw02m61L\n4ztVKq0MBgMSEhLwzTffwGw2o76+Xv0YFhYGoOVo4vjx4+oYh8MBi8UCi8UCh8PRZnrrmNbHXq8X\nTU1NMBqNsFgsPmXhcDgwbNiwNrm6uuFer6CxsbErm/69MplM3TpfR5jfv5jffwI5O9CSv6M/yjvS\n4TWVEydOqBfHT58+jbKyMgwaNAgjR45EQUEBgJY7tBITEwEAiYmJKCwshMfjQW1tLaqrqxETEwOz\n2QyDwYCKigqICLZu3YqkpCR1zJYtWwAARUVFanHEx8ejrKwMbrcbLpcLZWVl6p1kRETU/XR4pFJf\nX49Vq1ZBRCAiGDNmDIYPH45BgwYhMzMT+fn5iIiIgN1uBwBER0dj1KhRsNvtCAoKwowZM9RTY9On\nT8eqVavUW4pHjBgBABg/fjxWrlyJOXPmwGQyYe7cuQAAo9GIKVOmICMjA4qiYOrUqQgNDb1c+4KI\niC6RIl29gBEgjsz8NbyHqtqdF/zQU2hOvOF7TtR5V8IhNPP7D/P7TyBnB4C+ffte8jr4inoiItIM\nS4WIiDTDUiEiIs2wVIiISDMsFSIi0gxLhYiINMNSISIizbBUiIhIMywVIiLSDEuFiIg0w1IhIiLN\nsFSIiEgzLBUiItIMS4WIiDTTpf/58Uqi//Y44Kw7/wKWCDT3Cv/+AhERXQF+sKUCZx1O/+Hp884O\nzngJYKkQEXUJT38REZFmWCpERKQZlgoREWmGpUJERJphqRARkWZYKkREpBmWChERaabD16k4HA5k\nZ2ejoaEBiqJgwoQJuPnmm+FyubB8+XLU1dUhMjISdrsdBoMBAJCbm4v8/Hzo9Xqkp6cjPj4eAFBZ\nWYnVq1fjzJkzSEhIQHp6OgDA4/EgOzsblZWVMJlMsNvtCA9veY1IQUEBcnNzAQCTJ0/G2LFjL8d+\nICIiDXR4pKLX63H//fdj2bJlWLhwIT766CMcOXIEeXl5GD58OLKysmCz2dRf/IcPH0ZRUREyMzMx\nf/585OTkQEQAADk5OZg5cyaysrJw7NgxlJaWAgA2b94Mo9GIFStWYOLEiVi3bh0AwOVyYePGjVi8\neDEWLVqEd955B263+3LtCyIiukQdlorZbMbAgQMBACEhIejXrx8cDgdKSkrUo4bU1FQUFxcDAEpK\nSjB69Gjo9XpERkYiKioKFRUVqK+vR1NTE2JiYgAAKSkp6pji4mJ1XcnJydi9ezcAYOfOnYiLi4PB\nYEBoaCji4uLUIiIiou6nS9dUamtrcfDgQcTGxqKhoQFmsxlAS/E0NDQAAJxOp3rqCgAsFgucTiec\nTiesVqs63Wq1wul0qmNa5+l0OhgMBrhcrjZjWtdFRETdU6ff++vUqVNYtmwZ0tPTERIS0ma+oiia\nhWo9XdZZ5eXlKC8vVx+npaVdcHmdToGiv/Cm6/VBMJhMXcqhleDgYJj89NxaYH7/Yn7/CeTsrTZs\n2KB+brPZYLPZujS+U6
XS3NyMpUuXIiUlBUlJSQBajk7q6+vVj2FhYQBajiaOHz+ujnU4HLBYLLBY\nLHA4HG2mt45pfez1etHU1ASj0QiLxeJTFg6HA8OGDWuTr6sb7vUK0OzpYJs9aGxs7PQ6tWQymfz2\n3Fpgfv9ifv8J5OxAS/6O/ijvSKdOf61ZswbR0dG4+eab1WkjR45EQUEBgJY7tBITEwEAiYmJKCws\nhMfjQW1tLaqrqxETEwOz2QyDwYCKigqICLZu3aoWVGJiIrZs2QIAKCoqUosjPj4eZWVlcLvdcLlc\nKCsrU+8kIyKi7qfDI5W9e/fi008/xYABA/DUU09BURTcddddmDRpEjIzM5Gfn4+IiAjY7XYAQHR0\nNEaNGgW73Y6goCDMmDFDPTU2ffp0rFq1Sr2leMSIEQCA8ePHY+XKlZgzZw5MJhPmzp0LADAajZgy\nZQoyMjKgKAqmTp2K0NDQy7UviIjoEinS1QsYAeLIzF/De6iq3XnBDz0F9LJ2+P+pNA/56eWKd0FX\nwiE08/sP8/tPIGcHgL59+17yOviKeiIi0gxLhYiINMNSISIizbBUiIhIMywVIiLSDEuFiIg0w1Ih\nIiLNsFSIiEgzLBUiItIMS4WIiDTDUiEiIs2wVIiISDMsFSIi0gxLhYiINMNSISIizbBUiIhIMywV\nIiLSDEuFiIg0w1IhIiLNsFSIiEgzLBUiItIMS4WIiDTDUiEiIs0EdbTAmjVrsH37doSFheGVV14B\nALhcLixfvhx1dXWIjIyE3W6HwWAAAOTm5iI/Px96vR7p6emIj48HAFRWVmL16tU4c+YMEhISkJ6e\nDgDweDzIzs5GZWUlTCYT7HY7wsPDAQAFBQXIzc0FAEyePBljx47VfAcQEZF2OjxSGTduHJ555hmf\naXl5eRg+fDiysrJgs9nUX/yHDx9GUVERMjMzMX/+fOTk5EBEAAA5OTmYOXMmsrKycOzYMZSWlgIA\nNm/eDKPRiBUrVmDixIlYt24dgJbi2rhxIxYvXoxFixbhnXfegdvt1nTjiYhIWx2WytChQxEaGuoz\nraSkRD1qSE1NRXFxsTp99OjR0Ov1iIyMRFRUFCoqKlBfX4+mpibExMQAAFJSUtQxxcXF6rqSk5Ox\ne/duAMDOnTsRFxcHg8GA0NBQxMXFqUVERETd00VdU2loaIDZbAYAmM1mNDQ0AACcTqd66goALBYL\nnE4nnE4nrFarOt1qtcLpdKpjWufpdDoYDAa4XK42Y1rXRURE3VeH11Q6Q1EULVYDAOrpsq4oLy9H\neXm5+jgtLe2Cy+t0ChT9hTddrw+CwWTqchYtBAcHw+Sn59YC8/sX8/tPIGdvtWHDBvVzm80Gm83W\npfEXVSpmsxn19fXqx7CwMAAtRxPHjx9Xl3M4HLBYLLBYLHA4HG2mt45pfez1etHU1ASj0QiLxeJT\nFA6HA8OGDWs3T1c33OsVoNlzwWWamz1obGzs9Dq1ZDKZ/PbcWmB+/2J+/wnk7EBL/o7+KO9Ip05/\niYjPEcTIkSNRUFAAoOUOrcTERABAYmIiCgsL4fF4UFtbi+rqasTExMBsNsNgMKCiogIigq1btyIp\nKUkds2XLFgBAUVGRWhzx8fEoKyuD2+2Gy+VCWVmZeicZERF1Tx0eqWRlZWHPnj1obGzEww8/jLS0\nNEyaNAmZmZnIz89HREQE7HY7ACA6OhqjRo2C3W5HUFAQZsyYoZ4amz59OlatWqXeUjxixAgAwPjx\n47Fy5UrMmTMHJpMJc+fOBQAYjUZMmTIFGRkZUBQFU6dObXPDABERdS+KXMxFjABwZOav4T1U1e68\n4IeeAnpZcfoPT593fHDGS2ge8tPLFe+CroRDaOb3H+b3n0DODgB9+/a95HXwFfVERKQZlgoREWmG\npUJERJphqRARkWY0efHjlUgJCoL+m6/an2mJQHOv8PbnERH9gLFUzqfxBE5nvdDurOCM
lwCWChFR\nGzz9RUREmmGpEBGRZlgqRESkGZYKERFphqVCRESaYakQEZFmWCpERKQZlgoREWmGpUJERJphqRAR\nkWZYKkREpBmWChERaYalQkREmuG7FF+EC74tPsC3xieiHyyWysW4wNviA3xrfCL64eLpLyIi0kxA\nHKmUlpbiT3/6E0QE48aNw6RJk/wdiYiI2tHtj1S8Xi9ef/11PPPMM1i6dCm2bduGI0eO+DsWERG1\no9sfqVRUVCAqKgoREREAgOuvvx7FxcXo16+fn5OdHy/kE9EPVbcvFafTCavVqj62WCyoqKjwY6JO\n6OBCfo9nl0LvrDvv/DO9+wLGsMuRjIjosur2pXKxgm7+NaSxod15ugGD4T3PvO9FB6Wjey4T+pqj\n5x/PIx0i6qYUERF/h7iQr7/+Gm+//TaeeeYZAEBeXh4A+FysLy8vR3l5ufo4LS3t+w1JRHSF2LBh\ng/q5zWaDzWbr0vhuf6E+JiYG1dXVqKurg8fjwbZt25CYmOizjM1mQ1pamvrv7J0SiJjfv5jfvwI5\nfyBnB1ryn/27tKuFAgTA6S+dTofp06djwYIFEBGMHz8e0dHR/o5FRETt6PalAgAjRoxAVlaWv2MQ\nEVEHuv3pr4txMYds3Qnz+xfz+1cg5w/k7IA2+bv9hXoiIgocV+SRChER+QdLhYiINBMQF+q7IhDe\nfHLNmjXYvn07wsLC8MorrwAAXC4Xli9fjrq6OkRGRsJut8NgMAAAcnNzkZ+fD71ej/T0dMTHx/st\nu8PhQHZ2NhoaGqAoCiZMmICbb745YPKfOXMGzz//PDweDzweDxITE3H33XcHTP5WXq8X8+fPh8Vi\nwdNPPx1Q+WfPng2DwQBFUaDX67F48eKAyu92u7F27VocOnQIiqLg4YcfRlRUVEDkP3r0KJYvXw5F\nUSAiqKmpwR133IGUlBTt8ssVpLm5WR555BGpra2VM2fOyJNPPimHDx/2d6w2vvrqK6mqqpInnnhC\nnfbWW29JXl6eiIjk5ubKunXrRETk0KFDMm/ePPF4PFJTUyOPPPKIeL1ev+QWEfn222+lqqpKRESa\nmppkzpw5cvjw4YDJLyJy6tQpEWn5fvnNb34jX331VUDlFxF5//33JSsrS/7whz+ISOB8/4iIzJ49\nWxobG32mBVL+7Oxs2bx5s4iIeDweOXnyZEDlb9Xc3CwPPvig1NXVaZr/ijr9dfabTwYFBalvPtnd\nDB06FKGhoT7TSkpKMHbsWABAamqqmrukpASjR4+GXq9HZGQkoqKi/PreZ2azGQMHDgQAhISEoF+/\nfnA4HAGTHwB69OgBoOWoxev1wmg0BlR+h8OBHTt2YMKECeq0QMovIpBz7g8KlPxutxt79+7FuHHj\nAAB6vR4GgyFg8p+trKwMvXv3Rnh4uKb5r6jTXwH55pP/T0NDA8xmM4CWX9wNDS3vTeZ0OhEbG6su\nZ7FY4HQ6/ZLxXLW1tTh48CBiY2MDKr/X60VGRgZqampw0003ITo6OqDyv/nmm7jvvvvgdrvVaYGU\nX1EULFiwADqdDjfeeCMmTJgQMPlra2thMpmwevVqHDx4EIMHD0Z6enrA5D9bYWEhbrjhBgDafv9c\nUaVyJVEUxd8RLujUqVNYtmwZ0tPTERIS0mZ+d86v0+mwZMkSuN1uLFy40Od941p11/yt1+IGDhzY\nbu5W3TU/ALz44ovo1asXTpw4gQULFqBv375tlumu+b1eL6qqqjB9+nQMGTIEf/rTn9T3Izxbd83f\nyuPxoKSkBPfcc0+78y8l/xVVKhaLBcePH1cfO51OWCwWPybqPLPZjPr6evVjWFjLW9+fu00Oh8Pv\n29Tc3IylS5ciJSUFSUlJAAIrfyuDwYCEhAR88803AZN/7969KCkpwY4dO3D69Gk0NTVh5cqVAZMf\nAHr16gUA6NmzJ5KSklBRUREw+S0WC6xWK4YMGQIA
SE5ORl5eXsDkb1VaWorBgwejZ8+eALT9+b2i\nrql05s0nu4tzzyuPHDkSBQUFAICCggI1d2JiIgoLC+HxeFBbW4vq6mrExMT4I7JqzZo1iI6Oxs03\n36xOC5T8J06cUE8bnT59GmVlZRg0aFDA5L/77ruxZs0aZGdn47HHHsOwYcPw6KOPBkz+7777DqdO\nnQLQcrS7a9cuDBgwIGDym81mWK1WHD3a8l9TlJWVITo6OmDyt/rss89w/fXXq4+1zH/FvaK+tLQU\nb7zxhvrmk93xluKsrCzs2bMHjY2NCAsLQ1paGpKSkpCZmYnjx48jIiICdrtdvZifm5uLzZs3Iygo\nyO+3JO7duxfPP/88BgwYAEVRoCgK7rrrLsTExARE/n//+99YtWqVWupjxozBbbfdBpfLFRD5z7Zn\nzx68//776i3FgZC/trYWL7/8MhRFQXNzM8aMGYNJkyYFTH4AOHDgAF599VV4PB707t0bs2bNgtfr\nDZj83333HWbNmoXs7GxcffXVAKDp/r/iSoWIiPznijr9RURE/sVSISIizbBUiIhIMywVIiLSDEuF\niIg0w1IhIiLNsFSIiEgzLBUiItLM/wF7T2WRKBy3/QAAAABJRU5ErkJggg==\n",
266 | "text/plain": [
267 | ""
268 | ]
269 | },
270 | "metadata": {},
271 | "output_type": "display_data"
272 | }
273 | ],
274 | "source": [
275 | "plt.figure(1)\n",
276 | "train_df_context_len = train_df.Context.str.split(\" \").apply(len)\n",
277 | "train_df_context_len.hist(bins=40)\n",
278 | "plt.title(\"Training Context Length Statistics\")\n",
279 | "print(train_df_context_len.describe())\n",
280 | "\n",
281 | "plt.figure(2)\n",
282 | "train_df_utterance_len = train_df.Utterance.str.split(\" \").apply(len)\n",
283 | "train_df_utterance_len.hist(bins=40)\n",
284 | "plt.title(\"Training Utterance Length Statistics\")\n",
285 | "print(train_df_utterance_len.describe())"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 14,
291 | "metadata": {
292 | "collapsed": false
293 | },
294 | "outputs": [
295 | {
296 | "data": {
297 | "text/html": [
298 | "\n",
299 | "
\n",
300 | " \n",
301 | " \n",
302 | " | \n",
303 | " Context | \n",
304 | " Ground Truth Utterance | \n",
305 | " Distractor_0 | \n",
306 | " Distractor_1 | \n",
307 | " Distractor_2 | \n",
308 | " Distractor_3 | \n",
309 | " Distractor_4 | \n",
310 | " Distractor_5 | \n",
311 | " Distractor_6 | \n",
312 | " Distractor_7 | \n",
313 | " Distractor_8 | \n",
314 | "
\n",
315 | " \n",
316 | " \n",
317 | " \n",
318 | " 0 | \n",
319 | " anyon know whi my stock oneir export env var usernam ' ? i mean what be that use for ? i know of $ user but not $ usernam . my precis instal doe n't export usernam __eou__ __eot__ look like it use to be export by lightdm , but the line have the comment `` // fixm : be this requir ? '' so i guess it be n't surpris it be go __eou__ __eot__ thank ! how the heck do you figur that out ? __eou__ __eot__ https : //bugs.launchpad.net/lightdm/+bug/864109/comments/3 __eou__ __eot__ | \n",
320 | " nice thank ! __eou__ | \n",
321 | " wrong channel for it , but check efnet.org , unoffici page . __eou__ | \n",
322 | " everi time the kernel chang , you will lose video __eou__ yep __eou__ | \n",
323 | " ok __eou__ | \n",
324 | " ! nomodeset > acer __eou__ i 'm assum it be a driver issu . __eou__ ! pm > acer __eou__ i do n't pm . ; ) __eou__ oop sorri for the cap __eou__ | \n",
325 | " http : //www.ubuntu.com/project/about-ubuntu/deriv ( some call them deriv , other call them flavor , same differ ) __eou__ | \n",
326 | " thx __eou__ unfortun the program be n't instal from the repositori __eou__ | \n",
327 | " how can i check ? by do a recoveri for test ? __eou__ | \n",
328 | " my humbl apolog __eou__ | \n",
329 | " # ubuntu-offtop __eou__ | \n",
330 | "
\n",
331 | " \n",
332 | " 1 | \n",
333 | " i set up my hd such that i have to type a passphras to access it at boot . how can i remov that passwrd , and just boot up normal . i do this at instal , it work fine , just tire of have reboot where i need to be at termin to type passwd in . help ? __eou__ __eot__ backup your data , and re-instal without encrypt `` might '' be the easiest method __eou__ __eot__ | \n",
334 | " so you dont know , ok , anyon els ? __eou__ you be like , yah my mous doesnt work , reinstal your os lolol what a joke __eou__ | \n",
335 | " nmap be nice , but it be n't what i be look for . i final find it again : mtr ( my tracerout ) be what i be look for . i ll be keep nmap handi though . __eou__ | \n",
336 | " ok __eou__ | \n",
337 | " cdrom work fine on window . __eou__ i dont think it have anyth to do with the bure process , cds work fine on my desktop and my other ubuntu lap __eou__ | \n",
338 | " ah yes , i have read return as rerun __eou__ | \n",
339 | " hm ? __eou__ | \n",
340 | " not the case , lts be everi other .04 releas . the .04 be n't alway more stabl __eou__ i would reinstal with precis __eou__ you can restor user data and such from backup __eou__ | \n",
341 | " pretti much __eou__ | \n",
342 | " i use the one i download from amd __eou__ | \n",
343 | " ffmpeg be part of the packag , quixotedon , at least i 'm quit sure it still be __eou__ if not just instal ffmpeg __eou__ | \n",
344 | "
\n",
345 | " \n",
346 | " 2 | \n",
347 | " im tri to use ubuntu on my macbook pro retina __eou__ i read in the forum that ubuntu have a appl version now ? __eou__ __eot__ not that ive ever hear of.. normal ubutnu should work on an intel base mac . there be the ppc version also . __eou__ you want total control ? or what be you want exact ? __eou__ __eot__ | \n",
348 | " just wonder how it run __eou__ | \n",
349 | " yes , that 's what i do , export it to a `` id_dsa '' file , then back to ubuntu copi it into ~/.ssh/ __eou__ | \n",
350 | " noth - i be talk about the question of myhero __eou__ | \n",
351 | " that should fix the font be too larg __eou__ | \n",
352 | " okay , so hcitool echo back hci0 < mac address of control > but the bluetooth devic panel keep disconnect and reconnect the devic ( or so it seem ) ani idea whi that would be ? __eou__ | \n",
353 | " i get to the menu with option such as tri ubuntu ' , instal ubuntu ' , check disc ' __eou__ | \n",
354 | " whi do u need analyz __eou__ it be a toy __eou__ ok msp301 __eou__ but y , i mean it be the same ubunut , onli with older program __eou__ ubuntu 804 or 1204 __eou__ no i dont use 804 __eou__ i be ask hypo qs __eou__ | \n",
355 | " cntrl-c may stop the command but it doe n't fix my hdd problem . __eou__ | \n",
356 | " if you re onli go to run ubuntu , just get a normal pc rather than a mac __eou__ that say , i 'm run it on a macbook , becaus i get one relat cheapli __eou__ | \n",
357 | " the one which be not pick up at the moment be on stderr and not stdout and > be onli cover stdout __eou__ | \n",
358 | "
\n",
359 | " \n",
360 | " 3 | \n",
361 | " no suggest ? __eou__ link ? __eou__ how can i remov luk passphras at boot . i dont want to use featur anymor ... __eou__ __eot__ you may need to creat a new volum __eou__ __eot__ that lead me to the next question lol ... i dont know how to creat new volum exact in cmdline , usual i use a gui . im just tri to access this server via usb load with next os im go to load , the luk pw be stop me __eou__ __eot__ for someth like that i would like use someth like a live gpart disk to avoid the confli... | \n",
362 | " you cant load anyth via usb or cd when luk be run __eou__ it wont allow usb boot , i tri with 2 diff usb drive __eou__ | \n",
363 | " -p sorri ... __eou__ nmap -p22 __eou__ it doe n't say : 22/tcp open ssh ? __eou__ | \n",
364 | " i guess so i ca n't even launch it . __eou__ | \n",
365 | " note __eou__ | \n",
366 | " rxvt-unicod be one __eou__ | \n",
367 | " i tar all of ~ __eou__ | \n",
368 | " i tar all of ~ __eou__ | \n",
369 | " i do n't realli know if i can help , but i be curious . lol __eou__ that 's cool . i ll look into it . now , we better stop talk about this sinc it 's offtop . : p __eou__ | \n",
370 | " that work just fine , thank ! __eou__ | \n",
371 | " thank you __eou__ | \n",
372 | "
\n",
373 | " \n",
374 | " 4 | \n",
375 | " i just ad a second usb printer but not sure what the uri should read - can anyon help with usb printer ? __eou__ __eot__ firefox localhost:631 __eou__ __eot__ firefox ? __eou__ __eot__ yes __eou__ firefox localhost:631 __eou__ firefox http : //localhost:631 __eou__ cup have a web base interfac __eou__ __eot__ | \n",
376 | " i be set it up under the printer configur __eou__ thank ! __eou__ | \n",
377 | " i 'd say the most common venu would be via launchpad . check out the factoid ! bug as well __eou__ | \n",
378 | " the old hardi man page , http : //manpages.ubuntu.com/manpages/hardy/man1/gcalctool.1.html say `` delet '' clear the screen , but it doe n't __eou__ becaus lts be good __eou__ | \n",
379 | " i ll give a tri __eou__ | \n",
380 | " by the way , the url you post for davf be from dapper ... that 's 5.xx iirc __eou__ | \n",
381 | " http : //ubuntuforums.org/showthread.php ? t=1549847 __eou__ | \n",
382 | " so i load up putti gui , then what do i do ? __eou__ | \n",
383 | " you should read error messag , it say be you root ? ' __eou__ | \n",
384 | " wait the colleg semest to close just to make sure i will not need to reconfigur my environ again __eou__ | \n",
385 | " i be call myself a jerk . all i know be that you download a game success . __eou__ | \n",
386 | "
\n",
387 | " \n",
388 | "
\n",
389 | "
"
390 | ],
391 | "text/plain": [
392 | " Context \\\n",
393 | "0 anyon know whi my stock oneir export env var usernam ' ? i mean what be that use for ? i know of $ user but not $ usernam . my precis instal doe n't export usernam __eou__ __eot__ look like it use to be export by lightdm , but the line have the comment `` // fixm : be this requir ? '' so i guess it be n't surpris it be go __eou__ __eot__ thank ! how the heck do you figur that out ? __eou__ __eot__ https : //bugs.launchpad.net/lightdm/+bug/864109/comments/3 __eou__ __eot__ \n",
394 | "1 i set up my hd such that i have to type a passphras to access it at boot . how can i remov that passwrd , and just boot up normal . i do this at instal , it work fine , just tire of have reboot where i need to be at termin to type passwd in . help ? __eou__ __eot__ backup your data , and re-instal without encrypt `` might '' be the easiest method __eou__ __eot__ \n",
395 | "2 im tri to use ubuntu on my macbook pro retina __eou__ i read in the forum that ubuntu have a appl version now ? __eou__ __eot__ not that ive ever hear of.. normal ubutnu should work on an intel base mac . there be the ppc version also . __eou__ you want total control ? or what be you want exact ? __eou__ __eot__ \n",
396 | "3 no suggest ? __eou__ link ? __eou__ how can i remov luk passphras at boot . i dont want to use featur anymor ... __eou__ __eot__ you may need to creat a new volum __eou__ __eot__ that lead me to the next question lol ... i dont know how to creat new volum exact in cmdline , usual i use a gui . im just tri to access this server via usb load with next os im go to load , the luk pw be stop me __eou__ __eot__ for someth like that i would like use someth like a live gpart disk to avoid the confli... \n",
397 | "4 i just ad a second usb printer but not sure what the uri should read - can anyon help with usb printer ? __eou__ __eot__ firefox localhost:631 __eou__ __eot__ firefox ? __eou__ __eot__ yes __eou__ firefox localhost:631 __eou__ firefox http : //localhost:631 __eou__ cup have a web base interfac __eou__ __eot__ \n",
398 | "\n",
399 | " Ground Truth Utterance \\\n",
400 | "0 nice thank ! __eou__ \n",
401 | "1 so you dont know , ok , anyon els ? __eou__ you be like , yah my mous doesnt work , reinstal your os lolol what a joke __eou__ \n",
402 | "2 just wonder how it run __eou__ \n",
403 | "3 you cant load anyth via usb or cd when luk be run __eou__ it wont allow usb boot , i tri with 2 diff usb drive __eou__ \n",
404 | "4 i be set it up under the printer configur __eou__ thank ! __eou__ \n",
405 | "\n",
406 | " Distractor_0 \\\n",
407 | "0 wrong channel for it , but check efnet.org , unoffici page . __eou__ \n",
408 | "1 nmap be nice , but it be n't what i be look for . i final find it again : mtr ( my tracerout ) be what i be look for . i ll be keep nmap handi though . __eou__ \n",
409 | "2 yes , that 's what i do , export it to a `` id_dsa '' file , then back to ubuntu copi it into ~/.ssh/ __eou__ \n",
410 | "3 -p sorri ... __eou__ nmap -p22 __eou__ it doe n't say : 22/tcp open ssh ? __eou__ \n",
411 | "4 i 'd say the most common venu would be via launchpad . check out the factoid ! bug as well __eou__ \n",
412 | "\n",
413 | " Distractor_1 \\\n",
414 | "0 everi time the kernel chang , you will lose video __eou__ yep __eou__ \n",
415 | "1 ok __eou__ \n",
416 | "2 noth - i be talk about the question of myhero __eou__ \n",
417 | "3 i guess so i ca n't even launch it . __eou__ \n",
418 | "4 the old hardi man page , http : //manpages.ubuntu.com/manpages/hardy/man1/gcalctool.1.html say `` delet '' clear the screen , but it doe n't __eou__ becaus lts be good __eou__ \n",
419 | "\n",
420 | " Distractor_2 \\\n",
421 | "0 ok __eou__ \n",
422 | "1 cdrom work fine on window . __eou__ i dont think it have anyth to do with the bure process , cds work fine on my desktop and my other ubuntu lap __eou__ \n",
423 | "2 that should fix the font be too larg __eou__ \n",
424 | "3 note __eou__ \n",
425 | "4 i ll give a tri __eou__ \n",
426 | "\n",
427 | " Distractor_3 \\\n",
428 | "0 ! nomodeset > acer __eou__ i 'm assum it be a driver issu . __eou__ ! pm > acer __eou__ i do n't pm . ; ) __eou__ oop sorri for the cap __eou__ \n",
429 | "1 ah yes , i have read return as rerun __eou__ \n",
430 | "2 okay , so hcitool echo back hci0 < mac address of control > but the bluetooth devic panel keep disconnect and reconnect the devic ( or so it seem ) ani idea whi that would be ? __eou__ \n",
431 | "3 rxvt-unicod be one __eou__ \n",
432 | "4 by the way , the url you post for davf be from dapper ... that 's 5.xx iirc __eou__ \n",
433 | "\n",
434 | " Distractor_4 \\\n",
435 | "0 http : //www.ubuntu.com/project/about-ubuntu/deriv ( some call them deriv , other call them flavor , same differ ) __eou__ \n",
436 | "1 hm ? __eou__ \n",
437 | "2 i get to the menu with option such as tri ubuntu ' , instal ubuntu ' , check disc ' __eou__ \n",
438 | "3 i tar all of ~ __eou__ \n",
439 | "4 http : //ubuntuforums.org/showthread.php ? t=1549847 __eou__ \n",
440 | "\n",
441 | " Distractor_5 \\\n",
442 | "0 thx __eou__ unfortun the program be n't instal from the repositori __eou__ \n",
443 | "1 not the case , lts be everi other .04 releas . the .04 be n't alway more stabl __eou__ i would reinstal with precis __eou__ you can restor user data and such from backup __eou__ \n",
444 | "2 whi do u need analyz __eou__ it be a toy __eou__ ok msp301 __eou__ but y , i mean it be the same ubunut , onli with older program __eou__ ubuntu 804 or 1204 __eou__ no i dont use 804 __eou__ i be ask hypo qs __eou__ \n",
445 | "3 i tar all of ~ __eou__ \n",
446 | "4 so i load up putti gui , then what do i do ? __eou__ \n",
447 | "\n",
448 | " Distractor_6 \\\n",
449 | "0 how can i check ? by do a recoveri for test ? __eou__ \n",
450 | "1 pretti much __eou__ \n",
451 | "2 cntrl-c may stop the command but it doe n't fix my hdd problem . __eou__ \n",
452 | "3 i do n't realli know if i can help , but i be curious . lol __eou__ that 's cool . i ll look into it . now , we better stop talk about this sinc it 's offtop . : p __eou__ \n",
453 | "4 you should read error messag , it say be you root ? ' __eou__ \n",
454 | "\n",
455 | " Distractor_7 \\\n",
456 | "0 my humbl apolog __eou__ \n",
457 | "1 i use the one i download from amd __eou__ \n",
458 | "2 if you re onli go to run ubuntu , just get a normal pc rather than a mac __eou__ that say , i 'm run it on a macbook , becaus i get one relat cheapli __eou__ \n",
459 | "3 that work just fine , thank ! __eou__ \n",
460 | "4 wait the colleg semest to close just to make sure i will not need to reconfigur my environ again __eou__ \n",
461 | "\n",
462 | " Distractor_8 \n",
463 | "0 # ubuntu-offtop __eou__ \n",
464 | "1 ffmpeg be part of the packag , quixotedon , at least i 'm quit sure it still be __eou__ if not just instal ffmpeg __eou__ \n",
465 | "2 the one which be not pick up at the moment be on stderr and not stdout and > be onli cover stdout __eou__ \n",
466 | "3 thank you __eou__ \n",
467 | "4 i be call myself a jerk . all i know be that you download a game success . __eou__ "
468 | ]
469 | },
470 | "execution_count": 14,
471 | "metadata": {},
472 | "output_type": "execute_result"
473 | }
474 | ],
475 | "source": [
476 | "pd.options.display.max_colwidth = 500\n",
477 | "test_df.head()"
478 | ]
479 | },
480 | {
481 | "cell_type": "code",
482 | "execution_count": 15,
483 | "metadata": {
484 | "collapsed": false
485 | },
486 | "outputs": [
487 | {
488 | "data": {
489 | "text/html": [
490 | "\n",
491 | "
\n",
492 | " \n",
493 | " \n",
494 | " | \n",
495 | " Context | \n",
496 | " Ground Truth Utterance | \n",
497 | " Distractor_0 | \n",
498 | " Distractor_1 | \n",
499 | " Distractor_2 | \n",
500 | " Distractor_3 | \n",
501 | " Distractor_4 | \n",
502 | " Distractor_5 | \n",
503 | " Distractor_6 | \n",
504 | " Distractor_7 | \n",
505 | " Distractor_8 | \n",
506 | "
\n",
507 | " \n",
508 | " \n",
509 | " \n",
510 | " count | \n",
511 | " 18920 | \n",
512 | " 18920 | \n",
513 | " 18920 | \n",
514 | " 18920 | \n",
515 | " 18920 | \n",
516 | " 18920 | \n",
517 | " 18920 | \n",
518 | " 18920 | \n",
519 | " 18920 | \n",
520 | " 18920 | \n",
521 | " 18920 | \n",
522 | "
\n",
523 | " \n",
524 | " unique | \n",
525 | " 18920 | \n",
526 | " 17914 | \n",
527 | " 13982 | \n",
528 | " 13902 | \n",
529 | " 14077 | \n",
530 | " 14041 | \n",
531 | " 14101 | \n",
532 | " 14072 | \n",
533 | " 13969 | \n",
534 | " 13975 | \n",
535 | " 14123 | \n",
536 | "
\n",
537 | " \n",
538 | " top | \n",
539 | " hi , when be the new gstreamersdk will be upload to ubuntu repositori ? __eou__ __eot__ ubuntu version most doe not allow the `` new '' softwar in various releas , version be `` freeze '' apart from select applic and import secur fix __eou__ __eot__ from what i understand , the gstreamersdk be go to be the onli possibl way to develop an applic use gstreamer lib . __eou__ __eot__ | \n",
540 | " thank __eou__ | \n",
541 | " thank __eou__ | \n",
542 | " thank __eou__ | \n",
543 | " thank __eou__ | \n",
544 | " thank __eou__ | \n",
545 | " thank __eou__ | \n",
546 | " thank __eou__ | \n",
547 | " thank __eou__ | \n",
548 | " thank __eou__ | \n",
549 | " thank __eou__ | \n",
550 | "
\n",
551 | " \n",
552 | " freq | \n",
553 | " 1 | \n",
554 | " 186 | \n",
555 | " 176 | \n",
556 | " 186 | \n",
557 | " 194 | \n",
558 | " 195 | \n",
559 | " 167 | \n",
560 | " 197 | \n",
561 | " 190 | \n",
562 | " 188 | \n",
563 | " 201 | \n",
564 | "
\n",
565 | " \n",
566 | "
\n",
567 | "
"
568 | ],
569 | "text/plain": [
570 | " Context \\\n",
571 | "count 18920 \n",
572 | "unique 18920 \n",
573 | "top hi , when be the new gstreamersdk will be upload to ubuntu repositori ? __eou__ __eot__ ubuntu version most doe not allow the `` new '' softwar in various releas , version be `` freeze '' apart from select applic and import secur fix __eou__ __eot__ from what i understand , the gstreamersdk be go to be the onli possibl way to develop an applic use gstreamer lib . __eou__ __eot__ \n",
574 | "freq 1 \n",
575 | "\n",
576 | " Ground Truth Utterance Distractor_0 Distractor_1 Distractor_2 \\\n",
577 | "count 18920 18920 18920 18920 \n",
578 | "unique 17914 13982 13902 14077 \n",
579 | "top thank __eou__ thank __eou__ thank __eou__ thank __eou__ \n",
580 | "freq 186 176 186 194 \n",
581 | "\n",
582 | " Distractor_3 Distractor_4 Distractor_5 Distractor_6 \\\n",
583 | "count 18920 18920 18920 18920 \n",
584 | "unique 14041 14101 14072 13969 \n",
585 | "top thank __eou__ thank __eou__ thank __eou__ thank __eou__ \n",
586 | "freq 195 167 197 190 \n",
587 | "\n",
588 | " Distractor_7 Distractor_8 \n",
589 | "count 18920 18920 \n",
590 | "unique 13975 14123 \n",
591 | "top thank __eou__ thank __eou__ \n",
592 | "freq 188 201 "
593 | ]
594 | },
595 | "execution_count": 15,
596 | "metadata": {},
597 | "output_type": "execute_result"
598 | }
599 | ],
600 | "source": [
601 | "test_df.describe()"
602 | ]
603 | }
604 | ],
605 | "metadata": {
606 | "kernelspec": {
607 | "display_name": "Python 3",
608 | "language": "python",
609 | "name": "python3"
610 | },
611 | "language_info": {
612 | "codemirror_mode": {
613 | "name": "ipython",
614 | "version": 3
615 | },
616 | "file_extension": ".py",
617 | "mimetype": "text/x-python",
618 | "name": "python",
619 | "nbconvert_exporter": "python",
620 | "pygments_lexer": "ipython3",
621 | "version": "3.5.0"
622 | }
623 | },
624 | "nbformat": 4,
625 | "nbformat_minor": 0
626 | }
627 |
--------------------------------------------------------------------------------
/notebooks/TFIDF Baseline Evaluation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {
7 | "collapsed": false
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import pandas as pd\n",
12 | "import numpy as np\n",
13 | "from sklearn.feature_extraction.text import TfidfVectorizer\n",
14 | "from sklearn.feature_extraction.text import TfidfTransformer"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 18,
20 | "metadata": {
21 | "collapsed": false
22 | },
23 | "outputs": [],
24 | "source": [
25 | "# Load Data\n",
26 | "train_df = pd.read_csv(\"../data/train.csv\")\n",
27 | "test_df = pd.read_csv(\"../data/test.csv\")\n",
28 | "validation_df = pd.read_csv(\"../data/valid.csv\")\n",
29 | "y_test = np.zeros(len(test_df))"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 9,
35 | "metadata": {
36 | "collapsed": false
37 | },
38 | "outputs": [],
39 | "source": [
40 | "def evaluate_recall(y, y_test, k=1):\n",
41 | " num_examples = float(len(y))\n",
42 | " num_correct = 0\n",
43 | " for predictions, label in zip(y, y_test):\n",
44 | " if label in predictions[:k]:\n",
45 | " num_correct += 1\n",
46 | " return num_correct/num_examples"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 10,
52 | "metadata": {
53 | "collapsed": true
54 | },
55 | "outputs": [],
56 | "source": [
57 | "def predict_random(context, utterances):\n",
58 | " return np.random.choice(len(utterances), 10, replace=False)"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 19,
64 | "metadata": {
65 | "collapsed": false
66 | },
67 | "outputs": [
68 | {
69 | "name": "stdout",
70 | "output_type": "stream",
71 | "text": [
72 | "Recall @ (1, 10): 0.103541\n",
73 | "Recall @ (2, 10): 0.205391\n",
74 | "Recall @ (5, 10): 0.503805\n",
75 | "Recall @ (10, 10): 1\n"
76 | ]
77 | }
78 | ],
79 | "source": [
80 | "# Evaluate Random predictor\n",
81 | "y_random = [predict_random(test_df.Context[x], test_df.iloc[x,1:].values) for x in range(len(test_df))]\n",
82 | "for n in [1, 2, 5, 10]:\n",
83 | " print(\"Recall @ ({}, 10): {:g}\".format(n, evaluate_recall(y_random, y_test, n)))"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 20,
89 | "metadata": {
90 | "collapsed": false
91 | },
92 | "outputs": [],
93 | "source": [
94 | "class TFIDFPredictor:\n",
95 | " def __init__(self):\n",
96 | " self.vectorizer = TfidfVectorizer()\n",
97 | "\n",
98 | " def train(self, data):\n",
99 | " self.vectorizer.fit(np.append(data.Context.values,data.Utterance.values))\n",
100 | "\n",
101 | " def predict(self, context, utterances):\n",
102 | " # Convert context and utterances into tfidf vector\n",
103 | " vector_context = self.vectorizer.transform([context])\n",
104 | " vector_doc = self.vectorizer.transform(utterances)\n",
105 | " # The dot product measures the similarity of the resulting vectors\n",
106 | " result = np.dot(vector_doc, vector_context.T).todense()\n",
107 | " result = np.asarray(result).flatten()\n",
108 | " # Sort by top results and return the indices in descending order\n",
109 | " return np.argsort(result, axis=0)[::-1]"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 21,
115 | "metadata": {
116 | "collapsed": false
117 | },
118 | "outputs": [
119 | {
120 | "name": "stdout",
121 | "output_type": "stream",
122 | "text": [
123 | "Recall @ (1, 10): 0.495032\n",
124 | "Recall @ (2, 10): 0.596882\n",
125 | "Recall @ (5, 10): 0.766121\n",
126 | "Recall @ (10, 10): 1\n"
127 | ]
128 | }
129 | ],
130 | "source": [
131 | "# Evaluate TFIDF predictor\n",
132 | "pred = TFIDFPredictor()\n",
133 | "pred.train(train_df)\n",
134 | "y = [pred.predict(test_df.Context[x], test_df.iloc[x,1:].values) for x in range(len(test_df))]\n",
135 | "for n in [1, 2, 5, 10]:\n",
136 | " print(\"Recall @ ({}, 10): {:g}\".format(n, evaluate_recall(y, y_test, n)))"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "metadata": {
143 | "collapsed": true
144 | },
145 | "outputs": [],
146 | "source": []
147 | }
148 | ],
149 | "metadata": {
150 | "kernelspec": {
151 | "display_name": "Python 3",
152 | "language": "python",
153 | "name": "python3"
154 | },
155 | "language_info": {
156 | "codemirror_mode": {
157 | "name": "ipython",
158 | "version": 3
159 | },
160 | "file_extension": ".py",
161 | "mimetype": "text/x-python",
162 | "name": "python",
163 | "nbconvert_exporter": "python",
164 | "pygments_lexer": "ipython3",
165 | "version": "3.5.1"
166 | }
167 | },
168 | "nbformat": 4,
169 | "nbformat_minor": 0
170 | }
171 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | appnope==0.1.0
2 | backports.shutil-get-terminal-size==1.0.0
3 | cycler==0.10.0
4 | decorator==4.0.10
5 | entrypoints==0.2.2
6 | gnureadline==6.3.3
7 | ipykernel==4.3.1
8 | ipython==4.2.0
9 | ipython-genutils==0.1.0
10 | ipywidgets==5.1.5
11 | Jinja2==2.8
12 | jsonschema==2.5.1
13 | jupyter==1.0.0
14 | jupyter-client==4.3.0
15 | jupyter-console==4.1.1
16 | jupyter-core==4.1.0
17 | MarkupSafe==0.23
18 | matplotlib==1.5.1
19 | mistune==0.7.2
20 | nbconvert==4.2.0
21 | nbformat==4.0.1
22 | notebook==4.2.1
23 | numpy==1.11.1
24 | pandas==0.18.1
25 | pexpect==4.1.0
26 | pickleshare==0.7.2
27 | protobuf==3.0.0b2
28 | ptyprocess==0.5.1
29 | Pygments==2.1.3
30 | pyparsing==2.1.5
31 | python-dateutil==2.5.3
32 | pytz==2016.4
33 | pyzmq==15.2.0
34 | qtconsole==4.2.1
35 | scikit-learn==0.17.1
36 | scipy==0.17.1
37 | simplegeneric==0.8.1
38 | six==1.10.0
39 | tensorflow==0.9.0
40 | terminado==0.6
41 | tornado==4.3
42 | traitlets==4.2.1
43 | widgetsnbextension==1.2.3
44 |
--------------------------------------------------------------------------------
/scripts/prepare_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import csv
3 | import itertools
4 | import functools
5 | import tensorflow as tf
6 | import numpy as np
7 | import array
8 |
# Command-line flags controlling vocabulary construction and file locations.
tf.flags.DEFINE_integer(
  "min_word_frequency", 5, "Minimum frequency of words in the vocabulary")

tf.flags.DEFINE_integer("max_sentence_len", 160, "Maximum Sentence Length")

tf.flags.DEFINE_string(
  "input_dir", os.path.abspath("./data"),
  "Input directory containing original CSV data files (default = './data')")

tf.flags.DEFINE_string(
  "output_dir", os.path.abspath("./data"),
  "Output directory for TFRecord files (default = './data')")

FLAGS = tf.flags.FLAGS

# CSV inputs, resolved relative to --input_dir when the module is loaded.
TRAIN_PATH = os.path.join(FLAGS.input_dir, "train.csv")
VALIDATION_PATH = os.path.join(FLAGS.input_dir, "valid.csv")
TEST_PATH = os.path.join(FLAGS.input_dir, "test.csv")
27 |
def tokenizer_fn(iterator):
  """
  Tokenizes every string of `iterator` by splitting on single spaces.
  Returns a generator that yields one list of tokens per input string.
  """
  def _split_on_space(text):
    return text.split(" ")

  return (_split_on_space(document) for document in iterator)
30 |
def create_csv_iter(filename):
  """
  Returns an iterator over the rows of a CSV file. Skips the header.

  Each row is yielded as a list of strings. A file without any rows
  (not even a header) yields nothing.
  """
  # newline="" hands line-ending handling to the csv module, as its
  # documentation requires; otherwise "\r\n" inside quoted fields is rewritten.
  with open(filename, newline="") as csvfile:
    reader = csv.reader(csvfile)
    # Skip the header. The default keeps an empty file from raising
    # StopIteration inside this generator (a RuntimeError under PEP 479).
    next(reader, None)
    for row in reader:
      yield row
41 |
42 |
def create_vocab(input_iter, min_frequency):
  """
  Builds a VocabularyProcessor and fits it on the documents of `input_iter`.

  The processor is configured with FLAGS.max_sentence_len as its first
  argument, the given `min_frequency`, and the module-level space tokenizer.
  Returns the fitted processor.
  """
  processor_cls = tf.contrib.learn.preprocessing.VocabularyProcessor
  fitted = processor_cls(
      FLAGS.max_sentence_len,
      min_frequency=min_frequency,
      tokenizer_fn=tokenizer_fn)
  fitted.fit(input_iter)
  return fitted
54 |
55 |
def transform_sentence(sequence, vocab_processor):
  """
  Maps a single sentence into the integer vocabulary.

  Feeds `sequence` to `vocab_processor.transform` as a one-element batch and
  returns `.tolist()` of the first item produced.
  """
  transformed_batch = vocab_processor.transform([sequence])
  first_item = next(transformed_batch)
  return first_item.tolist()
61 |
62 |
def create_text_sequence_feature(fl, sentence, sentence_len, vocab):
  """
  Appends one int64 feature per word id of `sentence` to the FeatureList
  protocol buffer `fl` and returns `fl`.

  `sentence_len` is accepted but not used by this function.
  """
  word_ids = transform_sentence(sentence, vocab)
  for word_id in word_ids:
    new_feature = fl.feature.add()
    new_feature.int64_list.value.extend([word_id])
  return fl
71 |
72 |
def create_example_train(row, vocab):
  """
  Creates a training example for the Ubuntu Dialog Corpus dataset.

  `row` must unpack into (context, utterance, label). Returns a
  tensorflow.Example protocol buffer with the int64 features "context",
  "utterance", "context_len", "utterance_len" and "label".
  """
  context, utterance, label = row

  def token_count(text):
    # Length of the tokenized text, before any padding/truncation applied
    # by transform_sentence.
    return len(next(vocab._tokenizer([text])))

  int64_features = [
      ("context", transform_sentence(context, vocab)),
      ("utterance", transform_sentence(utterance, vocab)),
      ("context_len", [token_count(context)]),
      ("utterance_len", [token_count(utterance)]),
      # The label may be written as a float string such as "1.0".
      ("label", [int(float(label))]),
  ]

  example = tf.train.Example()
  for feature_name, values in int64_features:
    example.features.feature[feature_name].int64_list.value.extend(values)
  return example
93 |
94 |
def create_example_test(row, vocab):
  """
  Creates a test/validation example for the Ubuntu Dialog Corpus dataset.

  The first two entries of `row` are the context and the correct utterance;
  every remaining entry is treated as a distractor. Returns a
  tensorflow.Example protocol buffer holding the ids and token counts of the
  context, the utterance and each distractor ("distractor_<i>" and
  "distractor_<i>_len").
  """
  context, utterance = row[:2]
  distractors = row[2:]

  def token_count(text):
    return len(next(vocab._tokenizer([text])))

  context_len = token_count(context)
  utterance_len = token_count(utterance)
  context_ids = transform_sentence(context, vocab)
  utterance_ids = transform_sentence(utterance, vocab)

  example = tf.train.Example()
  feature_map = example.features.feature
  feature_map["context"].int64_list.value.extend(context_ids)
  feature_map["utterance"].int64_list.value.extend(utterance_ids)
  feature_map["context_len"].int64_list.value.extend([context_len])
  feature_map["utterance_len"].int64_list.value.extend([utterance_len])

  # One length feature and one id feature per distractor.
  for index, distractor in enumerate(distractors):
    ids_key = "distractor_{}".format(index)
    len_key = "distractor_{}_len".format(index)
    feature_map[len_key].int64_list.value.extend([token_count(distractor)])
    feature_map[ids_key].int64_list.value.extend(
        transform_sentence(distractor, vocab))
  return example
125 |
126 |
def create_tfrecords_file(input_filename, output_filename, example_fn):
  """
  Creates a TFRecords file for the given input data and
  example transformation function.

  Every row of the CSV at `input_filename` (header skipped) is passed to
  `example_fn`; the returned protocol buffer is serialized and appended to
  `output_filename`.
  """
  writer = tf.python_io.TFRecordWriter(output_filename)
  print("Creating TFRecords file at {}...".format(output_filename))
  try:
    for row in create_csv_iter(input_filename):
      example = example_fn(row)
      writer.write(example.SerializeToString())
  finally:
    # Close the writer even when a row fails to convert, so the file handle
    # is not leaked.
    writer.close()
  print("Wrote to {}".format(output_filename))
139 |
140 |
def write_vocabulary(vocab_processor, outfile):
  """
  Writes the vocabulary to a file, one word per line.

  Words are written in id order (0 .. len(vocabulary) - 1), looked up
  through the vocabulary's reverse mapping.
  """
  vocabulary = vocab_processor.vocabulary_
  word_count = len(vocabulary)
  with open(outfile, "w") as vocabfile:
    for word_id in range(word_count):
      line = vocabulary._reverse_mapping[word_id] + "\n"
      vocabfile.write(line)
  print("Saved vocabulary to {}".format(outfile))
151 |
152 |
if __name__ == "__main__":
  # Build the vocabulary from the first two columns of every training row.
  print("Creating vocabulary...")
  train_rows = create_csv_iter(TRAIN_PATH)
  train_text = (row[0] + " " + row[1] for row in train_rows)
  vocab = create_vocab(train_text, min_frequency=FLAGS.min_word_frequency)
  print("Total vocabulary size: {}".format(len(vocab.vocabulary_)))

  # Create vocabulary.txt file
  vocabulary_path = os.path.join(FLAGS.output_dir, "vocabulary.txt")
  write_vocabulary(vocab, vocabulary_path)

  # Save vocab processor
  processor_path = os.path.join(FLAGS.output_dir, "vocab_processor.bin")
  vocab.save(processor_path)

  # Convert each CSV split, in the order validation, test, train.
  conversions = [
      (VALIDATION_PATH, "validation.tfrecords", create_example_test),
      (TEST_PATH, "test.tfrecords", create_example_test),
      (TRAIN_PATH, "train.tfrecords", create_example_train),
  ]
  for csv_path, record_name, example_builder in conversions:
    create_tfrecords_file(
        input_filename=csv_path,
        output_filename=os.path.join(FLAGS.output_dir, record_name),
        example_fn=functools.partial(example_builder, vocab=vocab))
184 |
--------------------------------------------------------------------------------
/udc_hparams.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from collections import namedtuple
3 |
# Vocabulary
tf.flags.DEFINE_integer(
  "vocab_size",
  91620,
  "The size of the vocabulary. Only change this if you changed the preprocessing")

# Model Parameters
tf.flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of the embeddings")
tf.flags.DEFINE_integer("rnn_dim", 256, "Dimensionality of the RNN cell")
tf.flags.DEFINE_integer("max_context_len", 160, "Truncate contexts to this length")
tf.flags.DEFINE_integer("max_utterance_len", 80, "Truncate utterance to this length")

# Pre-trained embeddings
tf.flags.DEFINE_string("glove_path", None, "Path to pre-trained Glove vectors")
tf.flags.DEFINE_string("vocab_path", None, "Path to vocabulary.txt file")

# Training Parameters
tf.flags.DEFINE_float("learning_rate", 0.001, "Learning rate")
tf.flags.DEFINE_integer("batch_size", 128, "Batch size during training")
tf.flags.DEFINE_integer("eval_batch_size", 16, "Batch size during evaluation")
tf.flags.DEFINE_string("optimizer", "Adam", "Optimizer Name (Adam, Adagrad, etc)")

FLAGS = tf.flags.FLAGS

# Immutable bundle of hyperparameters; each field has the same name as the
# flag it is filled from in create_hparams().
HParams = namedtuple(
  "HParams",
  [
    "batch_size",
    "embedding_dim",
    "eval_batch_size",
    "learning_rate",
    "max_context_len",
    "max_utterance_len",
    "optimizer",
    "rnn_dim",
    "vocab_size",
    "glove_path",
    "vocab_path"
  ])
43 |
def create_hparams():
  """Returns an HParams tuple whose fields are read from the same-named flags."""
  flag_values = {}
  for field_name in HParams._fields:
    # Every HParams field is defined as a flag with an identical name.
    flag_values[field_name] = getattr(FLAGS, field_name)
  return HParams(**flag_values)
--------------------------------------------------------------------------------
/udc_inputs.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
# Fixed number of word ids stored per text feature (context, utterance and
# distractors). NOTE(review): presumably has to match the max_sentence_len
# used when the TFRecords were written - confirm against the preprocessing.
TEXT_FEATURE_SIZE = 160
4 |
def get_feature_columns(mode):
  """
  Returns the set of int64 real-valued feature columns for `mode`.

  Context and utterance ids/lengths are always included. TRAIN adds the
  "label" column; EVAL adds ids and lengths for nine distractors.
  """
  def int64_column(name, dimension):
    return tf.contrib.layers.real_valued_column(
        column_name=name, dimension=dimension, dtype=tf.int64)

  columns = [
      int64_column("context", TEXT_FEATURE_SIZE),
      int64_column("context_len", 1),
      int64_column("utterance", TEXT_FEATURE_SIZE),
      int64_column("utterance_len", 1),
  ]

  if mode == tf.contrib.learn.ModeKeys.TRAIN:
    # During training we have a label feature
    columns.append(int64_column("label", 1))

  if mode == tf.contrib.learn.ModeKeys.EVAL:
    # During evaluation we have distractors
    for index in range(9):
      columns.append(
          int64_column("distractor_{}".format(index), TEXT_FEATURE_SIZE))
      columns.append(
          int64_column("distractor_{}_len".format(index), 1))

  return set(columns)
31 |
32 |
def create_input_fn(mode, input_files, batch_size, num_epochs):
  """
  Returns an `input_fn` that reads shuffled batches of parsed features from
  the TFRecord files in `input_files`.

  The returned function yields `(feature_map, target)`. In TRAIN mode the
  target is the "label" feature, removed from the map; otherwise it is an
  all-zero int64 tensor of shape [batch_size, 1].
  """
  def input_fn():
    parsing_spec = tf.contrib.layers.create_feature_spec_for_parsing(
        get_feature_columns(mode))

    feature_map = tf.contrib.learn.io.read_batch_features(
        file_pattern=input_files,
        batch_size=batch_size,
        features=parsing_spec,
        reader=tf.TFRecordReader,
        randomize_input=True,
        num_epochs=num_epochs,
        queue_capacity=200000 + batch_size * 10,
        name="read_batch_features_{}".format(mode))

    if mode != tf.contrib.learn.ModeKeys.TRAIN:
      # In evaluation we have 10 classes (utterances).
      # The first one (index 0) is always the correct one
      target = tf.zeros([batch_size, 1], dtype=tf.int64)
      return feature_map, target

    # This is an ugly hack because of a current bug in tf.learn
    # During evaluation TF tries to restore the epoch variable which isn't
    # defined during training, so it is defined manually here.
    tf.get_variable(
        "read_batch_features_eval/file_name_queue/limit_epochs/epochs",
        initializer=tf.constant(0, dtype=tf.int64))

    target = feature_map.pop("label")
    return feature_map, target

  return input_fn
64 |
--------------------------------------------------------------------------------
/udc_metrics.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import functools
3 | from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec
4 |
5 |
def create_evaluation_metrics():
  """
  Returns a dict mapping "recall_at_<k>" to a MetricSpec for k in 1, 2, 5, 10.

  Each MetricSpec wraps streaming_sparse_recall_at_k with `k` pre-bound.
  """
  def recall_spec(k):
    recall_fn = functools.partial(
        tf.contrib.metrics.streaming_sparse_recall_at_k, k=k)
    return MetricSpec(metric_fn=recall_fn)

  return {"recall_at_%d" % k: recall_spec(k) for k in [1, 2, 5, 10]}
13 |
--------------------------------------------------------------------------------
/udc_model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import sys
3 |
def get_id_feature(features, key, len_key, max_len):
  """
  Looks up an id feature and its length feature in `features`.

  The length tensor has its axis 1 squeezed away and is capped at `max_len`.
  Returns `(ids, capped_length)`.
  """
  ids = features[key]
  raw_length = tf.squeeze(features[len_key], [1])
  length_cap = tf.constant(max_len, dtype=tf.int64)
  return ids, tf.minimum(raw_length, length_cap)
9 |
def create_train_op(loss, hparams):
  """
  Builds the training op for `loss` via tf.contrib.layers.optimize_loss,
  using the optimizer name and learning rate from `hparams` and gradient
  clipping at 10.0.
  """
  global_step = tf.contrib.framework.get_global_step()
  return tf.contrib.layers.optimize_loss(
      loss=loss,
      global_step=global_step,
      learning_rate=hparams.learning_rate,
      clip_gradients=10.0,
      optimizer=hparams.optimizer)
18 |
19 |
def create_model_fn(hparams, model_impl):
  """
  Returns a `model_fn(features, targets, mode)` built around `model_impl`.

  `model_impl` is called as
  `model_impl(hparams, mode, context, context_len, utterance, utterance_len,
  targets)` and must return `(probs, loss)`.

  The returned model_fn yields `(predictions, loss, train_op)`:
    * TRAIN: probabilities, loss and an optimizer train op.
    * INFER: probabilities, a constant 0.0 loss and no train op.
    * EVAL:  one row per record holding the probabilities of the true
             utterance (column 0) and the 9 distractors, the loss, and no
             train op.
  """

  def model_fn(features, targets, mode):
    context, context_len = get_id_feature(
        features, "context", "context_len", hparams.max_context_len)

    utterance, utterance_len = get_id_feature(
        features, "utterance", "utterance_len", hparams.max_utterance_len)

    if mode == tf.contrib.learn.ModeKeys.TRAIN:
      probs, loss = model_impl(
          hparams,
          mode,
          context,
          context_len,
          utterance,
          utterance_len,
          targets)
      train_op = create_train_op(loss, hparams)
      return probs, loss, train_op

    if mode == tf.contrib.learn.ModeKeys.INFER:
      # No targets are needed (or inspected) for inference.
      probs, _ = model_impl(
          hparams,
          mode,
          context,
          context_len,
          utterance,
          utterance_len,
          None)
      return probs, 0.0, None

    if mode == tf.contrib.learn.ModeKeys.EVAL:
      # The static batch size is only needed here; reading it from `targets`
      # in this branch keeps the other modes working when targets is None.
      batch_size = targets.get_shape().as_list()[0]

      # We have 10 examples per record, so we accumulate them
      all_contexts = [context]
      all_context_lens = [context_len]
      all_utterances = [utterance]
      all_utterance_lens = [utterance_len]
      # The true utterance is labelled 1, every distractor 0.
      all_targets = [tf.ones([batch_size, 1], dtype=tf.int64)]

      for i in range(9):
        distractor, distractor_len = get_id_feature(
            features,
            "distractor_{}".format(i),
            "distractor_{}_len".format(i),
            hparams.max_utterance_len)
        all_contexts.append(context)
        all_context_lens.append(context_len)
        all_utterances.append(distractor)
        all_utterance_lens.append(distractor_len)
        all_targets.append(
            tf.zeros([batch_size, 1], dtype=tf.int64)
        )

      # Score all 10 candidates in one pass by stacking them along axis 0.
      probs, loss = model_impl(
          hparams,
          mode,
          tf.concat(0, all_contexts),
          tf.concat(0, all_context_lens),
          tf.concat(0, all_utterances),
          tf.concat(0, all_utterance_lens),
          tf.concat(0, all_targets))

      # Undo the stacking: 10 chunks along axis 0, joined side by side so
      # that each row holds the 10 candidate scores of one record.
      split_probs = tf.split(0, 10, probs)
      shaped_probs = tf.concat(1, split_probs)

      # Add summaries (the "incorrect" ones only cover the first distractor)
      tf.histogram_summary("eval_correct_probs_hist", split_probs[0])
      tf.scalar_summary("eval_correct_probs_average", tf.reduce_mean(split_probs[0]))
      tf.histogram_summary("eval_incorrect_probs_hist", split_probs[1])
      tf.scalar_summary("eval_incorrect_probs_average", tf.reduce_mean(split_probs[1]))

      return shaped_probs, loss, None

  return model_fn
97 |
--------------------------------------------------------------------------------
/udc_predict.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import itertools
4 | import sys
5 | import numpy as np
6 | import tensorflow as tf
7 | import udc_model
8 | import udc_hparams
9 | import udc_metrics
10 | import udc_inputs
11 | from models.dual_encoder import dual_encoder_model
12 | from models.helpers import load_vocab
13 |
# Flags specific to the prediction script.
tf.flags.DEFINE_string("model_dir", None, "Directory to load model checkpoints from")
tf.flags.DEFINE_string("vocab_processor_file", "./data/vocab_processor.bin", "Saved vocabulary processor file")
FLAGS = tf.flags.FLAGS

# A model directory is mandatory; exit with status 1 when it is missing.
if not FLAGS.model_dir:
  print("You must specify a model directory")
  sys.exit(1)
21 |
def tokenizer_fn(iterator):
  """
  Splits every string of `iterator` on single spaces and returns a generator
  of token lists.

  NOTE(review): not called in this file; presumably it has to exist so the
  saved vocabulary processor can resolve its tokenizer on restore - confirm.
  """
  def _split_on_space(text):
    return text.split(" ")

  return (_split_on_space(document) for document in iterator)
24 |
# Load the vocabulary processor saved at --vocab_processor_file
vp = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(
  FLAGS.vocab_processor_file)

# Load your own data here: one context and the candidate responses that are
# scored against it in the main block.
INPUT_CONTEXT = "Example context"
POTENTIAL_RESPONSES = ["Response 1", "Response 2"]
32 |
def get_features(context, utterance):
  """
  Builds the feature tensors for a single (context, utterance) pair.

  Both texts are mapped to ids with the restored vocabulary processor; their
  lengths are the number of space-separated tokens. Returns
  `(features, None)`, where `features` holds "context", "context_len",
  "utterance" and "utterance_len" as int64 tensors.
  """
  def encode(text):
    return np.array(list(vp.transform([text])))

  def token_count(text):
    return len(text.split(" "))

  context_ids = encode(context)
  utterance_ids = encode(utterance)
  context_tokens = token_count(context)
  utterance_tokens = token_count(utterance)

  features = {}
  features["context"] = tf.convert_to_tensor(context_ids, dtype=tf.int64)
  features["context_len"] = tf.constant(
      context_tokens, shape=[1,1], dtype=tf.int64)
  features["utterance"] = tf.convert_to_tensor(utterance_ids, dtype=tf.int64)
  features["utterance_len"] = tf.constant(
      utterance_tokens, shape=[1,1], dtype=tf.int64)
  return features, None
45 |
if __name__ == "__main__":
  hparams = udc_hparams.create_hparams()
  estimator = tf.contrib.learn.Estimator(
      model_fn=udc_model.create_model_fn(hparams, model_impl=dual_encoder_model),
      model_dir=FLAGS.model_dir)

  # Ugly hack, seems to be a bug in Tensorflow
  # estimator.predict doesn't work without this line
  placeholder_target = tf.constant(0, shape=[1,1])
  estimator._targets_info = tf.contrib.learn.estimators.tensor_signature.TensorSignature(
      placeholder_target)

  # Score every candidate response against the fixed input context.
  print("Context: {}".format(INPUT_CONTEXT))
  for response in POTENTIAL_RESPONSES:
    prob = estimator.predict(
        input_fn=lambda: get_features(INPUT_CONTEXT, response))
    print("{}: {:g}".format(response, prob[0,0]))
--------------------------------------------------------------------------------
/udc_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import itertools
4 | import sys
5 | import tensorflow as tf
6 | import udc_model
7 | import udc_hparams
8 | import udc_metrics
9 | import udc_inputs
10 | from models.dual_encoder import dual_encoder_model
11 |
# Flags specific to the test script.
tf.flags.DEFINE_string("test_file", "./data/test.tfrecords", "Path of test data in TFRecords format")
tf.flags.DEFINE_string("model_dir", None, "Directory to load model checkpoints from")
tf.flags.DEFINE_integer("loglevel", 20, "Tensorflow log level")
tf.flags.DEFINE_integer("test_batch_size", 16, "Batch size for testing")
FLAGS = tf.flags.FLAGS

# A model directory is mandatory; exit with status 1 when it is missing.
if not FLAGS.model_dir:
  print("You must specify a model directory")
  sys.exit(1)

# NOTE(review): 20 is INFO on Python's logging scale; presumably tf.logging
# uses the same numbering - confirm.
tf.logging.set_verbosity(FLAGS.loglevel)
23 |
if __name__ == "__main__":
  # Rebuild the estimator on top of the checkpoints in --model_dir.
  hparams = udc_hparams.create_hparams()
  estimator = tf.contrib.learn.Estimator(
      model_fn=udc_model.create_model_fn(hparams, model_impl=dual_encoder_model),
      model_dir=FLAGS.model_dir,
      config=tf.contrib.learn.RunConfig())

  # A single pass over the test records, read in EVAL mode.
  input_fn_test = udc_inputs.create_input_fn(
      mode=tf.contrib.learn.ModeKeys.EVAL,
      input_files=[FLAGS.test_file],
      batch_size=FLAGS.test_batch_size,
      num_epochs=1)

  estimator.evaluate(
      input_fn=input_fn_test,
      steps=None,
      metrics=udc_metrics.create_evaluation_metrics())
40 |
--------------------------------------------------------------------------------
/udc_train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import itertools
4 | import tensorflow as tf
5 | import udc_model
6 | import udc_hparams
7 | import udc_metrics
8 | import udc_inputs
9 | from models.dual_encoder import dual_encoder_model
10 |
# Flags specific to the training script.
tf.flags.DEFINE_string("input_dir", "./data", "Directory containing input data files 'train.tfrecords' and 'validation.tfrecords'")
tf.flags.DEFINE_string("model_dir", None, "Directory to store model checkpoints (defaults to ./runs)")
tf.flags.DEFINE_integer("loglevel", 20, "Tensorflow log level")
tf.flags.DEFINE_integer("num_epochs", None, "Number of training Epochs. Defaults to indefinite.")
tf.flags.DEFINE_integer("eval_every", 2000, "Evaluate after this many train steps")
FLAGS = tf.flags.FLAGS

# Start time in whole seconds; names the run directory when no --model_dir
# is given.
TIMESTAMP = int(time.time())

if FLAGS.model_dir:
  MODEL_DIR = FLAGS.model_dir
else:
  MODEL_DIR = os.path.abspath(os.path.join("./runs", str(TIMESTAMP)))

# Absolute paths of the TFRecords inputs inside --input_dir.
TRAIN_FILE = os.path.abspath(os.path.join(FLAGS.input_dir, "train.tfrecords"))
VALIDATION_FILE = os.path.abspath(os.path.join(FLAGS.input_dir, "validation.tfrecords"))

tf.logging.set_verbosity(FLAGS.loglevel)
29 |
def main(unused_argv):
  """
  Trains the dual encoder model, evaluating on the validation set every
  FLAGS.eval_every training steps. The positional argument is ignored.
  """
  hparams = udc_hparams.create_hparams()
  mode_keys = tf.contrib.learn.ModeKeys

  estimator = tf.contrib.learn.Estimator(
      model_fn=udc_model.create_model_fn(hparams, model_impl=dual_encoder_model),
      model_dir=MODEL_DIR,
      config=tf.contrib.learn.RunConfig())

  # Training input: FLAGS.num_epochs passes over the training records.
  input_fn_train = udc_inputs.create_input_fn(
      mode=mode_keys.TRAIN,
      input_files=[TRAIN_FILE],
      batch_size=hparams.batch_size,
      num_epochs=FLAGS.num_epochs)

  # Validation input: one pass over the validation records per evaluation.
  input_fn_eval = udc_inputs.create_input_fn(
      mode=mode_keys.EVAL,
      input_files=[VALIDATION_FILE],
      batch_size=hparams.eval_batch_size,
      num_epochs=1)

  eval_monitor = tf.contrib.learn.monitors.ValidationMonitor(
      input_fn=input_fn_eval,
      every_n_steps=FLAGS.eval_every,
      metrics=udc_metrics.create_evaluation_metrics())

  estimator.fit(input_fn=input_fn_train, steps=None, monitors=[eval_monitor])
62 |
# Script entry point: hand control to tf.app.run().
# NOTE(review): presumably this parses the flags and then calls main() -
# confirm against the TensorFlow version in use.
if __name__ == "__main__":
  tf.app.run()
65 |
--------------------------------------------------------------------------------