├── sql
    └── GetIssues.sql
├── README.md
├── requirements.txt
├── LICENSE
└── notebooks
    ├── seq2seq_utils.py
    └── Tutorial.ipynb


/sql/GetIssues.sql:
--------------------------------------------------------------------------------
 1 | SELECT
 2 |   url as issue_url
 3 |   -- replace more than one white-space character in a row with a single space
 4 | , REGEXP_REPLACE(title, r"\s{2,}", ' ') as issue_title
 5 | , REGEXP_REPLACE(body, r"\s{2,}", ' ') as body
 6 | 
 7 | FROM(
 8 |     SELECT
 9 |         JSON_EXTRACT(payload, '$.issue.html_url') as url
10 |         -- extract the title and body removing parentheses, brackets, and quotes
11 |       , LOWER(TRIM(REGEXP_REPLACE(JSON_EXTRACT(payload, '$.issue.title'), r"\\n|\(|\)|\[|\]|#|\*|`", ' '))) as title
12 |       , LOWER(TRIM(REGEXP_REPLACE(JSON_EXTRACT(payload, '$.issue.body'), r"\\n|\(|\)|\[|\]|#|\*|`", ' '))) as body
13 |     FROM `githubarchive.day.2017*`
14 |     WHERE 
15 |       -- 70 random days in 2017 (because it costs money to query these tables!!)  
16 |           _TABLE_SUFFIX BETWEEN '0101' and '1231'
17 |       and type="IssuesEvent" 
18 |       -- Only want the issue at a specific point otherwise will have duplicates
19 |       and JSON_EXTRACT(payload, '$.action') = "\"opened\"" 
20 | ) as tbl
21 | 
22 | WHERE 
23 |   -- the body must be at least 8 words long and the title at least 3 words long
24 |   --  this is an arbitrary way to filter out empty or sparse issues
25 |       ARRAY_LENGTH(SPLIT(body, ' ')) >= 6
26 |   and ARRAY_LENGTH(SPLIT(title, ' ')) >= 3
27 |   -- filter out issues that have really long titles or bodies
28 |   --    (these are outliers, and will slow tokenization down).
29 |   and LENGTH(title) <= 400
30 |   and LENGTH(body) <= 2000
31 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![GitHub license](https://img.shields.io/github/license/hamelsmu/Seq2Seq_Tutorial.svg)](https://github.com/hamelsmu/Seq2Seq_Tutorial/blob/master/LICENSE)
 2 | 
 3 | ## Sequence-to-Sequence Tutorial with Github Issues Data
 4 | Code For Medium Article: ["How To Create Data Products That Are Magical Using Sequence-to-Sequence Models"](https://medium.com/@hamelhusain/how-to-create-data-products-that-are-magical-using-sequence-to-sequence-models-703f86a231f8)
 5 | 
 6 | ## Installation
 7 | 
 8 | `pip install -r requirements.txt`
 9 | 
10 | If you are using the AWS Deep Learning Ubuntu AMI, many of the required dependencies will already be installed,
11 | so you only need to run:
12 | 
13 | ```
14 | source activate tensorflow_p36
15 | pip install ktext annoy nltk pydot
16 | ```
17 | 
18 | See item 4 under Resources below if you wish to run this tutorial using Docker.
19 | 
20 | 
21 | ## Resources:
22 | 
23 | 1. [Tutorial Notebook](https://nbviewer.jupyter.org/github/hamelsmu/Seq2Seq_Tutorial/blob/master/notebooks/Tutorial.ipynb):  The Jupyter notebook that accompanies the Medium post.
24 | 
25 | 2. [seq2seq_utils.py](./notebooks/seq2seq_utils.py):  convenience functions that are used in the tutorial notebook to make predictions.
26 | 
27 | 3. [ktext](https://github.com/hamelsmu/ktext): this library is used in the tutorial to clean data.  This library can be installed with `pip`.  
28 | 
29 | 4. [Nvidia Docker Container](https://hub.docker.com/r/hamelsmu/seq2seq_tutorial/): contains all libraries that are required to run the tutorial.  This container is built with Nvidia-Docker v1.0.  You can install Nvidia-Docker and run this container like so:
30 | 
31 | 
32 | ```
33 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey |   sudo apt-key add -
34 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
35 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list |   sudo tee /etc/apt/sources.list.d/nvidia-docker.list
36 | sudo apt-get update
37 | sudo apt-get install nvidia-docker
38 | 
39 | sudo nvidia-docker run hamelsmu/seq2seq_tutorial
40 | 
41 | ```
42 | 
43 | This should work with both Nvidia-Docker v1.0 and v2.0.


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | alabaster==0.7.10
  2 | altair==1.2.1
  3 | anaconda-client==1.6.5
  4 | anaconda-navigator==1.6.8
  5 | anaconda-project==0.8.0
  6 | annoy==1.10.0
  7 | asn1crypto==0.22.0
  8 | astroid==1.5.3
  9 | astropy==2.0.2
 10 | Babel==2.5.0
 11 | backports.functools-lru-cache==1.4
 12 | backports.shutil-get-terminal-size==1.0.0
 13 | bcolz==1.1.2
 14 | beautifulsoup4==4.6.0
 15 | bitarray==0.8.1
 16 | bkcharts==0.2
 17 | blaze==0.11.3
 18 | bleach==1.5.0
 19 | bokeh==0.12.7
 20 | boto==2.48.0
 21 | boto3==1.5.14
 22 | botocore==1.8.28
 23 | Bottleneck==1.2.1
 24 | bz2file==0.98
 25 | cachetools==2.0.1
 26 | certifi==2017.11.5
 27 | cffi==1.10.0
 28 | chardet==3.0.4
 29 | click==6.7
 30 | cloudpickle==0.4.0
 31 | clyent==1.2.2
 32 | colorama==0.3.9
 33 | conda==4.4.6
 34 | conda-build==3.0.23
 35 | conda-verify==2.0.0
 36 | contextlib2==0.5.5
 37 | cryptography==2.0.3
 38 | cycler==0.10.0
 39 | cymem==1.31.2
 40 | Cython==0.26.1
 41 | cytoolz==0.9.0
 42 | dask==0.16.1
 43 | datashape==0.5.4
 44 | decorator==4.2.1
 45 | dill==0.2.7.1
 46 | distributed==1.20.2
 47 | docopt==0.6.2
 48 | docutils==0.14
 49 | en-core-web-sm==2.0.0
 50 | entrypoints==0.2.3
 51 | et-xmlfile==1.0.1
 52 | fastcache==1.0.2
 53 | fastparquet==0.1.3
 54 | filelock==2.0.12
 55 | Flask==0.12.2
 56 | Flask-Cors==3.0.3
 57 | ftfy==4.4.3
 58 | future==0.16.0
 59 | gensim==3.2.0
 60 | gevent==1.2.2
 61 | glob2==0.5
 62 | gmpy2==2.0.8
 63 | graphviz==0.8.1
 64 | greenlet==0.4.12
 65 | h5py==2.7.1
 66 | hdfs==2.1.0
 67 | heapdict==1.0.0
 68 | html5lib==1.0.1
 69 | idna==2.6
 70 | ijson==2.3
 71 | imageio==2.2.0
 72 | imagesize==0.7.1
 73 | ipykernel==4.6.1
 74 | ipython==6.2.1
 75 | ipython-genutils==0.2.0
 76 | ipywidgets==7.0.0
 77 | isort==4.2.15
 78 | isoweek==1.3.3
 79 | itsdangerous==0.24
 80 | jdcal==1.3
 81 | jedi==0.11.0
 82 | Jinja2==2.9.6
 83 | jmespath==0.9.3
 84 | jsonschema==2.6.0
 85 | jupyter-client==5.1.0
 86 | jupyter-console==5.2.0
 87 | jupyter-core==4.3.0
 88 | jupyterlab==0.27.0
 89 | jupyterlab-launcher==0.4.0
 90 | Keras==2.1.2
 91 | ktext==0.27
 92 | lazy-object-proxy==1.3.1
 93 | llvmlite==0.20.0
 94 | locket==0.2.0
 95 | lxml==3.8.0
 96 | Markdown==2.6.9
 97 | MarkupSafe==1.0
 98 | matplotlib==2.1.0
 99 | mccabe==0.6.1
100 | mistune==0.7.4
101 | more-itertools==4.0.1
102 | mpmath==0.19
103 | msgpack==0.5.1
104 | msgpack-numpy==0.4.2
105 | msgpack-python==0.5.1
106 | multipledispatch==0.4.9
107 | multiprocess==0.70.5
108 | murmurhash==0.28.0
109 | navigator-updater==0.1.0
110 | nbconvert==5.3.1
111 | nbformat==4.4.0
112 | networkx==2.0
113 | nltk==3.2.5
114 | nose==1.3.7
115 | notebook==5.0.0
116 | numba==0.35.0+10.g143f70e90
117 | numexpr==2.6.2
118 | numpy==1.14.0
119 | numpydoc==0.7.0
120 | odo==0.5.1
121 | olefile==0.44
122 | openpyxl==2.4.8
123 | packaging==16.8
124 | pandas==0.22.0
125 | pandas-summary==0.0.41
126 | pandocfilters==1.4.2
127 | parso==0.1.0
128 | partd==0.3.8
129 | path.py==10.3.1
130 | pathlib==1.0.1
131 | pathlib2==2.3.0
132 | pathos==0.2.1
133 | patsy==0.4.1
134 | pep8==1.7.0
135 | pexpect==4.3.0
136 | pickleshare==0.7.4
137 | Pillow==4.3.0
138 | pkginfo==1.4.1
139 | plac==0.9.6
140 | ply==3.10
141 | pox==0.2.3
142 | ppft==1.6.4.7.1
143 | preshed==1.0.0
144 | prompt-toolkit==1.0.15
145 | protobuf==3.5.0
146 | psutil==5.2.2
147 | ptyprocess==0.5.2
148 | py==1.4.34
149 | pyarrow==0.8.0
150 | pycodestyle==2.3.1
151 | pycosat==0.6.3
152 | pycparser==2.18
153 | pycrypto==2.6.1
154 | pycurl==7.43.0
155 | pydot==1.2.3
156 | pydot-ng==1.0.0
157 | pyemd==0.4.4
158 | pyflakes==1.5.0
159 | Pygments==2.2.0
160 | PyHive==0.5.0
161 | pylint==1.7.2
162 | pyodbc==4.0.17
163 | pyOpenSSL==17.2.0
164 | pyparsing==2.2.0
165 | Pyphen==0.9.4
166 | PySocks==1.6.7
167 | pytest==3.2.1
168 | python-dateutil==2.6.1
169 | python-Levenshtein==0.12.0
170 | pytz==2017.3
171 | PyWavelets==0.5.2
172 | PyYAML==3.12
173 | pyzmq==16.0.2
174 | QtAwesome==0.4.4
175 | qtconsole==4.3.1
176 | QtPy==1.3.1
177 | regex==2017.4.5
178 | requests==2.18.4
179 | rope==0.10.5
180 | ruamel-yaml==0.11.14
181 | s3transfer==0.1.12
182 | scikit-image==0.13.0
183 | scikit-learn==0.19.1
184 | scipy==1.0.0
185 | seaborn==0.8
186 | simplegeneric==0.8.1
187 | singledispatch==3.4.0.3
188 | six==1.11.0
189 | sklearn-pandas==1.6.0
190 | smart-open==1.5.6
191 | snowballstemmer==1.2.1
192 | sortedcollections==0.5.3
193 | sortedcontainers==1.5.7
194 | spacy==2.0.5
195 | Sphinx==1.6.3
196 | sphinxcontrib-websupport==1.0.1
197 | spyder==3.2.3
198 | SQLAlchemy==1.1.13
199 | statsmodels==0.8.0
200 | sympy==1.1.1
201 | tables==3.4.2
202 | tabulate==0.8.2
203 | tblib==1.3.2
204 | tensorflow-gpu==1.3.0
205 | tensorflow-tensorboard==0.1.8
206 | termcolor==1.1.0
207 | terminado==0.6
208 | testpath==0.3.1
209 | textacy==0.5.0
210 | thinc==6.10.2
211 | thrift==0.10.0
212 | toolz==0.9.0
213 | torch==0.2.0.post4
214 | torchtext==0.2.0
215 | torchvision==0.1.9
216 | tornado==4.5.2
217 | tqdm==4.19.5
218 | traitlets==4.3.2
219 | typing==3.6.2
220 | ujson==1.35
221 | unicodecsv==0.14.1
222 | Unidecode==1.0.22
223 | urllib3==1.22
224 | vega==0.4.4
225 | wcwidth==0.1.7
226 | webencodings==0.5.1
227 | Werkzeug==0.12.2
228 | widgetsnbextension==3.0.2
229 | wrapt==1.10.11
230 | xlrd==1.1.0
231 | XlsxWriter==0.9.8
232 | xlwt==1.3.0
233 | zict==0.1.3
234 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/notebooks/seq2seq_utils.py:
--------------------------------------------------------------------------------
  1 | from matplotlib import pyplot as plt
  2 | import tensorflow as tf
  3 | from keras import backend as K
  4 | from keras.layers import Input
  5 | from keras.models import Model
  6 | from IPython.display import SVG, display
  7 | from keras.utils.vis_utils import model_to_dot
  8 | import logging
  9 | import numpy as np
 10 | import dill as dpickle
 11 | from annoy import AnnoyIndex
 12 | from tqdm import tqdm, tqdm_notebook
 13 | from random import random
 14 | from nltk.translate.bleu_score import corpus_bleu
 15 | 
 16 | 
 17 | def load_text_processor(fname='title_pp.dpkl'):
 18 |     """
 19 |     Load preprocessors from disk.
 20 | 
 21 |     Parameters
 22 |     ----------
 23 |     fname: str
 24 |         file name of ktext.proccessor object
 25 | 
 26 |     Returns
 27 |     -------
 28 |     num_tokens : int
 29 |         size of vocabulary loaded into ktext.processor
 30 |     pp : ktext.processor
 31 |         the processor you are trying to load
 32 | 
 33 |     Typical Usage:
 34 |     -------------
 35 | 
 36 |     num_decoder_tokens, title_pp = load_text_processor(fname='title_pp.dpkl')
 37 |     num_encoder_tokens, body_pp = load_text_processor(fname='body_pp.dpkl')
 38 | 
 39 |     """
 40 |     # Load files from disk
 41 |     with open(fname, 'rb') as f:
 42 |         pp = dpickle.load(f)
 43 | 
 44 |     num_tokens = max(pp.id2token.keys()) + 1
 45 |     print(f'Size of vocabulary for {fname}: {num_tokens:,}')
 46 |     return num_tokens, pp
 47 | 
 48 | 
 49 | def load_decoder_inputs(decoder_np_vecs='train_title_vecs.npy'):
 50 |     """
 51 |     Load decoder inputs.
 52 | 
 53 |     Parameters
 54 |     ----------
 55 |     decoder_np_vecs : str
 56 |         filename of serialized numpy.array of decoder input (issue title)
 57 | 
 58 |     Returns
 59 |     -------
 60 |     decoder_input_data : numpy.array
 61 |         The data fed to the decoder as input during training for teacher forcing.
 62 |         This is the same as `decoder_np_vecs` except the last position.
 63 |     decoder_target_data : numpy.array
 64 |         The data that the decoder data is trained to generate (issue title).
 65 |         Calculated by sliding `decoder_np_vecs` one position forward.
 66 | 
 67 |     """
 68 |     vectorized_title = np.load(decoder_np_vecs)
 69 |     # For Decoder Input, you don't need the last word as that is only for prediction
 70 |     # when we are training using Teacher Forcing.
 71 |     decoder_input_data = vectorized_title[:, :-1]
 72 | 
 73 |     # Decoder Target Data Is Ahead By 1 Time Step From Decoder Input Data (Teacher Forcing)
 74 |     decoder_target_data = vectorized_title[:, 1:]
 75 | 
 76 |     print(f'Shape of decoder input: {decoder_input_data.shape}')
 77 |     print(f'Shape of decoder target: {decoder_target_data.shape}')
 78 |     return decoder_input_data, decoder_target_data
 79 | 
 80 | 
 81 | def load_encoder_inputs(encoder_np_vecs='train_body_vecs.npy'):
 82 |     """
 83 |     Load variables & data that are inputs to encoder.
 84 | 
 85 |     Parameters
 86 |     ----------
 87 |     encoder_np_vecs : str
 88 |         filename of serialized numpy.array of encoder input (issue title)
 89 | 
 90 |     Returns
 91 |     -------
 92 |     encoder_input_data : numpy.array
 93 |         The issue body
 94 |     doc_length : int
 95 |         The standard document length of the input for the encoder after padding
 96 |         the shape of this array will be (num_examples, doc_length)
 97 | 
 98 |     """
 99 |     vectorized_body = np.load(encoder_np_vecs)
100 |     # Encoder input is simply the body of the issue text
101 |     encoder_input_data = vectorized_body
102 |     doc_length = encoder_input_data.shape[1]
103 |     print(f'Shape of encoder input: {encoder_input_data.shape}')
104 |     return encoder_input_data, doc_length
105 | 
106 | 
def viz_model_architecture(model):
    """Draw the model's layer graph as an inline SVG (for Jupyter notebooks)."""
    layer_graph = model_to_dot(model)
    svg_markup = layer_graph.create(prog='dot', format='svg')
    display(SVG(svg_markup))
110 | 
111 | 
def free_gpu_mem():
    """Attempt to free gpu memory by swapping in a new Keras backend session."""
    # Close whatever session the backend currently holds.
    K.get_session().close()

    # Install a replacement session that is configured with allow_growth.
    growth_config = K.tf.ConfigProto()
    growth_config.gpu_options.allow_growth = True
    replacement_session = K.tf.Session(config=growth_config)
    K.set_session(replacement_session)
118 | 
119 | 
def test_gpu():
    """
    Run a toy computation task in tensorflow to test GPU.

    Builds a string constant, evaluates it in a session configured with
    ``allow_growth`` and prints the result. The session is closed before the
    function returns so repeated calls do not leave sessions open.
    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # The context manager closes the session even if `run` raises.
    with tf.Session(config=config) as session:
        hello = tf.constant('Hello, TensorFlow!')
        print(session.run(hello))
127 | 
128 | 
def plot_model_training_history(history_object):
    """
    Plot training loss against validation loss, one point per epoch.

    Parameters
    ----------
    history_object : object
        Anything exposing a ``history`` mapping with the keys ``'loss'`` and
        ``'val_loss'`` (e.g. what Keras ``model.fit`` returns when validation
        data is supplied).
    """
    # Both curves are losses, so label the figure as such (not "accuracy").
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.plot(history_object.history['loss'])
    plt.plot(history_object.history['val_loss'])
    # Legend entries follow the order of the plot calls above.
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
138 | 
139 | 
def extract_encoder_model(model):
    """
    Pull the encoder out of the full Sequence to Sequence model.

    The encoder is looked up by its layer name, 'Encoder-Model'.

    Input:
    -----
    model: keras model object
        The full model; it must contain a layer named 'Encoder-Model'.

    Returns:
    -----
    The object registered under that layer name (a nested keras model that
    maps an issue body to its encoding -- per the naming used at training
    time).

    """
    return model.get_layer('Encoder-Model')
158 | 
159 | 
def extract_decoder_model(model):
    """
    Build a stand-alone decoder for inference from the trained model's layers.

    Inputs:
    ------
    model: keras model object
        Must contain layers named 'Decoder-Input', 'Decoder-Word-Embedding',
        'Decoder-Batchnorm-1', 'Decoder-GRU', 'Decoder-Batchnorm-2' and
        'Final-Output-Dense'.

    Returns:
    -------
    A Keras model wired as follows:

    Inputs of the returned model:
    1: the decoder token input (index of the last predicted word, or of the
       start marker on the first step)
    2: a hidden state for the GRU (the encoder output on the first step,
       afterwards the state returned by the previous step)

    Outputs of the returned model:
    1. The output of the final dense layer (prediction for the next word)
    2. The GRU state, to be fed back in at the next time step

    Implementation Notes:
    ----------------------
    The trained layers are reused (so their weights are shared), but the graph
    is rewired: at inference time there is no teacher forcing, so the GRU state
    has to be an explicit input that the caller feeds back step by step.

    """
    fetch_layer = model.get_layer

    # The latent dimension is the same throughout the architecture, so the
    # width of the embedding output doubles as the width of the GRU state.
    embedding_layer = fetch_layer('Decoder-Word-Embedding')
    hidden_size = embedding_layer.output_shape[-1]

    # Re-create the path from the token input up to the GRU.
    token_input = fetch_layer('Decoder-Input').input
    embedded_tokens = embedding_layer(token_input)
    normalized_tokens = fetch_layer('Decoder-Batchnorm-1')(embedded_tokens)

    # During training the GRU's initial state came straight from the encoder.
    # For inference it becomes a model input so predictions can be looped back.
    state_input = Input(shape=(hidden_size,), name='hidden_state_input')

    # The GRU receives two tensors: the embedded token and the state to resume
    # from. It hands back its output along with the updated state.
    gru_output, gru_state = fetch_layer('Decoder-GRU')(
        [normalized_tokens, state_input])

    # Re-create the layers that sit on top of the GRU.
    normalized_output = fetch_layer('Decoder-Batchnorm-2')(gru_output)
    next_word_output = fetch_layer('Final-Output-Dense')(normalized_output)

    return Model([token_input, state_input],
                 [next_word_output, gru_state])
216 | 
217 | 
class Seq2Seq_Inference(object):
    """
    Inference helper built around a trained seq2seq model.

    The encoder and decoder sub-models are pulled out of `seq2seq_model` with
    `extract_encoder_model` / `extract_decoder_model` so that titles can be
    generated one token at a time, and so that the encoder output can be used
    as an embedding for a nearest-neighbour recommender.

    Attributes
    ----------
    pp_body : text pre-processor used for the encoder input (issue bodies)
    pp_title : text pre-processor used for the decoder (issue titles)
    seq2seq_model : the full trained model passed to the constructor
    encoder_model, decoder_model : sub-models extracted from `seq2seq_model`
    default_max_len_title : int
        Taken from `pp_title.padding_maxlen`; used when no explicit maximum
        title length is supplied.
    nn : similarity index (an annoy index) or None until one is prepared/set
    rec_df : DataFrame that the index rows refer to, or None until set
    """

    def __init__(self,
                 encoder_preprocessor,
                 decoder_preprocessor,
                 seq2seq_model):

        self.pp_body = encoder_preprocessor
        self.pp_title = decoder_preprocessor
        self.seq2seq_model = seq2seq_model
        self.encoder_model = extract_encoder_model(seq2seq_model)
        self.decoder_model = extract_decoder_model(seq2seq_model)
        self.default_max_len_title = self.pp_title.padding_maxlen
        self.nn = None
        self.rec_df = None

    def generate_issue_title(self,
                             raw_input_text,
                             max_len_title=None):
        """
        Use the seq2seq model to generate a title given the body of an issue.

        Inputs
        ------
        raw_input_text: str
            The body of the issue text as an input string

        max_len_title: int (optional)
            The maximum number of tokens the model will generate. Defaults to
            `self.default_max_len_title`.

        Returns
        -------
        (encoding, title) : tuple
            `encoding` is the encoder output for the body, captured before the
            decoder loop replaces the state; `title` is the generated tokens
            joined with single spaces.
        """
        if max_len_title is None:
            max_len_title = self.default_max_len_title
        # get the encoder's features for the decoder
        raw_tokenized = self.pp_body.transform([raw_input_text])
        body_encoding = self.encoder_model.predict(raw_tokenized)
        # keep a handle on the encoder output before the decoder state
        #   overwrites `body_encoding`; it is returned as an embedding.
        original_body_encoding = body_encoding
        # decoding is seeded with the `_start_` token
        state_value = np.array(self.pp_title.token2id['_start_']).reshape(1, 1)

        decoded_sentence = []
        while True:
            preds, st = self.decoder_model.predict([state_value, body_encoding])

            # Ignore indices 0 (padding) and 1 (unknown): argmax runs over the
            #  slice starting at 2, so add 2 to recover the vocabulary index.
            pred_idx = np.argmax(preds[:, :, 2:]) + 2

            # retrieve word from index prediction
            pred_word_str = self.pp_title.id2token[pred_idx]

            if pred_word_str == '_end_' or len(decoded_sentence) >= max_len_title:
                break
            decoded_sentence.append(pred_word_str)

            # feed the new state and the predicted token back into the decoder
            body_encoding = st
            state_value = np.array(pred_idx).reshape(1, 1)

        return original_body_encoding, ' '.join(decoded_sentence)

    def print_example(self,
                      i,
                      body_text,
                      title_text,
                      url,
                      threshold):
        """
        Prints an example of the model's prediction for manual inspection.

        Parameters
        ----------
        i : int or None
            Example number shown in the header; pass None to omit the header.
            (0 is a valid example number and prints a header.)
        body_text : str
            Issue body fed to the model.
        title_text : str or None
            Original title; printed only when truthy.
        url : str or None
            Issue url; printed only when truthy.
        threshold : float
            Maximum distance for a neighbour to be shown as a similar issue.
        """
        if i is not None:
            print('\n\n==============================================')
            print(f'============== Example # {i} =================\n')

        if url:
            print(url)

        print(f"Issue Body:\n {body_text} \n")

        if title_text:
            print(f"Original Title:\n {title_text}")

        emb, gen_title = self.generate_issue_title(body_text)
        print(f"\n****** Machine Generated Title (Prediction) ******:\n {gen_title}")

        # recommendations need both the similarity index and its dataframe
        if self.nn is None:
            return
        if self.rec_df is None:
            logging.warning('Similarity index is set but no recommendation '
                            'data is available; call set_recsys_data first.')
            return

        # return neighbors and distances
        n, d = self.nn.get_nns_by_vector(emb.flatten(), n=4,
                                         include_distances=True)
        # the first hit is dropped -- presumably it is the query issue itself
        # when that issue is part of the index (TODO confirm).
        neighbors = n[1:]
        dist = d[1:]

        # guard against an empty neighbour list before calling min()
        if len(dist) > 0 and min(dist) <= threshold:
            cols = ['issue_url', 'issue_title', 'body']
            dfcopy = self.rec_df.iloc[neighbors][cols].copy(deep=True)
            dfcopy['dist'] = dist
            similar_issues_df = dfcopy.query(f'dist <= {threshold}')

            print("\n**** Similar Issues (using encoder embedding) ****:\n")
            display(similar_issues_df)

    def demo_model_predictions(self,
                               n,
                               issue_df,
                               threshold=1):
        """
        Pick n random Issues and display predictions.

        Input:
        ------
        n : int
            Number of issues to display from issue_df
        issue_df : pandas DataFrame
            DataFrame that contains the columns `body`, `issue_title` and
            `issue_url`.
        threshold : float
            distance threshold for recommendation of similar issues.

        Returns:
        --------
        None
            Prints the original issue body and the model's prediction.
        """
        # Extract body and title from DF
        body_text = issue_df.body.tolist()
        title_text = issue_df.issue_title.tolist()
        url = issue_df.issue_url.tolist()

        # sample positions from the whole frame; `high` is exclusive, so
        # low=0 makes every row (including the first) eligible.
        demo_list = np.random.randint(low=0, high=len(body_text), size=n)
        for i in demo_list:
            self.print_example(i,
                               body_text=body_text[i],
                               title_text=title_text[i],
                               url=url[i],
                               threshold=threshold)

    def prepare_recommender(self, vectorized_array, original_df):
        """
        Use the annoy library to build recommender

        Parameters
        ----------
        vectorized_array : numpy array
            The integer-encoded corpus that is fed into the encoder. It must
            expose `.shape`, so a plain list of lists is not sufficient.
        original_df : pandas.DataFrame
            This is the original dataframe that has the columns
            ['issue_url', 'issue_title', 'body']

        Returns
        -------
        annoy.AnnoyIndex  object (see https://github.com/spotify/annoy)
        """
        self.rec_df = original_df
        # aim for roughly 200 batches, but never let the batch size drop to 0
        # when the corpus has fewer than 200 rows.
        batch_size = max(1, vectorized_array.shape[0] // 200)
        emb = self.encoder_model.predict(x=vectorized_array,
                                         batch_size=batch_size)

        f = emb.shape[1]
        # 'angular' is annoy's historical default metric; it is passed
        # explicitly because newer releases deprecate omitting it.
        self.nn = AnnoyIndex(f, 'angular')
        logging.warning('Adding embeddings')
        for i in tqdm(range(len(emb))):
            self.nn.add_item(i, emb[i])
        logging.warning('Building trees for similarity lookup.')
        self.nn.build(50)
        return self.nn

    def set_recsys_data(self, original_df):
        """Set the dataframe that similar-issue lookups index into."""
        self.rec_df = original_df

    def set_recsys_annoyobj(self, annoyobj):
        """Set an already-built similarity index."""
        self.nn = annoyobj

    def evaluate_model(self, holdout_bodies, holdout_titles):
        """
        Method for calculating BLEU Score.

        Parameters
        ----------
        holdout_bodies : List[str]
            These are the issue bodies that we want to summarize
        holdout_titles : List[str]
            This is the ground truth we are trying to predict --> issue titles

        Returns
        -------
        bleu : float
            The BLEU Score

        """
        actual, predicted = list(), list()
        assert len(holdout_bodies) == len(holdout_titles)
        num_examples = len(holdout_bodies)

        logging.warning('Generating predictions.')
        # step over the whole set TODO: parallelize this
        for i in tqdm_notebook(range(num_examples)):
            _, yhat = self.generate_issue_title(holdout_bodies[i])

            # run both strings through the title pre-processor so reference
            # and prediction are tokenized the same way
            actual.append(self.pp_title.process_text([holdout_titles[i]])[0])
            predicted.append(self.pp_title.process_text([yhat])[0])
        # calculate BLEU score
        logging.warning('Calculating BLEU.')

        # must be careful with nltk api for corpus_bleu!
        # it expects List[List[List[str]]] for the ground truth (a list of
        # references per example); List[List[str]] gives erroneous results.
        bleu = corpus_bleu([[a] for a in actual], predicted)
        return bleu
430 | 


--------------------------------------------------------------------------------
/notebooks/Tutorial.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {
   6 |     "toc": true
   7 |    },
   8 |    "source": [
   9 |     "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
  10 |     "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Process-Data\" data-toc-modified-id=\"Process-Data-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Process Data</a></span></li><li><span><a href=\"#Pre-Process-Data-For-Deep-Learning\" data-toc-modified-id=\"Pre-Process-Data-For-Deep-Learning-2\"><span class=\"toc-item-num\">2&nbsp;&nbsp;</span>Pre-Process Data For Deep Learning</a></span><ul class=\"toc-item\"><li><ul class=\"toc-item\"><li><ul class=\"toc-item\"><li><span><a href=\"#Look-at-one-example-of-processed-issue-bodies\" data-toc-modified-id=\"Look-at-one-example-of-processed-issue-bodies-2.0.0.1\"><span class=\"toc-item-num\">2.0.0.1&nbsp;&nbsp;</span>Look at one example of processed issue bodies</a></span></li><li><span><a href=\"#Look-at-one-example-of-processed-issue-titles\" data-toc-modified-id=\"Look-at-one-example-of-processed-issue-titles-2.0.0.2\"><span class=\"toc-item-num\">2.0.0.2&nbsp;&nbsp;</span>Look at one example of processed issue titles</a></span></li></ul></li></ul></li></ul></li><li><span><a href=\"#Define-Model-Architecture\" data-toc-modified-id=\"Define-Model-Architecture-3\"><span class=\"toc-item-num\">3&nbsp;&nbsp;</span>Define Model Architecture</a></span><ul class=\"toc-item\"><li><ul class=\"toc-item\"><li><span><a href=\"#Load-the-data-from-disk-into-variables\" data-toc-modified-id=\"Load-the-data-from-disk-into-variables-3.0.1\"><span class=\"toc-item-num\">3.0.1&nbsp;&nbsp;</span>Load the data from disk into variables</a></span></li><li><span><a href=\"#Define-Model-Architecture\" data-toc-modified-id=\"Define-Model-Architecture-3.0.2\"><span class=\"toc-item-num\">3.0.2&nbsp;&nbsp;</span>Define Model Architecture</a></span></li></ul></li></ul></li><li><span><a href=\"#Train-Model\" data-toc-modified-id=\"Train-Model-4\"><span class=\"toc-item-num\">4&nbsp;&nbsp;</span>Train Model</a></span></li><li><span><a href=\"#See-Results-On-Holdout-Set\" 
data-toc-modified-id=\"See-Results-On-Holdout-Set-5\"><span class=\"toc-item-num\">5&nbsp;&nbsp;</span>See Results On Holdout Set</a></span></li><li><span><a href=\"#Feature-Extraction-Demo\" data-toc-modified-id=\"Feature-Extraction-Demo-6\"><span class=\"toc-item-num\">6&nbsp;&nbsp;</span>Feature Extraction Demo</a></span><ul class=\"toc-item\"><li><ul class=\"toc-item\"><li><span><a href=\"#Example-1:-Issues-Installing-Python-Packages\" data-toc-modified-id=\"Example-1:-Issues-Installing-Python-Packages-6.0.1\"><span class=\"toc-item-num\">6.0.1&nbsp;&nbsp;</span>Example 1: Issues Installing Python Packages</a></span></li><li><span><a href=\"#Example-2:--Issues-asking-for-feature-improvements\" data-toc-modified-id=\"Example-2:--Issues-asking-for-feature-improvements-6.0.2\"><span class=\"toc-item-num\">6.0.2&nbsp;&nbsp;</span>Example 2:  Issues asking for feature improvements</a></span></li></ul></li></ul></li></ul></div>"
  11 |    ]
  12 |   },
  13 |   {
  14 |    "cell_type": "code",
  15 |    "execution_count": 1,
  16 |    "metadata": {
  17 |     "collapsed": true
  18 |    },
  19 |    "outputs": [],
  20 |    "source": [
  21 |     "import pandas as pd\n",
  22 |     "import logging\n",
  23 |     "import glob\n",
  24 |     "from sklearn.model_selection import train_test_split\n",
  25 |     "pd.set_option('display.max_colwidth', 500)\n",
  26 |     "logger = logging.getLogger()\n",
  27 |     "logger.setLevel(logging.WARNING)"
  28 |    ]
  29 |   },
  30 |   {
  31 |    "cell_type": "markdown",
  32 |    "metadata": {},
  33 |    "source": [
  34 |     "# Process Data"
  35 |    ]
  36 |   },
  37 |   {
  38 |    "cell_type": "markdown",
  39 |    "metadata": {},
  40 |    "source": [
  41 |     "Look at filesystem to see files extracted from BigQuery (or Kaggle: https://www.kaggle.com/davidshinn/github-issues/)"
  42 |    ]
  43 |   },
  44 |   {
  45 |    "cell_type": "code",
  46 |    "execution_count": 9,
  47 |    "metadata": {},
  48 |    "outputs": [
  49 |     {
  50 |      "name": "stdout",
  51 |      "output_type": "stream",
  52 |      "text": [
  53 |       "-rw-r--r-- 1 40294 40294 2.7G Jan 18  2018 github_issues.csv\r\n"
  54 |      ]
  55 |     }
  56 |    ],
  57 |    "source": [
  58 |     "!ls -lah | grep github_issues.csv"
  59 |    ]
  60 |   },
  61 |   {
  62 |    "cell_type": "markdown",
  63 |    "metadata": {},
  64 |    "source": [
  65 |     "Split data into train and test set and preview data"
  66 |    ]
  67 |   },
  68 |   {
  69 |    "cell_type": "code",
  70 |    "execution_count": 11,
  71 |    "metadata": {},
  72 |    "outputs": [
  73 |     {
  74 |      "name": "stdout",
  75 |      "output_type": "stream",
  76 |      "text": [
  77 |       "Train: 1,800,000 rows 3 columns\n",
  78 |       "Test: 200,000 rows 3 columns\n"
  79 |      ]
  80 |     },
  81 |     {
  82 |      "data": {
  83 |       "text/html": [
  84 |        "<div>\n",
  85 |        "<style scoped>\n",
  86 |        "    .dataframe tbody tr th:only-of-type {\n",
  87 |        "        vertical-align: middle;\n",
  88 |        "    }\n",
  89 |        "\n",
  90 |        "    .dataframe tbody tr th {\n",
  91 |        "        vertical-align: top;\n",
  92 |        "    }\n",
  93 |        "\n",
  94 |        "    .dataframe thead th {\n",
  95 |        "        text-align: right;\n",
  96 |        "    }\n",
  97 |        "</style>\n",
  98 |        "<table border=\"1\" class=\"dataframe\">\n",
  99 |        "  <thead>\n",
 100 |        "    <tr style=\"text-align: right;\">\n",
 101 |        "      <th></th>\n",
 102 |        "      <th>issue_url</th>\n",
 103 |        "      <th>issue_title</th>\n",
 104 |        "      <th>body</th>\n",
 105 |        "    </tr>\n",
 106 |        "  </thead>\n",
 107 |        "  <tbody>\n",
 108 |        "    <tr>\n",
 109 |        "      <th>3165423</th>\n",
 110 |        "      <td>\"https://github.com/1000hz/bootstrap-validator/issues/574\"</td>\n",
 111 |        "      <td>uncaught typeerror: f b is not a function when using $ ... .validator 'update'</td>\n",
 112 |        "      <td>the above error is being thrown when i try and run update via js to include some new fields that have been added dynamically. i'm using backbone.js rendering a script template element to add a new set up fields based on user interaction. the full error message is: uncaught typeerror: f b is not a function at htmlformelement.&lt;anonymous&gt; validator.min.js:9 at function.each jquery.min.js:2 at n.fn.init.each jquery.min.js:2 at n.fn.init.b as validator validator.min.js:9 at n.initskillgroup app.l...</td>\n",
 113 |        "    </tr>\n",
 114 |        "    <tr>\n",
 115 |        "      <th>2763145</th>\n",
 116 |        "      <td>\"https://github.com/quasar-analytics/quasar/issues/2821\"</td>\n",
 117 |        "      <td>invoke endpoint regression</td>\n",
 118 |        "      <td>problem accures in versions: 21.x.x , 23.x.x and 24.x.x didn't check 22.x.x first query is put to view mount sql select from /test-mount/testdb/flatviz the second one sql select row.seriesone as seriesone, row.seriestwo as seriestwo, min row.measureone as measureone from output_of_first_query as row group by row.seriesone, row.seriestwo order by row.seriesone asc, row.seriestwo asc the third one is sql select from output_of_second_query where seriesone = one-one in 20.14.13 this works as exp...</td>\n",
 119 |        "    </tr>\n",
 120 |        "    <tr>\n",
 121 |        "      <th>3882729</th>\n",
 122 |        "      <td>\"https://github.com/msharov/ustl/issues/79\"</td>\n",
 123 |        "      <td>build ustl with clang on linux</td>\n",
 124 |        "      <td>hi, on ubuntu 14.04 clang 3.4, gcc 4.8.4 and fedora 22 clang 3.5, gcc 5.3.1 : cc=clang cxx=clang++ ./configure --libdir=path/to/libsupc++.a without --libdir it searches for libcxxabi when cc=clang make works fine, make check however shows quite a few diffs. is such configuration supposed to work? thanks!</td>\n",
 125 |        "    </tr>\n",
 126 |        "  </tbody>\n",
 127 |        "</table>\n",
 128 |        "</div>"
 129 |       ],
 130 |       "text/plain": [
 131 |        "                                                          issue_url  \\\n",
 132 |        "3165423  \"https://github.com/1000hz/bootstrap-validator/issues/574\"   \n",
 133 |        "2763145    \"https://github.com/quasar-analytics/quasar/issues/2821\"   \n",
 134 |        "3882729                 \"https://github.com/msharov/ustl/issues/79\"   \n",
 135 |        "\n",
 136 |        "                                                                            issue_title  \\\n",
 137 |        "3165423  uncaught typeerror: f b is not a function when using $ ... .validator 'update'   \n",
 138 |        "2763145                                                      invoke endpoint regression   \n",
 139 |        "3882729                                                  build ustl with clang on linux   \n",
 140 |        "\n",
 141 |        "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        body  \n",
 142 |        "3165423  the above error is being thrown when i try and run update via js to include some new fields that have been added dynamically. i'm using backbone.js rendering a script template element to add a new set up fields based on user interaction. the full error message is: uncaught typeerror: f b is not a function at htmlformelement.<anonymous> validator.min.js:9 at function.each jquery.min.js:2 at n.fn.init.each jquery.min.js:2 at n.fn.init.b as validator validator.min.js:9 at n.initskillgroup app.l...  \n",
 143 |        "2763145  problem accures in versions: 21.x.x , 23.x.x and 24.x.x didn't check 22.x.x first query is put to view mount sql select from /test-mount/testdb/flatviz the second one sql select row.seriesone as seriesone, row.seriestwo as seriestwo, min row.measureone as measureone from output_of_first_query as row group by row.seriesone, row.seriestwo order by row.seriesone asc, row.seriestwo asc the third one is sql select from output_of_second_query where seriesone = one-one in 20.14.13 this works as exp...  \n",
 144 |        "3882729                                                                                                                                                                                                    hi, on ubuntu 14.04 clang 3.4, gcc 4.8.4 and fedora 22 clang 3.5, gcc 5.3.1 : cc=clang cxx=clang++ ./configure --libdir=path/to/libsupc++.a without --libdir it searches for libcxxabi when cc=clang make works fine, make check however shows quite a few diffs. is such configuration supposed to work? thanks!  "
 145 |       ]
 146 |      },
 147 |      "execution_count": 11,
 148 |      "metadata": {},
 149 |      "output_type": "execute_result"
 150 |     }
 151 |    ],
 152 |    "source": [
 153 |     "#read in data sample 2M rows (for speed of tutorial)\n",
 154 |     "traindf, testdf = train_test_split(pd.read_csv('github_issues.csv').sample(n=2000000), \n",
 155 |     "                                   test_size=.10)\n",
 156 |     "\n",
 157 |     "\n",
 158 |     "#print out stats about shape of data\n",
 159 |     "print(f'Train: {traindf.shape[0]:,} rows {traindf.shape[1]:,} columns')\n",
 160 |     "print(f'Test: {testdf.shape[0]:,} rows {testdf.shape[1]:,} columns')\n",
 161 |     "\n",
 162 |     "# preview data\n",
 163 |     "traindf.head(3)"
 164 |    ]
 165 |   },
 166 |   {
 167 |    "cell_type": "markdown",
 168 |    "metadata": {},
 169 |    "source": [
 170 |     "**Convert to lists in preparation for modeling**"
 171 |    ]
 172 |   },
 173 |   {
 174 |    "cell_type": "code",
 175 |    "execution_count": 9,
 176 |    "metadata": {},
 177 |    "outputs": [
 178 |     {
 179 |      "data": {
 180 |       "text/plain": [
 181 |        "'some of the sds alerts do not have clearing alerts. so it always present in alerting directory. these kinds of alerts should be stored in etcd under /alerting/notify, it never goes to alerting/alerts directory and it is not displayed under alerts in ui also. these kinds of alerts are notified via notification channel and deleted via ttl. node_agent should have a logic to handle this in alerting framework.'"
 182 |       ]
 183 |      },
 184 |      "execution_count": 9,
 185 |      "metadata": {},
 186 |      "output_type": "execute_result"
 187 |     }
 188 |    ],
 189 |    "source": [
 190 |     "train_body_raw = traindf.body.tolist()\n",
 191 |     "train_title_raw = traindf.issue_title.tolist()\n",
 192 |     "#preview output of first element\n",
 193 |     "train_body_raw[0]"
 194 |    ]
 195 |   },
 196 |   {
 197 |    "cell_type": "markdown",
 198 |    "metadata": {},
 199 |    "source": [
 200 |     "# Pre-Process Data For Deep Learning\n",
 201 |     "\n",
 202 |     "See [this repo](https://github.com/hamelsmu/ktext) for documentation on the ktext package"
 203 |    ]
 204 |   },
 205 |   {
 206 |    "cell_type": "code",
 207 |    "execution_count": 10,
 208 |    "metadata": {},
 209 |    "outputs": [
 210 |     {
 211 |      "name": "stderr",
 212 |      "output_type": "stream",
 213 |      "text": [
 214 |       "/opt/conda/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
 215 |       "  from ._conv import register_converters as _register_converters\n",
 216 |       "Using TensorFlow backend.\n"
 217 |      ]
 218 |     }
 219 |    ],
 220 |    "source": [
 221 |     "%reload_ext autoreload\n",
 222 |     "%autoreload 2\n",
 223 |     "from ktext.preprocess import processor"
 224 |    ]
 225 |   },
 226 |   {
 227 |    "cell_type": "code",
 228 |    "execution_count": 11,
 229 |    "metadata": {},
 230 |    "outputs": [
 231 |     {
 232 |      "name": "stderr",
 233 |      "output_type": "stream",
 234 |      "text": [
 235 |       "WARNING:root:....tokenizing data\n",
 236 |       "WARNING:root:(1/3) done. 1738 sec\n",
 237 |       "WARNING:root:....building corpus\n",
 238 |       "WARNING:root:(2/3) done. 568 sec\n",
 239 |       "WARNING:root:....consolidating corpus\n",
 240 |       "WARNING:root:(3/3) done. 9 sec\n",
 241 |       "WARNING:root:Finished parsing 1,800,000 documents.\n",
 242 |       "WARNING:root:...fit is finished, beginning transform\n",
 243 |       "WARNING:root:done. 733 sec\n"
 244 |      ]
 245 |     },
 246 |     {
 247 |      "name": "stdout",
 248 |      "output_type": "stream",
 249 |      "text": [
 250 |       "CPU times: user 22min 17s, sys: 1min 16s, total: 23min 34s\n",
 251 |       "Wall time: 50min 53s\n"
 252 |      ]
 253 |     }
 254 |    ],
 255 |    "source": [
 256 |     "%%time\n",
 257 |     "# Clean, tokenize, and apply padding / truncating such that each document length = 70\n",
 258 |     "#  also, retain only the top 8,000 words in the vocabulary and set the remaining words\n",
 259 |     "#  to 1 which will become common index for rare words \n",
 260 |     "body_pp = processor(keep_n=8000, padding_maxlen=70)\n",
 261 |     "train_body_vecs = body_pp.fit_transform(train_body_raw)"
 262 |    ]
 263 |   },
 264 |   {
 265 |    "cell_type": "markdown",
 266 |    "metadata": {},
 267 |    "source": [
 268 |     "#### Look at one example of processed issue bodies"
 269 |    ]
 270 |   },
 271 |   {
 272 |    "cell_type": "code",
 273 |    "execution_count": 12,
 274 |    "metadata": {},
 275 |    "outputs": [
 276 |     {
 277 |      "name": "stdout",
 278 |      "output_type": "stream",
 279 |      "text": [
 280 |       "\n",
 281 |       "original string:\n",
 282 |       " some of the sds alerts do not have clearing alerts. so it always present in alerting directory. these kinds of alerts should be stored in etcd under /alerting/notify, it never goes to alerting/alerts directory and it is not displayed under alerts in ui also. these kinds of alerts are notified via notification channel and deleted via ttl. node_agent should have a logic to handle this in alerting framework. \n",
 283 |       "\n",
 284 |       "after pre-processing:\n",
 285 |       " [37 33 39  1  6 17 29 22 13  6  3 36 25  8 34 23  1 15  3 40 26 33  6 35\n",
 286 |       " 11 38 23 18 45  1  4 32  2 25 28 20 42  1  4  6 15  9 25 24 29 16 45  6\n",
 287 |       " 23 44  7  3 40 26 33  6 10 31 46 30 12  9 14 46 43  3  1 35 22  5] \n",
 288 |       "\n"
 289 |      ]
 290 |     }
 291 |    ],
 292 |    "source": [
 293 |     "print('\\noriginal string:\\n', train_body_raw[0], '\\n')\n",
 294 |     "print('after pre-processing:\\n', train_body_vecs[0], '\\n')"
 295 |    ]
 296 |   },
 297 |   {
 298 |    "cell_type": "code",
 299 |    "execution_count": 13,
 300 |    "metadata": {},
 301 |    "outputs": [
 302 |     {
 303 |      "name": "stderr",
 304 |      "output_type": "stream",
 305 |      "text": [
 306 |       "WARNING:root:....tokenizing data\n",
 307 |       "WARNING:root:(1/3) done. 222 sec\n",
 308 |       "WARNING:root:....building corpus\n",
 309 |       "WARNING:root:(2/3) done. 35 sec\n",
 310 |       "WARNING:root:....consolidating corpus\n",
 311 |       "WARNING:root:(3/3) done. 2 sec\n",
 312 |       "WARNING:root:Finished parsing 1,800,000 documents.\n",
 313 |       "WARNING:root:...fit is finished, beginning transform\n",
 314 |       "WARNING:root:done. 101 sec\n"
 315 |      ]
 316 |     }
 317 |    ],
 318 |    "source": [
 319 |     "# Instantiate a text processor for the titles, with some different parameters\n",
 320 |     "#  append_indicators = True appends the tokens '_start_' and '_end_' to each\n",
 321 |     "#                      document\n",
  322 |     "#  padding = 'post' means that zero padding is appended to the end\n",
  323 |     "#             of the document (as opposed to the default which is 'pre')\n",
 324 |     "title_pp = processor(append_indicators=True, keep_n=4500, \n",
 325 |     "                     padding_maxlen=12, padding ='post')\n",
 326 |     "\n",
 327 |     "# process the title data\n",
 328 |     "train_title_vecs = title_pp.fit_transform(train_title_raw)"
 329 |    ]
 330 |   },
 331 |   {
 332 |    "cell_type": "markdown",
 333 |    "metadata": {},
 334 |    "source": [
 335 |     "#### Look at one example of processed issue titles"
 336 |    ]
 337 |   },
 338 |   {
 339 |    "cell_type": "code",
 340 |    "execution_count": 14,
 341 |    "metadata": {},
 342 |    "outputs": [
 343 |     {
 344 |      "name": "stdout",
 345 |      "output_type": "stream",
 346 |      "text": [
 347 |       "\n",
 348 |       "original string:\n",
 349 |       " node_agent should handle sds native alerts also\n",
 350 |       "after pre-processing:\n",
 351 |       " [3 1 8 6 1 7 4 5 2 0 0 0]\n"
 352 |      ]
 353 |     }
 354 |    ],
 355 |    "source": [
 356 |     "print('\\noriginal string:\\n', train_title_raw[0])\n",
 357 |     "print('after pre-processing:\\n', train_title_vecs[0])"
 358 |    ]
 359 |   },
 360 |   {
 361 |    "cell_type": "markdown",
 362 |    "metadata": {},
 363 |    "source": [
 364 |     "Serialize all of this to disk for later use"
 365 |    ]
 366 |   },
 367 |   {
 368 |    "cell_type": "code",
 369 |    "execution_count": 15,
 370 |    "metadata": {
 371 |     "collapsed": true
 372 |    },
 373 |    "outputs": [],
 374 |    "source": [
 375 |     "import dill as dpickle\n",
 376 |     "import numpy as np\n",
 377 |     "\n",
 378 |     "# Save the preprocessor\n",
 379 |     "with open('body_pp.dpkl', 'wb') as f:\n",
 380 |     "    dpickle.dump(body_pp, f)\n",
 381 |     "\n",
 382 |     "with open('title_pp.dpkl', 'wb') as f:\n",
 383 |     "    dpickle.dump(title_pp, f)\n",
 384 |     "\n",
 385 |     "# Save the processed data\n",
 386 |     "np.save('train_title_vecs.npy', train_title_vecs)\n",
 387 |     "np.save('train_body_vecs.npy', train_body_vecs)"
 388 |    ]
 389 |   },
 390 |   {
 391 |    "cell_type": "markdown",
 392 |    "metadata": {},
 393 |    "source": [
 394 |     "# Define Model Architecture"
 395 |    ]
 396 |   },
 397 |   {
 398 |    "cell_type": "markdown",
 399 |    "metadata": {},
 400 |    "source": [
 401 |     "### Load the data from disk into variables"
 402 |    ]
 403 |   },
 404 |   {
 405 |    "cell_type": "code",
 406 |    "execution_count": 16,
 407 |    "metadata": {
 408 |     "collapsed": true
 409 |    },
 410 |    "outputs": [],
 411 |    "source": [
 412 |     "from seq2seq_utils import load_decoder_inputs, load_encoder_inputs, load_text_processor"
 413 |    ]
 414 |   },
 415 |   {
 416 |    "cell_type": "code",
 417 |    "execution_count": 17,
 418 |    "metadata": {},
 419 |    "outputs": [
 420 |     {
 421 |      "name": "stdout",
 422 |      "output_type": "stream",
 423 |      "text": [
 424 |       "Shape of encoder input: (1800000, 70)\n",
 425 |       "Shape of decoder input: (1800000, 11)\n",
 426 |       "Shape of decoder target: (1800000, 11)\n"
 427 |      ]
 428 |     }
 429 |    ],
 430 |    "source": [
 431 |     "encoder_input_data, doc_length = load_encoder_inputs('train_body_vecs.npy')\n",
 432 |     "decoder_input_data, decoder_target_data = load_decoder_inputs('train_title_vecs.npy')"
 433 |    ]
 434 |   },
 435 |   {
 436 |    "cell_type": "code",
 437 |    "execution_count": 18,
 438 |    "metadata": {},
 439 |    "outputs": [
 440 |     {
 441 |      "name": "stdout",
 442 |      "output_type": "stream",
 443 |      "text": [
 444 |       "Size of vocabulary for body_pp.dpkl: 8,002\n",
 445 |       "Size of vocabulary for title_pp.dpkl: 4,502\n"
 446 |      ]
 447 |     }
 448 |    ],
 449 |    "source": [
 450 |     "num_encoder_tokens, body_pp = load_text_processor('body_pp.dpkl')\n",
 451 |     "num_decoder_tokens, title_pp = load_text_processor('title_pp.dpkl')"
 452 |    ]
 453 |   },
 454 |   {
 455 |    "cell_type": "markdown",
 456 |    "metadata": {},
 457 |    "source": [
 458 |     "### Define Model Architecture"
 459 |    ]
 460 |   },
 461 |   {
 462 |    "cell_type": "code",
 463 |    "execution_count": 19,
 464 |    "metadata": {
 465 |     "collapsed": true
 466 |    },
 467 |    "outputs": [],
 468 |    "source": [
 469 |     "%matplotlib inline\n",
 470 |     "from keras.models import Model\n",
 471 |     "from keras.layers import Input, LSTM, GRU, Dense, Embedding, Bidirectional, BatchNormalization\n",
 472 |     "from keras import optimizers"
 473 |    ]
 474 |   },
 475 |   {
 476 |    "cell_type": "code",
 477 |    "execution_count": 20,
 478 |    "metadata": {
 479 |     "collapsed": true
 480 |    },
 481 |    "outputs": [],
 482 |    "source": [
  483 |     "#arbitrarily set latent dimension for embedding and hidden units\n",
 484 |     "latent_dim = 300\n",
 485 |     "\n",
 486 |     "##### Define Model Architecture ######\n",
 487 |     "\n",
 488 |     "########################\n",
 489 |     "#### Encoder Model ####\n",
 490 |     "encoder_inputs = Input(shape=(doc_length,), name='Encoder-Input')\n",
 491 |     "\n",
 492 |     "# Word embedding for encoder (ex: Issue Body)\n",
 493 |     "x = Embedding(num_encoder_tokens, latent_dim, name='Body-Word-Embedding', mask_zero=False)(encoder_inputs)\n",
 494 |     "x = BatchNormalization(name='Encoder-Batchnorm-1')(x)\n",
 495 |     "\n",
 496 |     "# Intermediate GRU layer (optional)\n",
 497 |     "#x = GRU(latent_dim, name='Encoder-Intermediate-GRU', return_sequences=True)(x)\n",
 498 |     "#x = BatchNormalization(name='Encoder-Batchnorm-2')(x)\n",
 499 |     "\n",
 500 |     "# We do not need the `encoder_output` just the hidden state.\n",
 501 |     "_, state_h = GRU(latent_dim, return_state=True, name='Encoder-Last-GRU')(x)\n",
 502 |     "\n",
 503 |     "# Encapsulate the encoder as a separate entity so we can just \n",
 504 |     "#  encode without decoding if we want to.\n",
 505 |     "encoder_model = Model(inputs=encoder_inputs, outputs=state_h, name='Encoder-Model')\n",
 506 |     "\n",
 507 |     "seq2seq_encoder_out = encoder_model(encoder_inputs)\n",
 508 |     "\n",
 509 |     "########################\n",
 510 |     "#### Decoder Model ####\n",
 511 |     "decoder_inputs = Input(shape=(None,), name='Decoder-Input')  # for teacher forcing\n",
 512 |     "\n",
 513 |     "# Word Embedding For Decoder (ex: Issue Titles)\n",
 514 |     "dec_emb = Embedding(num_decoder_tokens, latent_dim, name='Decoder-Word-Embedding', mask_zero=False)(decoder_inputs)\n",
 515 |     "dec_bn = BatchNormalization(name='Decoder-Batchnorm-1')(dec_emb)\n",
 516 |     "\n",
 517 |     "# Set up the decoder, using the encoder's final hidden state (`seq2seq_encoder_out`) as initial state.\n",
 518 |     "decoder_gru = GRU(latent_dim, return_state=True, return_sequences=True, name='Decoder-GRU')\n",
 519 |     "decoder_gru_output, _ = decoder_gru(dec_bn, initial_state=seq2seq_encoder_out)\n",
 520 |     "x = BatchNormalization(name='Decoder-Batchnorm-2')(decoder_gru_output)\n",
 521 |     "\n",
 522 |     "# Dense layer for prediction\n",
 523 |     "decoder_dense = Dense(num_decoder_tokens, activation='softmax', name='Final-Output-Dense')\n",
 524 |     "decoder_outputs = decoder_dense(x)\n",
 525 |     "\n",
 526 |     "########################\n",
 527 |     "#### Seq2Seq Model ####\n",
 528 |     "\n",
 529 |     "#seq2seq_decoder_out = decoder_model([decoder_inputs, seq2seq_encoder_out])\n",
 530 |     "seq2seq_Model = Model([encoder_inputs, decoder_inputs], decoder_outputs)\n",
 531 |     "\n",
 532 |     "\n",
 533 |     "seq2seq_Model.compile(optimizer=optimizers.Nadam(lr=0.001), loss='sparse_categorical_crossentropy')"
 534 |    ]
 535 |   },
 536 |   {
 537 |    "cell_type": "markdown",
 538 |    "metadata": {},
 539 |    "source": [
 540 |     "**Examine Model Architecture Summary**"
 541 |    ]
 542 |   },
 543 |   {
 544 |    "cell_type": "code",
 545 |    "execution_count": 21,
 546 |    "metadata": {},
 547 |    "outputs": [
 548 |     {
 549 |      "name": "stdout",
 550 |      "output_type": "stream",
 551 |      "text": [
 552 |       "__________________________________________________________________________________________________\n",
 553 |       "Layer (type)                    Output Shape         Param #     Connected to                     \n",
 554 |       "==================================================================================================\n",
 555 |       "Decoder-Input (InputLayer)      (None, None)         0                                            \n",
 556 |       "__________________________________________________________________________________________________\n",
 557 |       "Decoder-Word-Embedding (Embeddi (None, None, 300)    1350600     Decoder-Input[0][0]              \n",
 558 |       "__________________________________________________________________________________________________\n",
 559 |       "Encoder-Input (InputLayer)      (None, 70)           0                                            \n",
 560 |       "__________________________________________________________________________________________________\n",
 561 |       "Decoder-Batchnorm-1 (BatchNorma (None, None, 300)    1200        Decoder-Word-Embedding[0][0]     \n",
 562 |       "__________________________________________________________________________________________________\n",
 563 |       "Encoder-Model (Model)           (None, 300)          2942700     Encoder-Input[0][0]              \n",
 564 |       "__________________________________________________________________________________________________\n",
 565 |       "Decoder-GRU (GRU)               [(None, None, 300),  540900      Decoder-Batchnorm-1[0][0]        \n",
 566 |       "                                                                 Encoder-Model[1][0]              \n",
 567 |       "__________________________________________________________________________________________________\n",
 568 |       "Decoder-Batchnorm-2 (BatchNorma (None, None, 300)    1200        Decoder-GRU[0][0]                \n",
 569 |       "__________________________________________________________________________________________________\n",
 570 |       "Final-Output-Dense (Dense)      (None, None, 4502)   1355102     Decoder-Batchnorm-2[0][0]        \n",
 571 |       "==================================================================================================\n",
 572 |       "Total params: 6,191,702\n",
 573 |       "Trainable params: 6,189,902\n",
 574 |       "Non-trainable params: 1,800\n",
 575 |       "__________________________________________________________________________________________________\n"
 576 |      ]
 577 |     },
 578 |     {
 579 |      "data": {
 580 |       "image/svg+xml": [
 581 |        "<svg height=\"410pt\" viewBox=\"0.00 0.00 548.00 410.00\" width=\"548pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
 582 |        "<g class=\"graph\" id=\"graph0\" transform=\"scale(1 1) rotate(0) translate(4 406)\">\n",
 583 |        "<title>G</title>\n",
 584 |        "<polygon fill=\"white\" points=\"-4,4 -4,-406 544,-406 544,4 -4,4\" stroke=\"none\"/>\n",
 585 |        "<!-- 140465408816968 -->\n",
 586 |        "<g class=\"node\" id=\"node1\"><title>140465408816968</title>\n",
 587 |        "<polygon fill=\"none\" points=\"59,-365.5 59,-401.5 268,-401.5 268,-365.5 59,-365.5\" stroke=\"black\"/>\n",
 588 |        "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"163.5\" y=\"-379.8\">Decoder-Input: InputLayer</text>\n",
 589 |        "</g>\n",
 590 |        "<!-- 140462192858392 -->\n",
 591 |        "<g class=\"node\" id=\"node2\"><title>140462192858392</title>\n",
 592 |        "<polygon fill=\"none\" points=\"15.5,-292.5 15.5,-328.5 311.5,-328.5 311.5,-292.5 15.5,-292.5\" stroke=\"black\"/>\n",
 593 |        "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"163.5\" y=\"-306.8\">Decoder-Word-Embedding: Embedding</text>\n",
 594 |        "</g>\n",
 595 |        "<!-- 140465408816968&#45;&gt;140462192858392 -->\n",
 596 |        "<g class=\"edge\" id=\"edge1\"><title>140465408816968-&gt;140462192858392</title>\n",
 597 |        "<path d=\"M163.5,-365.313C163.5,-357.289 163.5,-347.547 163.5,-338.569\" fill=\"none\" stroke=\"black\"/>\n",
 598 |        "<polygon fill=\"black\" points=\"167,-338.529 163.5,-328.529 160,-338.529 167,-338.529\" stroke=\"black\"/>\n",
 599 |        "</g>\n",
 600 |        "<!-- 140454723112688 -->\n",
 601 |        "<g class=\"node\" id=\"node4\"><title>140454723112688</title>\n",
 602 |        "<polygon fill=\"none\" points=\"0,-219.5 0,-255.5 327,-255.5 327,-219.5 0,-219.5\" stroke=\"black\"/>\n",
 603 |        "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"163.5\" y=\"-233.8\">Decoder-Batchnorm-1: BatchNormalization</text>\n",
 604 |        "</g>\n",
 605 |        "<!-- 140462192858392&#45;&gt;140454723112688 -->\n",
 606 |        "<g class=\"edge\" id=\"edge2\"><title>140462192858392-&gt;140454723112688</title>\n",
 607 |        "<path d=\"M163.5,-292.313C163.5,-284.289 163.5,-274.547 163.5,-265.569\" fill=\"none\" stroke=\"black\"/>\n",
 608 |        "<polygon fill=\"black\" points=\"167,-265.529 163.5,-255.529 160,-265.529 167,-265.529\" stroke=\"black\"/>\n",
 609 |        "</g>\n",
 610 |        "<!-- 140462193067792 -->\n",
 611 |        "<g class=\"node\" id=\"node3\"><title>140462193067792</title>\n",
 612 |        "<polygon fill=\"none\" points=\"331,-292.5 331,-328.5 540,-328.5 540,-292.5 331,-292.5\" stroke=\"black\"/>\n",
 613 |        "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"435.5\" y=\"-306.8\">Encoder-Input: InputLayer</text>\n",
 614 |        "</g>\n",
 615 |        "<!-- 140462193062128 -->\n",
 616 |        "<g class=\"node\" id=\"node5\"><title>140462193062128</title>\n",
 617 |        "<polygon fill=\"none\" points=\"345.5,-219.5 345.5,-255.5 525.5,-255.5 525.5,-219.5 345.5,-219.5\" stroke=\"black\"/>\n",
 618 |        "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"435.5\" y=\"-233.8\">Encoder-Model: Model</text>\n",
 619 |        "</g>\n",
 620 |        "<!-- 140462193067792&#45;&gt;140462193062128 -->\n",
 621 |        "<g class=\"edge\" id=\"edge3\"><title>140462193067792-&gt;140462193062128</title>\n",
 622 |        "<path d=\"M435.5,-292.313C435.5,-284.289 435.5,-274.547 435.5,-265.569\" fill=\"none\" stroke=\"black\"/>\n",
 623 |        "<polygon fill=\"black\" points=\"439,-265.529 435.5,-255.529 432,-265.529 439,-265.529\" stroke=\"black\"/>\n",
 624 |        "</g>\n",
 625 |        "<!-- 140465622676536 -->\n",
 626 |        "<g class=\"node\" id=\"node6\"><title>140465622676536</title>\n",
 627 |        "<polygon fill=\"none\" points=\"222,-146.5 222,-182.5 377,-182.5 377,-146.5 222,-146.5\" stroke=\"black\"/>\n",
 628 |        "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"299.5\" y=\"-160.8\">Decoder-GRU: GRU</text>\n",
 629 |        "</g>\n",
 630 |        "<!-- 140454723112688&#45;&gt;140465622676536 -->\n",
 631 |        "<g class=\"edge\" id=\"edge4\"><title>140454723112688-&gt;140465622676536</title>\n",
 632 |        "<path d=\"M196.075,-219.494C214.528,-209.861 237.808,-197.707 257.607,-187.37\" fill=\"none\" stroke=\"black\"/>\n",
 633 |        "<polygon fill=\"black\" points=\"259.474,-190.344 266.719,-182.614 256.235,-184.139 259.474,-190.344\" stroke=\"black\"/>\n",
 634 |        "</g>\n",
 635 |        "<!-- 140462193062128&#45;&gt;140465622676536 -->\n",
 636 |        "<g class=\"edge\" id=\"edge5\"><title>140462193062128-&gt;140465622676536</title>\n",
 637 |        "<path d=\"M402.925,-219.494C384.472,-209.861 361.192,-197.707 341.393,-187.37\" fill=\"none\" stroke=\"black\"/>\n",
 638 |        "<polygon fill=\"black\" points=\"342.765,-184.139 332.281,-182.614 339.526,-190.344 342.765,-184.139\" stroke=\"black\"/>\n",
 639 |        "</g>\n",
 640 |        "<!-- 140462190791816 -->\n",
 641 |        "<g class=\"node\" id=\"node7\"><title>140462190791816</title>\n",
 642 |        "<polygon fill=\"none\" points=\"136,-73.5 136,-109.5 463,-109.5 463,-73.5 136,-73.5\" stroke=\"black\"/>\n",
 643 |        "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"299.5\" y=\"-87.8\">Decoder-Batchnorm-2: BatchNormalization</text>\n",
 644 |        "</g>\n",
 645 |        "<!-- 140465622676536&#45;&gt;140462190791816 -->\n",
 646 |        "<g class=\"edge\" id=\"edge6\"><title>140465622676536-&gt;140462190791816</title>\n",
 647 |        "<path d=\"M299.5,-146.313C299.5,-138.289 299.5,-128.547 299.5,-119.569\" fill=\"none\" stroke=\"black\"/>\n",
 648 |        "<polygon fill=\"black\" points=\"303,-119.529 299.5,-109.529 296,-119.529 303,-119.529\" stroke=\"black\"/>\n",
 649 |        "</g>\n",
 650 |        "<!-- 140462190791200 -->\n",
 651 |        "<g class=\"node\" id=\"node8\"><title>140462190791200</title>\n",
 652 |        "<polygon fill=\"none\" points=\"195,-0.5 195,-36.5 404,-36.5 404,-0.5 195,-0.5\" stroke=\"black\"/>\n",
 653 |        "<text font-family=\"Times,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"299.5\" y=\"-14.8\">Final-Output-Dense: Dense</text>\n",
 654 |        "</g>\n",
 655 |        "<!-- 140462190791816&#45;&gt;140462190791200 -->\n",
 656 |        "<g class=\"edge\" id=\"edge7\"><title>140462190791816-&gt;140462190791200</title>\n",
 657 |        "<path d=\"M299.5,-73.3129C299.5,-65.2895 299.5,-55.5475 299.5,-46.5691\" fill=\"none\" stroke=\"black\"/>\n",
 658 |        "<polygon fill=\"black\" points=\"303,-46.5288 299.5,-36.5288 296,-46.5289 303,-46.5288\" stroke=\"black\"/>\n",
 659 |        "</g>\n",
 660 |        "</g>\n",
 661 |        "</svg>"
 662 |       ],
 663 |       "text/plain": [
 664 |        "<IPython.core.display.SVG object>"
 665 |       ]
 666 |      },
 667 |      "metadata": {},
 668 |      "output_type": "display_data"
 669 |     }
 670 |    ],
 671 |    "source": [
 672 |     "from seq2seq_utils import viz_model_architecture\n",
 673 |     "seq2seq_Model.summary()\n",
 674 |     "viz_model_architecture(seq2seq_Model)"
 675 |    ]
 676 |   },
 677 |   {
 678 |    "cell_type": "markdown",
 679 |    "metadata": {},
 680 |    "source": [
 681 |     "# Train Model"
 682 |    ]
 683 |   },
 684 |   {
 685 |    "cell_type": "code",
 686 |    "execution_count": 22,
 687 |    "metadata": {},
 688 |    "outputs": [
 689 |     {
 690 |      "name": "stdout",
 691 |      "output_type": "stream",
 692 |      "text": [
 693 |       "Train on 1584000 samples, validate on 216000 samples\n",
 694 |       "Epoch 1/7\n",
 695 |       "1584000/1584000 [==============================] - 265s 167us/step - loss: 2.7234 - val_loss: 2.4321\n"
 696 |      ]
 697 |     },
 698 |     {
 699 |      "name": "stderr",
 700 |      "output_type": "stream",
 701 |      "text": [
 702 |       "/ds/.local/lib/python3.6/site-packages/keras/engine/topology.py:2344: UserWarning: Layer Decoder-GRU was passed non-serializable keyword arguments: {'initial_state': [<tf.Tensor 'Encoder-Model/Encoder-Last-GRU/while/Exit_2:0' shape=(?, 300) dtype=float32>]}. They will not be included in the serialized model (and thus will be missing at deserialization time).\n",
 703 |       "  str(node.arguments) + '. They will not be included '\n"
 704 |      ]
 705 |     },
 706 |     {
 707 |      "name": "stdout",
 708 |      "output_type": "stream",
 709 |      "text": [
 710 |       "Epoch 2/7\n",
 711 |       "1584000/1584000 [==============================] - 263s 166us/step - loss: 2.3446 - val_loss: 2.3563\n",
 712 |       "Epoch 3/7\n",
 713 |       "1584000/1584000 [==============================] - 263s 166us/step - loss: 2.2608 - val_loss: 2.3281\n",
 714 |       "Epoch 4/7\n",
 715 |       "1584000/1584000 [==============================] - 263s 166us/step - loss: 2.2117 - val_loss: 2.3161\n",
 716 |       "Epoch 5/7\n",
 717 |       "1584000/1584000 [==============================] - 263s 166us/step - loss: 2.1767 - val_loss: 2.3110\n",
 718 |       "Epoch 6/7\n",
 719 |       "1584000/1584000 [==============================] - 263s 166us/step - loss: 2.1494 - val_loss: 2.3095\n",
 720 |       "Epoch 7/7\n",
 721 |       "1584000/1584000 [==============================] - 265s 167us/step - loss: 2.1268 - val_loss: 2.3124\n"
 722 |      ]
 723 |     }
 724 |    ],
 725 |    "source": [
 726 |     "from keras.callbacks import CSVLogger, ModelCheckpoint\n",
 727 |     "\n",
 728 |     "script_name_base = 'tutorial_seq2seq'\n",
 729 |     "csv_logger = CSVLogger('{:}.log'.format(script_name_base))\n",
 730 |     "model_checkpoint = ModelCheckpoint('{:}.epoch{{epoch:02d}}-val{{val_loss:.5f}}.hdf5'.format(script_name_base),\n",
 731 |     "                                   save_best_only=True)\n",
 732 |     "\n",
 733 |     "batch_size = 1200\n",
 734 |     "epochs = 7\n",
 735 |     "history = seq2seq_Model.fit([encoder_input_data, decoder_input_data], np.expand_dims(decoder_target_data, -1),\n",
 736 |     "          batch_size=batch_size,\n",
 737 |     "          epochs=epochs,\n",
 738 |     "          validation_split=0.12, callbacks=[csv_logger, model_checkpoint])"
 739 |    ]
 740 |   },
 741 |   {
 742 |    "cell_type": "code",
 743 |    "execution_count": 23,
 744 |    "metadata": {},
 745 |    "outputs": [
 746 |     {
 747 |      "name": "stderr",
 748 |      "output_type": "stream",
 749 |      "text": [
 750 |       "/ds/.local/lib/python3.6/site-packages/keras/engine/topology.py:2344: UserWarning: Layer Decoder-GRU was passed non-serializable keyword arguments: {'initial_state': [<tf.Tensor 'Encoder-Model/Encoder-Last-GRU/while/Exit_2:0' shape=(?, 300) dtype=float32>]}. They will not be included in the serialized model (and thus will be missing at deserialization time).\n",
 751 |       "  str(node.arguments) + '. They will not be included '\n"
 752 |      ]
 753 |     }
 754 |    ],
 755 |    "source": [
 756 |     "#save model\n",
 757 |     "seq2seq_Model.save('seq2seq_model_tutorial.h5')"
 758 |    ]
 759 |   },
 760 |   {
 761 |    "cell_type": "markdown",
 762 |    "metadata": {},
 763 |    "source": [
 764 |     "# See Results On Holdout Set"
 765 |    ]
 766 |   },
 767 |   {
 768 |    "cell_type": "code",
 769 |    "execution_count": 27,
 770 |    "metadata": {
 771 |     "collapsed": true
 772 |    },
 773 |    "outputs": [],
 774 |    "source": [
 775 |     "from seq2seq_utils import Seq2Seq_Inference\n",
 776 |     "seq2seq_inf = Seq2Seq_Inference(encoder_preprocessor=body_pp,\n",
 777 |     "                                 decoder_preprocessor=title_pp,\n",
 778 |     "                                 seq2seq_model=seq2seq_Model)"
 779 |    ]
 780 |   },
 781 |   {
 782 |    "cell_type": "code",
 783 |    "execution_count": 34,
 784 |    "metadata": {
 785 |     "scrolled": false
 786 |    },
 787 |    "outputs": [
 788 |     {
 789 |      "name": "stdout",
 790 |      "output_type": "stream",
 791 |      "text": [
 792 |       "\n",
 793 |       "\n",
 794 |       "==============================================\n",
 795 |       "============== Example # 137237 =================\n",
 796 |       "\n",
 797 |       "\"https://github.com/envisionnw/upland/issues/90\"\n",
 798 |       "Issue Body:\n",
 799 |       " <a href= https://github.com/ncpn ><img src= https://avatars3.githubusercontent.com/u/9699622?v=3 align= left width= 96 height= 96 hspace= 10 ></img></a> issue by ncpn https://github.com/ncpn _friday mar 17, 2017 at 19:31 gmt_\n",
 800 |       "_originally opened as https://github.com/ncpn/upland/issues/90_ ---- check for odd species after completing the plot. compare with list of prior year's species. paper list \n",
 801 |       "\n",
 802 |       "Original Title:\n",
 803 |       " end of plot issues - identify odd species\n",
 804 |       "\n",
 805 |       "****** Machine Generated Title (Prediction) ******:\n",
 806 |       " closed species plot not working\n",
 807 |       "\n",
 808 |       "\n",
 809 |       "==============================================\n",
 810 |       "============== Example # 132413 =================\n",
 811 |       "\n",
 812 |       "\"https://github.com/open-organization-ambassadors/open-org-it-culture/issues/38\"\n",
 813 |       "Issue Body:\n",
 814 |       " need to include a specific call to the opensource.com writers list during the announcement part of the book series process. \n",
 815 |       "\n",
 816 |       "Original Title:\n",
 817 |       " update announcement process\n",
 818 |       "\n",
 819 |       "****** Machine Generated Title (Prediction) ******:\n",
 820 |       " add a list of the book series to the book\n",
 821 |       "\n",
 822 |       "\n",
 823 |       "==============================================\n",
 824 |       "============== Example # 110893 =================\n",
 825 |       "\n",
 826 |       "\"https://github.com/arquillian/arquillian-cube/issues/795\"\n",
 827 |       "Issue Body:\n",
 828 |       " issue overview add a new property to disable detection of image stream files those ended with -is.yml from target directory. expected behaviour by default cube should not process image stream files if user does not set it. current behaviour cube always try to execute -is.yml files which can cause some problems in most of cases, for example if you are using kuberentes instead of openshift or if you use together fabric8 maven plugin with cube. \n",
 829 |       "\n",
 830 |       "Original Title:\n",
 831 |       " add a new property to disable detection of image stream files\n",
 832 |       "\n",
 833 |       "****** Machine Generated Title (Prediction) ******:\n",
 834 |       " add a way to disable image detection\n",
 835 |       "\n",
 836 |       "\n",
 837 |       "==============================================\n",
 838 |       "============== Example # 179062 =================\n",
 839 |       "\n",
 840 |       "\"https://github.com/TryGhost/Ghost/issues/9299\"\n",
 841 |       "Issue Body:\n",
 842 |       " in ghost 1.0 we set out to get rid of incremental ids. we didn't quite achieve it, as the migrations table still uses it, and i believe there is still some hardcoded expectations around the ghost owner id. regarding incremental ids in the migrations table i raised an issue on knex migrator: https://github.com/tryghost/knex-migrator/issues/91 we need to also try to get rid of reliance on ids inside of ghost itself. this issue needs more detail really - raising it as a starting point. \n",
 843 |       "\n",
 844 |       "Original Title:\n",
 845 |       " remove all reliance on incremental ids\n",
 846 |       "\n",
 847 |       "****** Machine Generated Title (Prediction) ******:\n",
 848 |       " incremental migration to oracle db\n",
 849 |       "\n",
 850 |       "\n",
 851 |       "==============================================\n",
 852 |       "============== Example # 54381 =================\n",
 853 |       "\n",
 854 |       "\"https://github.com/googlevr/gvr-unity-sdk/issues/509\"\n",
 855 |       "Issue Body:\n",
 856 |       " hi, i'm trying to get the deep link working. i can send the activity, open the app and read dashcode and get booleanextra and all that. so activating the deep link works fine and for example when i call getaction, it returns android.intent.action.view which is correct. the main problem is that getdatastring and getscheme always return null. i'm out of test ideas. do you think its a bug? i have attached the manifest file for your reference. and i'm using gvrintent.getdata that always returns null. islaunchedfromvr and getintenthashcode are working fine. and this is the command line i used to test as an example: ./adb shell am start -w -a android.intent.action.view -d shapevisual://com.shapevisual.app?wl=gfs com.shapevisual.app androidmanifest.xml.txt https://github.com/googlevr/gvr-unity-sdk/files/864522/androidmanifest.xml.txt \n",
 857 |       "\n",
 858 |       "Original Title:\n",
 859 |       " android - deep link - getdatastring always returns null\n",
 860 |       "\n",
 861 |       "****** Machine Generated Title (Prediction) ******:\n",
 862 |       " deep link and null return\n",
 863 |       "\n",
 864 |       "\n",
 865 |       "==============================================\n",
 866 |       "============== Example # 113341 =================\n",
 867 |       "\n",
 868 |       "\"https://github.com/sten626/mirror-match/issues/26\"\n",
 869 |       "Issue Body:\n",
 870 |       " right now there is no logging of any kind. read up on proper app logging in angular and add it to the app. \n",
 871 |       "\n",
 872 |       "Original Title:\n",
 873 |       " add logging to app\n",
 874 |       "\n",
 875 |       "****** Machine Generated Title (Prediction) ******:\n",
 876 |       " add logging to app\n",
 877 |       "\n",
 878 |       "\n",
 879 |       "==============================================\n",
 880 |       "============== Example # 57566 =================\n",
 881 |       "\n",
 882 |       "\"https://github.com/convox/praxis/issues/319\"\n",
 883 |       "Issue Body:\n",
 884 |       " a pro user has expressed a need for this. \n",
 885 |       "\n",
 886 |       "Original Title:\n",
 887 |       " support for ev green bar ssl certs\n",
 888 |       "\n",
 889 |       "****** Machine Generated Title (Prediction) ******:\n",
 890 |       " add a new user\n",
 891 |       "\n",
 892 |       "\n",
 893 |       "==============================================\n",
 894 |       "============== Example # 199162 =================\n",
 895 |       "\n",
 896 |       "\"https://github.com/ChurchCRM/CRM/issues/2403\"\n",
 897 |       "Issue Body:\n",
 898 |       " im presently upgrading to 2.7.2 from 2.7.1, my automated update is not working from my site, so im just uploading the new code and replacing the .htaccess and config.php . is there any other changes that i should be aware of to make sure that the update is successfull ? \n",
 899 |       "\n",
 900 |       "Original Title:\n",
 901 |       " upgrading to 2.7.2\n",
 902 |       "\n",
 903 |       "****** Machine Generated Title (Prediction) ******:\n",
 904 |       " question : how to update the code ?\n",
 905 |       "\n",
 906 |       "\n",
 907 |       "==============================================\n",
 908 |       "============== Example # 187512 =================\n",
 909 |       "\n",
 910 |       "\"https://github.com/keepassxreboot/keepassxc/issues/693\"\n",
 911 |       "Issue Body:\n",
 912 |       " i tried to enable and use yubikey on snap version but yubikey doesn't shows on the list of valid devices. i done a comparison with debian package same version 2.2.0 and works fine. so i think the problem could be related to snap access to usb devices. \n",
 913 |       "\n",
 914 |       "Original Title:\n",
 915 |       " yubikey doesn't works on snap version\n",
 916 |       "\n",
 917 |       "****** Machine Generated Title (Prediction) ******:\n",
 918 |       " snap version does n't work on ubuntu * number *\n",
 919 |       "\n",
 920 |       "\n",
 921 |       "==============================================\n",
 922 |       "============== Example # 18015 =================\n",
 923 |       "\n",
 924 |       "\"https://github.com/primefaces/primeng/issues/4456\"\n",
 925 |       "Issue Body:\n",
 926 |       " hi folks, <h3 class= first >advanced</h3> <p-fileupload name= demo url= ./upload.php onupload = onuploadhandler $event multiple= multiple accept= image/ maxfilesize= 1000000 > <ng-template ptemplate type= content > <ul ngif= uploadedfiles.length > <li ngfor= let file of uploadedfiles >{{file.name}} - {{file.size}} bytes</li> </ul> </ng-template> </p-fileupload> onuploadhandler event:any { alert 'test' ; for let file of event.files { this.uploadedfiles.push file ; } } onuploadhandler function is not working. what's wrong? i am using primeng - ^5.0.0-rc.0. any idea? \n",
 927 |       "\n",
 928 |       "Original Title:\n",
 929 |       " onupload function is not working!\n",
 930 |       "\n",
 931 |       "****** Machine Generated Title (Prediction) ******:\n",
 932 |       " how to use this with multiple angular - cli * number *\n",
 933 |       "\n",
 934 |       "\n",
 935 |       "==============================================\n",
 936 |       "============== Example # 153048 =================\n",
 937 |       "\n",
 938 |       "\"https://github.com/imabug/raddb/issues/212\"\n",
 939 |       "Issue Body:\n",
 940 |       " adding a new test type is broken. this error is produced when the submit button is pressed. httpexception in handler.php line 133: this action is unauthorized. \n",
 941 |       "\n",
 942 |       "Original Title:\n",
 943 |       " error adding new test type\n",
 944 |       "\n",
 945 |       "****** Machine Generated Title (Prediction) ******:\n",
 946 |       " new test issue\n",
 947 |       "\n",
 948 |       "\n",
 949 |       "==============================================\n",
 950 |       "============== Example # 28004 =================\n",
 951 |       "\n",
 952 |       "\"https://github.com/Carthage/Carthage/issues/1936\"\n",
 953 |       "Issue Body:\n",
 954 |       " one of the more confusing parts around how carthage installation works currently is that it requires a dylib itself with many more embedded dylibs to be installed alongside the carthage binary carthagekit.framework . if we have support for 1379, would it be possible to statically link carthage's dependencies into the primary carthage binary? if so, it could mean that there would be only one file to install. the primary unanswered question in my mind is whether it's possible to statically link against the swift core dylibs. does anyone know if there's an exposed way to do this? they seem to be present at this path /applications/xcode.app/contents/developer/toolchains/xcodedefault.xctoolchain/usr/lib/swift_static/macosx , so perhaps it is as simple as linking against these .a files. it would be a little strange for carthage to use the non-default flow for embedding built frameworks, but it is a cli rather than an app so perhaps this may be a good choice for this scenario. additionally, this would likely resolve issues where another version of carthagekit.framework steaks into a user's @rpath before the one that they just downloaded, causing a new version of the carthage binary to use the wrong version of carthagekit.framework . thoughts? thanks for reading. \n",
 955 |       "\n",
 956 |       "Original Title:\n",
 957 |       " statically link carthage frameworks into carthage?\n",
 958 |       "\n",
 959 |       "****** Machine Generated Title (Prediction) ******:\n",
 960 |       " add carthage support for carthage\n",
 961 |       "\n",
 962 |       "\n",
 963 |       "==============================================\n",
 964 |       "============== Example # 131367 =================\n",
 965 |       "\n",
 966 |       "\"https://github.com/qlicker/qlicker/issues/341\"\n",
 967 |       "Issue Body:\n",
 968 |       " when looking at the course details as a professor, the message add ta to ... shows up when clicking the add student button. \n",
 969 |       "\n",
 970 |       "Original Title:\n",
 971 |       " add student to course displays the wrong message\n",
 972 |       "\n",
 973 |       "****** Machine Generated Title (Prediction) ******:\n",
 974 |       " add ta to ta course\n",
 975 |       "\n",
 976 |       "\n",
 977 |       "==============================================\n",
 978 |       "============== Example # 82152 =================\n",
 979 |       "\n",
 980 |       "\"https://github.com/RetroWoW/RetroWoW/issues/96\"\n",
 981 |       "Issue Body:\n",
 982 |       " description : hunter pets are not summoned upon res in battleground current behaviour : hunter pets are not summoned upon res in battleground expected behaviour : spirit guides in battlegrounds should summon/resurrect your current pet when the hunter is resurrected. steps to reproduce the problem : 1. die in a bg 2. get resurected 3. source: http://wowwiki.wikia.com/wiki/patch_1.5.0 \n",
 983 |       "\n",
 984 |       "Original Title:\n",
 985 |       " hunter pets are not summoned upon res in battleground\n",
 986 |       "\n",
 987 |       "****** Machine Generated Title (Prediction) ******:\n",
 988 |       " * number*.2 * number*.5 not possible to be used in a new\n",
 989 |       "\n",
 990 |       "\n",
 991 |       "==============================================\n",
 992 |       "============== Example # 160809 =================\n",
 993 |       "\n",
 994 |       "\"https://github.com/kubernetes/ingress-nginx/issues/1825\"\n",
 995 |       "Issue Body:\n",
 996 |       " ie 11 does not support permanent redirect 308 with default headers, so it might not be the best default. it was introduced in this pull request: https://github.com/kubernetes/ingress-nginx/pull/1776 you could also support a fall back mode based on user agent: https://stackoverflow.com/questions/37701100/redirecting-ie-7-and-ie-11-by-useragent-nginx-config it might be possible to get ie 11 to support permanent redirect 308 if the redirect page presented does not trigger compatibility mode, but older versions of ie still won't support 308. \n",
 997 |       "\n",
 998 |       "Original Title:\n"
 999 |      ]
1000 |     },
1001 |     {
1002 |      "name": "stdout",
1003 |      "output_type": "stream",
1004 |      "text": [
1005 |       " permanent redirect 308 not supported in ie11\n",
1006 |       "\n",
1007 |       "****** Machine Generated Title (Prediction) ******:\n",
1008 |       " redirect to * number * redirect does not work\n",
1009 |       "\n",
1010 |       "\n",
1011 |       "==============================================\n",
1012 |       "============== Example # 197532 =================\n",
1013 |       "\n",
1014 |       "\"https://github.com/ngrx/platform/issues/49\"\n",
1015 |       "Issue Body:\n",
1016 |       " export const selectfeature = createfeatureselector<featurestate> 'feature' ; ~~~~~~~~~~~~~~~ error ts4023: exported variable 'selectfeature' has or is using name 'memoizedselector' from external module .../ngrx/modules/store/src/selector but cannot be named. \n",
1017 |       "\n",
1018 |       "Original Title:\n",
1019 |       " memoizedselector needs to be exported as well\n",
1020 |       "\n",
1021 |       "****** Machine Generated Title (Prediction) ******:\n",
1022 |       " export ' ' : ' can not be used in ' module\n",
1023 |       "\n",
1024 |       "\n",
1025 |       "==============================================\n",
1026 |       "============== Example # 163719 =================\n",
1027 |       "\n",
1028 |       "\"https://github.com/aspnet/StaticFiles/issues/211\"\n",
1029 |       "Issue Body:\n",
1030 |       " staticfiles/src/microsoft.aspnet.staticfiles/fileextensioncontenttypeprovider.cs is missing the outlook .msg mimetype - currently manually doing the following: var provider = new fileextensioncontenttypeprovider ; provider.mappings.add .msg , application/vnd.ms-outlook ; ... but i think it would be good to have it included directly in the code. \n",
1031 |       "\n",
1032 |       "Original Title:\n",
1033 |       " missing .msg mimetype mapping\n",
1034 |       "\n",
1035 |       "****** Machine Generated Title (Prediction) ******:\n",
1036 |       " missing outlook / auto - parsing of the source - code\n",
1037 |       "\n",
1038 |       "\n",
1039 |       "==============================================\n",
1040 |       "============== Example # 169328 =================\n",
1041 |       "\n",
1042 |       "\"https://github.com/epics-modules/autosave/issues/13\"\n",
1043 |       "Issue Body:\n",
1044 |       " tech talk message as follows: > hello, > > > here at slac, we saw that autosave is failing to recover the data for a waveform with 1 element. for testing purposes, we changed manually nelm to 2 and the recovery succeeded. another test was to manually edit the sav file, adding the keyword @array@ and the recovering succeeded, too.​ > > > i saw the following comment in 5.4.1 release: previously, restoring an array which had been saved with zero or one values failed. also, manual restore including restore by configmenu of any array pv caused a seg fault. . > > > as we are using 5.7.1, i think this problem is already corrected since 5.4.1. the behavior was observed when using epics 3.15. > > > the strange thing is that the same version of autosave seems to be working in epics 3.14, but not in 3.15. > > > i saw that autosave uses ca_element_count from the channel access api. maybe something changed in this function in epics 3.15? > > > thank you for your help. > > > márcio paduan donadio > > system control engineer - slac > \n",
1045 |       "\n",
1046 |       "Original Title:\n",
1047 |       " recovering data from waveform with 1 element\n",
1048 |       "\n",
1049 |       "****** Machine Generated Title (Prediction) ******:\n",
1050 |       " recover the data from the * number *\n",
1051 |       "\n",
1052 |       "\n",
1053 |       "==============================================\n",
1054 |       "============== Example # 85076 =================\n",
1055 |       "\n",
1056 |       "\"https://github.com/kristoferjoseph/flexboxgrid/issues/233\"\n",
1057 |       "Issue Body:\n",
1058 |       " hello! when i using auto width: <div class= row center-xs center-sm center-md center-lg > <div class= col-xs col-sm col-md col-lg > <div class= box top bottom id= white >1</div> </div> <div class= col-xs col-sm col-md col-lg > <div class= box top bottom id= white >2</div> </div> </div> in chrome, firefox, vivaldi and android devices, all ok - content is transferred as filling: ! screenshot at 15 15-23-31 https://cloud.githubusercontent.com/assets/13396947/22974363/ff5ee854-f392-11e6-91ef-01844d8f655d.png but in safari om macos , displayed content in one row and add horizontal scroll: ! screenshot at 15 15-27-25 https://cloud.githubusercontent.com/assets/13396947/22974432/4ff8d496-f393-11e6-8abe-c04a6029d9ef.png how can i fix it? \n",
1059 |       "\n",
1060 |       "Original Title:\n",
1061 |       " content filling on safari\n",
1062 |       "\n",
1063 |       "****** Machine Generated Title (Prediction) ******:\n",
1064 |       " table not working\n",
1065 |       "\n",
1066 |       "\n",
1067 |       "==============================================\n",
1068 |       "============== Example # 13218 =================\n",
1069 |       "\n",
1070 |       "\"https://github.com/koorellasuresh/UKRegionTest/issues/82803\"\n",
1071 |       "Issue Body:\n",
1072 |       " first from flow in uk south \n",
1073 |       "\n",
1074 |       "Original Title:\n",
1075 |       " first from flow in uk south\n",
1076 |       "\n",
1077 |       "****** Machine Generated Title (Prediction) ******:\n",
1078 |       " first from flow in uk south\n",
1079 |       "\n",
1080 |       "\n",
1081 |       "==============================================\n",
1082 |       "============== Example # 193511 =================\n",
1083 |       "\n",
1084 |       "\"https://github.com/highcharts/highcharts/issues/7347\"\n",
1085 |       "Issue Body:\n",
1086 |       " i'm using highstockcharts and recently upgraded to v6.0.3. since then, the tooltips won't be shown anymore as soon as the tooltip is higher than the actual chart. see the minimum example which i've provided. expected behaviour the tooltip should be shown. actual behaviour the tooltip is not shown if the tooltip the height is larger than the actual chart. live demo with steps to reproduce http://jsfiddle.net/n1h3q3sr/ uncomment the part teststring += <br/> not working anymore to make the tooltip visible. affected browser s chrome / firefox and most probably ie too \n",
1087 |       "\n",
1088 |       "Original Title:\n",
1089 |       " tooltip is not shown anymore if tooltip is larger than the chart\n",
1090 |       "\n",
1091 |       "****** Machine Generated Title (Prediction) ******:\n",
1092 |       " tooltip not shown on * number *\n",
1093 |       "\n",
1094 |       "\n",
1095 |       "==============================================\n",
1096 |       "============== Example # 7320 =================\n",
1097 |       "\n",
1098 |       "\"https://github.com/Criccle/GoogleCombo/issues/1\"\n",
1099 |       "Issue Body:\n",
1100 |       " unlike google chart for mendix, google combo chart for mendix cannot redraw a chart. only one chart can be drawn only once but no redraw or two charts in a page is possible. thus, this module is useless at all with this condition. \n",
1101 |       "\n",
1102 |       "Original Title:\n",
1103 |       " cannot redraw a chart by google combo chart for mendix\n",
1104 |       "\n",
1105 |       "****** Machine Generated Title (Prediction) ******:\n",
1106 |       " google charts not working\n",
1107 |       "\n",
1108 |       "\n",
1109 |       "==============================================\n",
1110 |       "============== Example # 42159 =================\n",
1111 |       "\n",
1112 |       "\"https://github.com/cviebrock/eloquent-sluggable/issues/337\"\n",
1113 |       "Issue Body:\n",
1114 |       " hello! i have a model with multiple slug fields setup like this: return 'slug_en' => 'source' => 'name_en' , 'slug_es' => 'source' => 'name_es' , 'slug_fr' => 'source' => 'name_fr' , 'slug_it' => 'source' => 'name_it' , 'slug_de' => 'source' => 'name_de' , ; i want to findbyslug on all of them, i have tried with slugkeyname but no luck. is there something im missing? thank you \n",
1115 |       "\n",
1116 |       "Original Title:\n",
1117 |       " find on multiple slug fields\n",
1118 |       "\n",
1119 |       "****** Machine Generated Title (Prediction) ******:\n",
1120 |       " multiple fields with same name\n",
1121 |       "\n",
1122 |       "\n",
1123 |       "==============================================\n",
1124 |       "============== Example # 184774 =================\n",
1125 |       "\n",
1126 |       "\"https://github.com/hylang/hy/issues/1271\"\n",
1127 |       "Issue Body:\n",
1128 |       " it was released in 2008, so it's almost 10 years old. also, we don't test it. \n",
1129 |       "\n",
1130 |       "Original Title:\n",
1131 |       " drop support for python 2.6\n",
1132 |       "\n",
1133 |       "****** Machine Generated Title (Prediction) ******:\n",
1134 |       " remove old version from * number *\n",
1135 |       "\n",
1136 |       "\n",
1137 |       "==============================================\n",
1138 |       "============== Example # 121668 =================\n",
1139 |       "\n",
1140 |       "\"https://github.com/MajkiIT/polish-ads-filter/issues/3646\"\n",
1141 |       "Issue Body:\n",
1142 |       " @majkiit w prebake jest reguła, która psuje logowanie na gg. a najwyraźniej są jeszcze osoby, które korzystają z gg i z listy prebake. więc nie wiem czy warto dać whitelist na nasz filtr czy nie, co o tym sądzisz? https://github.com/azet12/popupblocker/issues/68 issuecomment-329763381 \n",
1143 |       "\n",
1144 |       "Original Title:\n",
1145 |       " gg.pl prebake\n",
1146 |       "\n",
1147 |       "****** Machine Generated Title (Prediction) ******:\n",
1148 |       " problem z login\n",
1149 |       "\n",
1150 |       "\n",
1151 |       "==============================================\n",
1152 |       "============== Example # 34871 =================\n",
1153 |       "\n",
1154 |       "\"https://github.com/WorldDominationArmy/geodk-reqtest-req/issues/1\"\n",
1155 |       "Issue Body:\n",
1156 |       " afsnit: 3. krav til løsningens overordnede egenskaber relateret: \n",
1157 |       "\n",
1158 |       "Original Title:\n",
1159 |       " krav 1-eksterne kilder til datasupplering\n",
1160 |       "\n",
1161 |       "****** Machine Generated Title (Prediction) ******:\n",
1162 |       " * number * - * number * - * number * -\n",
1163 |       "\n",
1164 |       "\n",
1165 |       "==============================================\n",
1166 |       "============== Example # 7978 =================\n",
1167 |       "\n",
1168 |       "\"https://github.com/blockstack/blockstack-portal/issues/416\"\n",
1169 |       "Issue Body:\n",
1170 |       " i noticed that gmp is installed by the macos installer script. noticed that the library was not loaded https://github.com/blockstack/blockstack-portal/issues/415 issuecomment-294392702 for albert: library not loaded: /usr/local/opt/gmp/lib/libgmp.10.dylib referenced from: /private/tmp/blockstack-venv/lib/python2.7/site-packages/fastecdsa/curvemath.so reason: image not found he is on macos 10.12. let's see if we can reproduce this error locally. \n",
1171 |       "\n",
1172 |       "Original Title:\n",
1173 |       " testing gmp and libffi installation via script\n",
1174 |       "\n",
1175 |       "****** Machine Generated Title (Prediction) ******:\n",
1176 |       " library not loaded in macos\n",
1177 |       "\n",
1178 |       "\n",
1179 |       "==============================================\n",
1180 |       "============== Example # 28099 =================\n",
1181 |       "\n",
1182 |       "\"https://github.com/EcrituresNumeriques/transformation_jats_erudit/issues/2\"\n",
1183 |       "Issue Body:\n",
1184 |       " avons-nous une liste définitive des attributs possible de 'fig-type' pour l'extrant de jats? le balisage de mon côté, pour érudit, dépend de la valeur sémantique de l'attribut de cette balise et je voudrais pouvoir styler les différents cas de figures haha , qui sont : <figure>, <tableau>, <encadre>, <objetmedia>, pour les images et le son. merci. \n",
1185 |       "\n",
1186 |       "Original Title:\n",
1187 |       " attributs possibles pour <fig> sous jats\n",
1188 |       "\n",
1189 |       "****** Machine Generated Title (Prediction) ******:\n",
1190 |       " * number * : gestion des dates\n",
1191 |       "\n",
1192 |       "\n",
1193 |       "==============================================\n",
1194 |       "============== Example # 24459 =================\n",
1195 |       "\n",
1196 |       "\"https://github.com/go-gitea/gitea/issues/656\"\n",
1197 |       "Issue Body:\n",
1198 |       " when adding a new member to an organisation owner team, addteammember does not set watches for the new team member. together with 653 that is pretty confusing behaviour and probably a bug. \n",
1199 |       "\n",
1200 |       "Original Title:\n",
1201 |       " new owner team member does not get watches for org repo's\n"
1202 |      ]
1203 |     },
1204 |     {
1205 |      "name": "stdout",
1206 |      "output_type": "stream",
1207 |      "text": [
1208 |       "\n",
1209 |       "****** Machine Generated Title (Prediction) ******:\n",
1210 |       " new member does not set the team member\n",
1211 |       "\n",
1212 |       "\n",
1213 |       "==============================================\n",
1214 |       "============== Example # 64152 =================\n",
1215 |       "\n",
1216 |       "\"https://github.com/linuxboss182/SoftEng-2017/issues/84\"\n",
1217 |       "Issue Body:\n",
1218 |       " need 3-4 people to present our application to the class on wednesday. applicants must: - not have presented last week - understand how to use the application - be ready to kick ass remember, you have to present at either this wednesday or the next one, so plan accordingly! \n",
1219 |       "\n",
1220 |       "Original Title:\n",
1221 |       " iteration 2 presentation\n",
1222 |       "\n",
1223 |       "****** Machine Generated Title (Prediction) ******:\n",
1224 |       " add a new class to the application\n",
1225 |       "\n",
1226 |       "\n",
1227 |       "==============================================\n",
1228 |       "============== Example # 69032 =================\n",
1229 |       "\n",
1230 |       "\"https://github.com/kartoza/qgis.org.za/issues/184\"\n",
1231 |       "Issue Body:\n",
1232 |       " i created a form 'contact' and it seems to work but the form labels do not appear on the form so it is a bit useless. please get the labels to appear and merge and release with other improvements asap \n",
1233 |       "\n",
1234 |       "Original Title:\n",
1235 |       " form labels not appearing\n",
1236 |       "\n",
1237 |       "****** Machine Generated Title (Prediction) ******:\n",
1238 |       " form labels not showing up\n",
1239 |       "\n",
1240 |       "\n",
1241 |       "==============================================\n",
1242 |       "============== Example # 132252 =================\n",
1243 |       "\n",
1244 |       "\"https://github.com/NTU-ASH/tree-generator/issues/18\"\n",
1245 |       "Issue Body:\n",
1246 |       " sort a series of node values within the tree, e.g. -take values from 0-9 up to 15 -sort them into a tree with the middle value as the root and the lowest on the left/highest on the right -perhaps do the same for letters so a is to the left and z is to the right \n",
1247 |       "\n",
1248 |       "Original Title:\n",
1249 |       " binary search tree generation\n",
1250 |       "\n",
1251 |       "****** Machine Generated Title (Prediction) ******:\n",
1252 |       " sort tree nodes\n",
1253 |       "\n",
1254 |       "\n",
1255 |       "==============================================\n",
1256 |       "============== Example # 53765 =================\n",
1257 |       "\n",
1258 |       "\"https://github.com/multiformats/multihash/issues/74\"\n",
1259 |       "Issue Body:\n",
1260 |       " why not use the existing crypt format? $.$ \n",
1261 |       "\n",
1262 |       "Original Title:\n",
1263 |       " why not use the existing crypt format? $.$\n",
1264 |       "\n",
1265 |       "****** Machine Generated Title (Prediction) ******:\n",
1266 |       " why not use the existing format ?\n",
1267 |       "\n",
1268 |       "\n",
1269 |       "==============================================\n",
1270 |       "============== Example # 123370 =================\n",
1271 |       "\n",
1272 |       "\"https://github.com/PSEBergclubBern/BergclubBern/issues/181\"\n",
1273 |       "Issue Body:\n",
1274 |       " ich kann bilder einfügen: ! 2017-05-07 14_01_33-tourenbericht anpassen bergclub bern wordpress https://cloud.githubusercontent.com/assets/18282099/25780754/d2260da6-332d-11e7-8350-f46821b300d5.png aber auf der website werden diese nicht angezeigt: ! 2017-05-07 14_00_32-bergclub bern https://cloud.githubusercontent.com/assets/18282099/25780756/defc015c-332d-11e7-982e-e51b758c8179.png \n",
1275 |       "\n",
1276 |       "Original Title:\n",
1277 |       " bilder eines tourenberichts werden nicht angezeigt\n",
1278 |       "\n",
1279 |       "****** Machine Generated Title (Prediction) ******:\n",
1280 |       " website : update to * url *\n",
1281 |       "\n",
1282 |       "\n",
1283 |       "==============================================\n",
1284 |       "============== Example # 57636 =================\n",
1285 |       "\n",
1286 |       "\"https://github.com/postmanlabs/postman-app-support/issues/2996\"\n",
1287 |       "Issue Body:\n",
1288 |       " welcome to the postman issue tracker. any feature requests / bug reports can be posted here. any security-related bugs should be reported directly to security@getpostman.com version/app information: 1. postman version: 4.10.7 2. app chrome app or mac app : linux app not sure if its also happening on other oss 3. os details: ubuntu 14.06 4. is the interceptor on and enabled in the app: no 5. did you encounter this recently, or has this bug always been there: 6. expected behaviour: explain below steps to repoduce 7. console logs http://blog.getpostman.com/2014/01/27/enabling-chrome-developer-tools-inside-postman/ for the chrome app, view->toggle dev tools for the mac app : 8. screenshots if applicable steps to reproduce the problem: it seems postman ignores the failures if there is 1<= passed test after the failed assertion. i.e: assertion a a=true assertion b=false must fails the test assertion c c=true the final outcome of the postman test must be false because b failed. but postman shows the final results as passed because it looks at c which was true as the last line of the test which is wrong and the test easily ignores any bug and marks the test as successfull. some guidelines: 1. please file newman-related issues at https://github.com/postmanlabs/newman/issues 2. if it’s a cloud-related issue, or you want to include personal information like your username / collection names, mail us at help@getpostman.com 3. if it’s a question anything along the lines of “how do i … in postman” , the answer might lie in our documentation - http://getpostman.com/docs. \n",
1289 |       "\n",
1290 |       "Original Title:\n",
1291 |       " postman is skiping the failed assestions if the last assersion passes\n",
1292 |       "\n",
1293 |       "****** Machine Generated Title (Prediction) ******:\n",
1294 |       " feature request : add support for multiple devices\n",
1295 |       "\n",
1296 |       "\n",
1297 |       "==============================================\n",
1298 |       "============== Example # 120461 =================\n",
1299 |       "\n",
1300 |       "\"https://github.com/libgraviton/gdk-java/issues/23\"\n",
1301 |       "Issue Body:\n",
1302 |       " with 12 rql support was introduced for string and date fields. since the rql syntax varies depending on the field type, integer and float and boolean are currently not supported, since they get treated as regular string fields. lets have a look at a typical query against a string field _fieldname_ with the value _value_ ?eq fieldname,string:value in this case the string: prefix is not required. it has the same result as ?eq fieldname,value but lets look at another example again a string field ?eq fieldname,string:20 at this point the string: prefix is required, since the graviton rql parser needs to know it's dealing with a string. omitting string: would lead to an empty result on the other hand, if we look at an integer field ?eq integerfieldname,string:20 would lead to an empty result. in this case the query needs to look like ?eq integerfieldname,string:20 the part that needs changing is https://github.com/libgraviton/gdk-java/blob/develop/gdk-core/src/main/java/com/github/libgraviton/gdk/api/query/rql/rql.java l141 where currently every field is always treated as string. \n",
1303 |       "\n",
1304 |       "Original Title:\n",
1305 |       " integer, float and boolean support for rql\n",
1306 |       "\n",
1307 |       "****** Machine Generated Title (Prediction) ******:\n",
1308 |       " support for numeric type\n",
1309 |       "\n",
1310 |       "\n",
1311 |       "==============================================\n",
1312 |       "============== Example # 3333 =================\n",
1313 |       "\n",
1314 |       "\"https://github.com/jpvillaisaza/hangman/issues/15\"\n",
1315 |       "Issue Body:\n",
1316 |       " losing a game and then restarting shouldn't count as two more games. just one, thanks. \n",
1317 |       "\n",
1318 |       "Original Title:\n",
1319 |       " fix total number of games\n",
1320 |       "\n",
1321 |       "****** Machine Generated Title (Prediction) ******:\n",
1322 |       " game crashes when game is running\n",
1323 |       "\n",
1324 |       "\n",
1325 |       "==============================================\n",
1326 |       "============== Example # 133450 =================\n",
1327 |       "\n",
1328 |       "\"https://github.com/vector-im/riot-meta/issues/28\"\n",
1329 |       "Issue Body:\n",
1330 |       " placeholder overarching issue to track progress on: general ux polish should probably be decomposed further. \n",
1331 |       "\n",
1332 |       "Original Title:\n",
1333 |       " general ux polish\n",
1334 |       "\n",
1335 |       "****** Machine Generated Title (Prediction) ******:\n",
1336 |       " add more info to the ui\n",
1337 |       "\n",
1338 |       "\n",
1339 |       "==============================================\n",
1340 |       "============== Example # 111482 =================\n",
1341 |       "\n",
1342 |       "\"https://github.com/Viva-con-Agua/drops/issues/21\"\n",
1343 |       "Issue Body:\n",
1344 |       " currently, the view for defining the roles is very confusing. a search field for searching users has to be implemented and the role selection should be a little bit more user friendly. \n",
1345 |       "\n",
1346 |       "Original Title:\n",
1347 |       " roles definition view\n",
1348 |       "\n",
1349 |       "****** Machine Generated Title (Prediction) ******:\n",
1350 |       " improve search for user roles\n",
1351 |       "\n",
1352 |       "\n",
1353 |       "==============================================\n",
1354 |       "============== Example # 154925 =================\n",
1355 |       "\n",
1356 |       "\"https://github.com/srusskih/SublimeJEDI/issues/228\"\n",
1357 |       "Issue Body:\n",
1358 |       " i want edit my project config file. according to the readme , by default project config name is <project name>.sublime-project , so the project is the folder that holds the project py file? \n",
1359 |       "\n",
1360 |       "Original Title:\n",
1361 |       " how to define a project ?\n",
1362 |       "\n",
1363 |       "****** Machine Generated Title (Prediction) ******:\n",
1364 |       " how to edit project name ?\n",
1365 |       "\n",
1366 |       "\n",
1367 |       "==============================================\n",
1368 |       "============== Example # 18851 =================\n",
1369 |       "\n",
1370 |       "\"https://github.com/climategadgets/servomaster/issues/7\"\n",
1371 |       "Issue Body:\n",
1372 |       " adafruit dc & stepper motor hat for raspberry pi - mini kit https://www.adafruit.com/product/2348 provides a very reproducible and standard stepper controller solution for raspberry pi, it would be a shame not to support it. this enhancement is much more complicated than 6, though. steppers, unlike servos, do not have inherent limits, and if a stepper is used as a servo, there will have to be solutions put in place to allow limit detection limit switches and torque sensors, to name a couple . in addition, stepper positioning model discrete steps is different from servo positioning model floating point 0 to 1 with adjustable ranges and limits , so some extra work will need to be done. \n",
1373 |       "\n",
1374 |       "Original Title:\n",
1375 |       " implement tb6612 driver for raspberry pi\n",
1376 |       "\n",
1377 |       "****** Machine Generated Title (Prediction) ******:\n",
1378 |       " rpi motor support\n",
1379 |       "\n",
1380 |       "\n",
1381 |       "==============================================\n",
1382 |       "============== Example # 174664 =================\n",
1383 |       "\n",
1384 |       "\"https://github.com/cawilliamson/ansible-gpdpocket/issues/98\"\n",
1385 |       "Issue Body:\n",
1386 |       " first off, thanks for all the effort going into this, very promising. issue: trying to bootstrap an ubuntu-16.04.3 iso from within an existing ubuntu instance. running into an error, which appears to be when ansible starts getting involved. very possible i'm doing something wrong. e: can not write log is /dev/pts mounted? - posix_openpt 2: no such file or directory + grep -wq -- --nogit + echo 'skip pulling source from git' + cd /usr/src/ansible-gpdpocket + ansible_nocows=1 + ansible-playbook system.yml -e bootstrap=true -v warning : provided hosts list is empty, only localhost is available error! syntax error while loading yaml. the error appears to have been in '/usr/src/ansible-gpdpocket/roles/audio/tasks/main.yml': line 17, column 1, but may be elsewhere in the file depending on the exact syntax problem. the offending line appears to be: - name: create chtrt5645 directory ^ here play recap localhost : ok=23 changed=14 unreachable=0 failed=1 \n",
1387 |       "\n",
1388 |       "Original Title:\n",
1389 |       " syntax error while loading yaml\n",
1390 |       "\n",
1391 |       "****** Machine Generated Title (Prediction) ******:\n",
1392 |       " bootstrap fails to mount in ubuntu\n",
1393 |       "\n",
1394 |       "\n",
1395 |       "==============================================\n",
1396 |       "============== Example # 186883 =================\n",
1397 |       "\n",
1398 |       "\"https://github.com/prettydiff/prettydiff/issues/456\"\n",
1399 |       "Issue Body:\n",
1400 |       " right now a single language file handles all tasks for a given group of languages. these files need to be broken down into respective pieces: parser beautifier minifier analyzer this is a large architectural effort. fortunately the code is well segmented internally for separation of concerns, so the logic can be broken apart without impact to operational integrity. the challenge is largely administration to ensure all the pieces are included into each of the respective environments and pass data among each other appropriately. \n",
1401 |       "\n",
1402 |       "Original Title:\n",
1403 |       " separate language files into their respective tasks\n",
1404 |       "\n",
1405 |       "****** Machine Generated Title (Prediction) ******:\n",
1406 |       " fix language handling for all languages\n",
1407 |       "\n",
1408 |       "\n",
1409 |       "==============================================\n",
1410 |       "============== Example # 151593 =================\n",
1411 |       "\n",
1412 |       "\"https://github.com/koorellasuresh/UKRegionTest/issues/21568\"\n",
1413 |       "Issue Body:\n",
1414 |       " first from flow in uk south \n",
1415 |       "\n",
1416 |       "Original Title:\n",
1417 |       " first from flow in uk south\n"
1418 |      ]
1419 |     },
1420 |     {
1421 |      "name": "stdout",
1422 |      "output_type": "stream",
1423 |      "text": [
1424 |       "\n",
1425 |       "****** Machine Generated Title (Prediction) ******:\n",
1426 |       " first from flow in uk south\n",
1427 |       "\n",
1428 |       "\n",
1429 |       "==============================================\n",
1430 |       "============== Example # 24718 =================\n",
1431 |       "\n",
1432 |       "\"https://github.com/sensorario/go-tris/issues/34\"\n",
1433 |       "Issue Body:\n",
1434 |       " move 1 simone : 5 move 2 computer : 2 move 3 simone : 9 move 4 computer : 1 move 5 simone : 3 move 6 computer : 6 move 7 simone : 8 move 8 computer : 7 move 9 simone : 4 \n",
1435 |       "\n",
1436 |       "Original Title:\n",
1437 |       " in this case computer loose\n",
1438 |       "\n",
1439 |       "****** Machine Generated Title (Prediction) ******:\n",
1440 |       " move to * number *\n",
1441 |       "\n",
1442 |       "\n",
1443 |       "==============================================\n",
1444 |       "============== Example # 2005 =================\n",
1445 |       "\n",
1446 |       "\"https://github.com/fossasia/susi_firefoxbot/issues/6\"\n",
1447 |       "Issue Body:\n",
1448 |       " actual behaviour only text response from the server is shown expected behaviour support different types of responses like images, links, tables etc. would you like to work on the issue ? yes \n",
1449 |       "\n",
1450 |       "Original Title:\n",
1451 |       " support for different types of responses from server\n",
1452 |       "\n",
1453 |       "****** Machine Generated Title (Prediction) ******:\n",
1454 |       " support for different types of response\n",
1455 |       "\n",
1456 |       "\n",
1457 |       "==============================================\n",
1458 |       "============== Example # 144769 =================\n",
1459 |       "\n",
1460 |       "\"https://github.com/reallyenglish/ansible-role-poudriere/issues/8\"\n",
1461 |       "Issue Body:\n",
1462 |       " the role clones a remote git repository, which takes time to clone. to make the test faster, create a small, but functional repository in the role, and use it for the test. \n",
1463 |       "\n",
1464 |       "Original Title:\n",
1465 |       " create minimal ports tree for the test\n",
1466 |       "\n",
1467 |       "****** Machine Generated Title (Prediction) ******:\n",
1468 |       " add a test to the repo\n",
1469 |       "\n",
1470 |       "\n",
1471 |       "==============================================\n",
1472 |       "============== Example # 148842 =================\n",
1473 |       "\n",
1474 |       "\"https://github.com/felquis/HTJSON/issues/2\"\n",
1475 |       "Issue Body:\n",
1476 |       " firstly - thanks for making this, i had the same idea. but i would do it slightly differently. exactly 2 differerences. 1. i'd make content an array 2. i'd more the objects inside attr down a level and get rid of it. thus content would be an attribute. for example, instead of : var template = { a : { attr : { href : http://your-domain.com/images/any-image.jpg }, content: { link name } } }; it'd be: var template = a : { href : http://your-domain.com/images/any-image.jpg , content : some text , { img : { src: http://whatever.jpg }, some more text } ; 1. makes it more compact, without losing any document structure information 2. makes it more versatile, and, in fact, makes it complete - it can then encode any html document. \n",
1477 |       "\n",
1478 |       "Original Title:\n",
1479 |       " shouldn't content be an array? is attr really neccessary?\n",
1480 |       "\n",
1481 |       "****** Machine Generated Title (Prediction) ******:\n",
1482 |       " content - type : attribute\n",
1483 |       "\n",
1484 |       "\n",
1485 |       "==============================================\n",
1486 |       "============== Example # 83915 =================\n",
1487 |       "\n",
1488 |       "\"https://github.com/rrdelaney/ava-rethinkdb/issues/3\"\n",
1489 |       "Issue Body:\n",
1490 |       " when i run the ava-rethinkdb it works but when i ran it through travis ci i get error: spawn rethinkdb enoent is there something i am doing wrong or need to add for ci build? \n",
1491 |       "\n",
1492 |       "Original Title:\n",
1493 |       " error: spawn rethinkdb enoent\n",
1494 |       "\n",
1495 |       "****** Machine Generated Title (Prediction) ******:\n",
1496 |       " spawn enoent on ci\n",
1497 |       "\n",
1498 |       "\n",
1499 |       "==============================================\n",
1500 |       "============== Example # 22941 =================\n",
1501 |       "\n",
1502 |       "\"https://github.com/cartalyst/stripe/issues/90\"\n",
1503 |       "Issue Body:\n",
1504 |       " i am using your latest release 2.0.9 but that release does not include the payout file. kidnly upload the latest release that has the payout work. \n",
1505 |       "\n",
1506 |       "Original Title:\n",
1507 |       " payout file is missing in latest release.\n",
1508 |       "\n",
1509 |       "****** Machine Generated Title (Prediction) ******:\n",
1510 |       " release * number*.1 missing\n"
1511 |      ]
1512 |     }
1513 |    ],
1514 |    "source": [
1515 |     "# this method displays the predictions on random rows of the holdout set\n",
1516 |     "seq2seq_inf.demo_model_predictions(n=50, issue_df=testdf)"
1517 |    ]
1518 |   },
1519 |   {
1520 |    "cell_type": "markdown",
1521 |    "metadata": {},
1522 |    "source": [
1523 |     "# Feature Extraction Demo"
1524 |    ]
1525 |   },
1526 |   {
1527 |    "cell_type": "code",
1528 |    "execution_count": 68,
1529 |    "metadata": {
1530 |     "collapsed": true
1531 |    },
1532 |    "outputs": [],
1533 |    "source": [
1534 |     "# Read All 5M data points\n",
1535 |     "all_data_df = pd.read_csv('github_issues.csv')\n",
1536 |     "# Extract the bodies from this dataframe\n",
1537 |     "all_data_bodies = all_data_df['body'].tolist()"
1538 |    ]
1539 |   },
1540 |   {
1541 |    "cell_type": "code",
1542 |    "execution_count": 70,
1543 |    "metadata": {
1544 |     "collapsed": true
1545 |    },
1546 |    "outputs": [],
1547 |    "source": [
1548 |     "# transform all of the data using the ktext processor\n",
1549 |     "all_data_vectorized = body_pp.transform_parallel(all_data_bodies)"
1550 |    ]
1551 |   },
1552 |   {
1553 |    "cell_type": "code",
1554 |    "execution_count": 71,
1555 |    "metadata": {
1556 |     "collapsed": true
1557 |    },
1558 |    "outputs": [],
1559 |    "source": [
1560 |     "# save transformed data\n",
1561 |     "with open('all_data_vectorized.dpkl', 'wb') as f:\n",
1562 |     "    dpickle.dump(all_data_vectorized, f)"
1563 |    ]
1564 |   },
1565 |   {
1566 |    "cell_type": "code",
1567 |    "execution_count": 262,
1568 |    "metadata": {
1569 |     "collapsed": true
1570 |    },
1571 |    "outputs": [],
1572 |    "source": [
1573 |     "%reload_ext autoreload\n",
1574 |     "%autoreload 2\n",
1575 |     "from seq2seq_utils import Seq2Seq_Inference\n",
1576 |     "seq2seq_inf_rec = Seq2Seq_Inference(encoder_preprocessor=body_pp,\n",
1577 |     "                                    decoder_preprocessor=title_pp,\n",
1578 |     "                                    seq2seq_model=seq2seq_Model)\n",
1579 |     "recsys_annoyobj = seq2seq_inf_rec.prepare_recommender(all_data_vectorized, all_data_df)"
1580 |    ]
1581 |   },
1582 |   {
1583 |    "cell_type": "markdown",
1584 |    "metadata": {
1585 |     "collapsed": true
1586 |    },
1587 |    "source": [
1588 |     "### Example 1: Issues Installing Python Packages"
1589 |    ]
1590 |   },
1591 |   {
1592 |    "cell_type": "code",
1593 |    "execution_count": 223,
1594 |    "metadata": {},
1595 |    "outputs": [
1596 |     {
1597 |      "name": "stdout",
1598 |      "output_type": "stream",
1599 |      "text": [
1600 |       "\n",
1601 |       "\n",
1602 |       "==============================================\n",
1603 |       "============== Example # 13563 =================\n",
1604 |       "\n",
1605 |       "\"https://github.com/bnosac/pattern.nlp/issues/5\"\n",
1606 |       "Issue Body:\n",
1607 |       " thanks for your package, i can't wait to use it. unfortunately i have issues with the installation. prerequisite is 'first install python version 2.5+ not version 3 '. so this package cant be used with version 3.6 64bit that i have installed? i nevertheless tried to install it using pip, conda is not supported? but got an error: 'syntaxerror: missing parentheses in call to 'print''. besides when i try to run the library in r version 3.3.3. 64 bit i got errors with can_find_python_cmd required_modules = pattern.db : 'error in find_python_cmd......' pattern seems to be written in python but must be used in r, why cant it be used in python? i found another python pattern application that apparently does the same in python: https://pypi.python.org/pypi/pattern how is this related? \n",
1608 |       "\n",
1609 |       "Original Title:\n",
1610 |       " error installation python\n",
1611 |       "\n",
1612 |       "****** Machine Generated Title (Prediction) ******:\n",
1613 |       " install with python * number *\n",
1614 |       "\n",
1615 |       "**** Similar Issues (using encoder embedding) ****:\n",
1616 |       "\n"
1617 |      ]
1618 |     },
1619 |     {
1620 |      "data": {
1621 |       "text/html": [
1622 |        "<div>\n",
1623 |        "<style scoped>\n",
1624 |        "    .dataframe tbody tr th:only-of-type {\n",
1625 |        "        vertical-align: middle;\n",
1626 |        "    }\n",
1627 |        "\n",
1628 |        "    .dataframe tbody tr th {\n",
1629 |        "        vertical-align: top;\n",
1630 |        "    }\n",
1631 |        "\n",
1632 |        "    .dataframe thead th {\n",
1633 |        "        text-align: right;\n",
1634 |        "    }\n",
1635 |        "</style>\n",
1636 |        "<table border=\"1\" class=\"dataframe\">\n",
1637 |        "  <thead>\n",
1638 |        "    <tr style=\"text-align: right;\">\n",
1639 |        "      <th></th>\n",
1640 |        "      <th>issue_url</th>\n",
1641 |        "      <th>issue_title</th>\n",
1642 |        "      <th>body</th>\n",
1643 |        "      <th>dist</th>\n",
1644 |        "    </tr>\n",
1645 |        "  </thead>\n",
1646 |        "  <tbody>\n",
1647 |        "    <tr>\n",
1648 |        "      <th>286906</th>\n",
1649 |        "      <td>\"https://github.com/scikit-hep/root_numpy/issues/337\"</td>\n",
1650 |        "      <td>root 6.10/02 and root_numpy compatibility</td>\n",
1651 |        "      <td>i am trying to pip install root_pandas and one of the dependency is root_numpy however some weird reasons i am unable to install it even though i can import root in python. i am working on python3.6 as i am more comfortable with it. is root_numpy is not yet compatible with the latest root?</td>\n",
1652 |        "      <td>0.694671</td>\n",
1653 |        "    </tr>\n",
1654 |        "    <tr>\n",
1655 |        "      <th>314005</th>\n",
1656 |        "      <td>\"https://github.com/andim/noisyopt/issues/4\"</td>\n",
1657 |        "      <td>joss review: installing dependencies via pip</td>\n",
1658 |        "      <td>hi, i'm trying to install noisyopt in a clean conda environment running python 3.5. running pip install noisyopt does not install the dependencies numpy, scipy . i see that you do include a requires keyword argument in your setup.py file, does this need to be install_requires ? as in https://packaging.python.org/requirements/ . also, not necessary if you don't want to, but i think it would be good to include a list of dependences somewhere in the readme.</td>\n",
1659 |        "      <td>0.698265</td>\n",
1660 |        "    </tr>\n",
1661 |        "    <tr>\n",
1662 |        "      <th>48120</th>\n",
1663 |        "      <td>\"https://github.com/turi-code/SFrame/issues/389\"</td>\n",
1664 |        "      <td>python 3.6 compatible</td>\n",
1665 |        "      <td>hi: i tried to install sframe using pip and conda but i can not find anything that will work with python 3.6? has sframe been updated to work with python 3.6 yet? thanks, drew</td>\n",
1666 |        "      <td>0.718715</td>\n",
1667 |        "    </tr>\n",
1668 |        "  </tbody>\n",
1669 |        "</table>\n",
1670 |        "</div>"
1671 |       ],
1672 |       "text/plain": [
1673 |        "                                                    issue_url  \\\n",
1674 |        "286906  \"https://github.com/scikit-hep/root_numpy/issues/337\"   \n",
1675 |        "314005           \"https://github.com/andim/noisyopt/issues/4\"   \n",
1676 |        "48120        \"https://github.com/turi-code/SFrame/issues/389\"   \n",
1677 |        "\n",
1678 |        "                                         issue_title  \\\n",
1679 |        "286906     root 6.10/02 and root_numpy compatibility   \n",
1680 |        "314005  joss review: installing dependencies via pip   \n",
1681 |        "48120                          python 3.6 compatible   \n",
1682 |        "\n",
1683 |        "                                                                                                                                                                                                                                                                                                                                                                                                                                                                              body  \\\n",
1684 |        "286906                                                                                                                                                                          i am trying to pip install root_pandas and one of the dependency is root_numpy however some weird reasons i am unable to install it even though i can import root in python. i am working on python3.6 as i am more comfortable with it. is root_numpy is not yet compatible with the latest root?   \n",
1685 |        "314005  hi, i'm trying to install noisyopt in a clean conda environment running python 3.5. running pip install noisyopt does not install the dependencies numpy, scipy . i see that you do include a requires keyword argument in your setup.py file, does this need to be install_requires ? as in https://packaging.python.org/requirements/ . also, not necessary if you don't want to, but i think it would be good to include a list of dependences somewhere in the readme.   \n",
1686 |        "48120                                                                                                                                                                                                                                                                                              hi: i tried to install sframe using pip and conda but i can not find anything that will work with python 3.6? has sframe been updated to work with python 3.6 yet? thanks, drew   \n",
1687 |        "\n",
1688 |        "            dist  \n",
1689 |        "286906  0.694671  \n",
1690 |        "314005  0.698265  \n",
1691 |        "48120   0.718715  "
1692 |       ]
1693 |      },
1694 |      "metadata": {},
1695 |      "output_type": "display_data"
1696 |     }
1697 |    ],
1698 |    "source": [
1699 |     "seq2seq_inf_rec.demo_model_predictions(n=1, issue_df=testdf, threshold=1)"
1700 |    ]
1701 |   },
1702 |   {
1703 |    "cell_type": "markdown",
1704 |    "metadata": {},
1705 |    "source": [
1706 |     "### Example 2:  Issues asking for feature improvements"
1707 |    ]
1708 |   },
1709 |   {
1710 |    "cell_type": "code",
1711 |    "execution_count": 226,
1712 |    "metadata": {},
1713 |    "outputs": [
1714 |     {
1715 |      "name": "stdout",
1716 |      "output_type": "stream",
1717 |      "text": [
1718 |       "\n",
1719 |       "\n",
1720 |       "==============================================\n",
1721 |       "============== Example # 157322 =================\n",
1722 |       "\n",
1723 |       "\"https://github.com/Chingu-cohorts/devgaido/issues/89\"\n",
1724 |       "Issue Body:\n",
1725 |       " right now, your profile link is https://devgaido.com/profile. this is fine, but it would be really cool if there was a way to share your profile with other people. on my portfolio, i have social media buttons to freecodecamp, github, ect. without a custom link, i cannot show-off what i have done on devgaido to future employers. \n",
1726 |       "\n",
1727 |       "Original Title:\n",
1728 |       " feature request: sharable profile.\n",
1729 |       "\n",
1730 |       "****** Machine Generated Title (Prediction) ******:\n",
1731 |       " add a link to your profile\n",
1732 |       "\n",
1733 |       "**** Similar Issues (using encoder embedding) ****:\n",
1734 |       "\n"
1735 |      ]
1736 |     },
1737 |     {
1738 |      "data": {
1739 |       "text/html": [
1740 |        "<div>\n",
1741 |        "<style scoped>\n",
1742 |        "    .dataframe tbody tr th:only-of-type {\n",
1743 |        "        vertical-align: middle;\n",
1744 |        "    }\n",
1745 |        "\n",
1746 |        "    .dataframe tbody tr th {\n",
1747 |        "        vertical-align: top;\n",
1748 |        "    }\n",
1749 |        "\n",
1750 |        "    .dataframe thead th {\n",
1751 |        "        text-align: right;\n",
1752 |        "    }\n",
1753 |        "</style>\n",
1754 |        "<table border=\"1\" class=\"dataframe\">\n",
1755 |        "  <thead>\n",
1756 |        "    <tr style=\"text-align: right;\">\n",
1757 |        "      <th></th>\n",
1758 |        "      <th>issue_url</th>\n",
1759 |        "      <th>issue_title</th>\n",
1760 |        "      <th>body</th>\n",
1761 |        "      <th>dist</th>\n",
1762 |        "    </tr>\n",
1763 |        "  </thead>\n",
1764 |        "  <tbody>\n",
1765 |        "    <tr>\n",
1766 |        "      <th>250423</th>\n",
1767 |        "      <td>\"https://github.com/ParabolInc/action/issues/1379\"</td>\n",
1768 |        "      <td>integrations list view discoverability</td>\n",
1769 |        "      <td>issue - enhancement i was initially confused by the link to my account copy; seeing github in the integrations list made me think it had already been set up . i realize now that i had to allow parabol to post as me. i think that link to my account could use a tooltip explaining what link means, and why you'd want to do so. &lt;img width= 728 alt= screen shot 2017-09-29 at 10 52 05 am src= https://user-images.githubusercontent.com/2146312/31024786-2fd39c46-a50e-11e7-9f2a-6d4a5ed2baeb.png &gt;</td>\n",
1770 |        "      <td>0.748828</td>\n",
1771 |        "    </tr>\n",
1772 |        "    <tr>\n",
1773 |        "      <th>222304</th>\n",
1774 |        "      <td>\"https://github.com/viosey/hexo-theme-material/issues/166\"</td>\n",
1775 |        "      <td>allow us to use sns-share for github</td>\n",
1776 |        "      <td>i'd love to be able to add a link at the bottom of the page for my github account. however, the sns-share option doesn't currently seem to be able to do this.</td>\n",
1777 |        "      <td>0.774398</td>\n",
1778 |        "    </tr>\n",
1779 |        "    <tr>\n",
1780 |        "      <th>153327</th>\n",
1781 |        "      <td>\"https://github.com/tobykurien/GoogleApps/issues/31\"</td>\n",
1782 |        "      <td>drive provide download ability</td>\n",
1783 |        "      <td>sometimes people share files via g drive. provided a link this app can show some info about the files but doesn't show the download button. i hope that it can be fixed and users would be able to download files with this app.</td>\n",
1784 |        "      <td>0.778953</td>\n",
1785 |        "    </tr>\n",
1786 |        "  </tbody>\n",
1787 |        "</table>\n",
1788 |        "</div>"
1789 |       ],
1790 |       "text/plain": [
1791 |        "                                                         issue_url  \\\n",
1792 |        "250423          \"https://github.com/ParabolInc/action/issues/1379\"   \n",
1793 |        "222304  \"https://github.com/viosey/hexo-theme-material/issues/166\"   \n",
1794 |        "153327        \"https://github.com/tobykurien/GoogleApps/issues/31\"   \n",
1795 |        "\n",
1796 |        "                                   issue_title  \\\n",
1797 |        "250423  integrations list view discoverability   \n",
1798 |        "222304    allow us to use sns-share for github   \n",
1799 |        "153327          drive provide download ability   \n",
1800 |        "\n",
1801 |        "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              body  \\\n",
1802 |        "250423  issue - enhancement i was initially confused by the link to my account copy; seeing github in the integrations list made me think it had already been set up . i realize now that i had to allow parabol to post as me. i think that link to my account could use a tooltip explaining what link means, and why you'd want to do so. <img width= 728 alt= screen shot 2017-09-29 at 10 52 05 am src= https://user-images.githubusercontent.com/2146312/31024786-2fd39c46-a50e-11e7-9f2a-6d4a5ed2baeb.png >   \n",
1803 |        "222304                                                                                                                                                                                                                                                                                                                                              i'd love to be able to add a link at the bottom of the page for my github account. however, the sns-share option doesn't currently seem to be able to do this.   \n",
1804 |        "153327                                                                                                                                                                                                                                                                            sometimes people share files via g drive. provided a link this app can show some info about the files but doesn't show the download button. i hope that it can be fixed and users would be able to download files with this app.   \n",
1805 |        "\n",
1806 |        "            dist  \n",
1807 |        "250423  0.748828  \n",
1808 |        "222304  0.774398  \n",
1809 |        "153327  0.778953  "
1810 |       ]
1811 |      },
1812 |      "metadata": {},
1813 |      "output_type": "display_data"
1814 |     }
1815 |    ],
1816 |    "source": [
1817 |     "seq2seq_inf_rec.demo_model_predictions(n=1, issue_df=testdf, threshold=1)"
1818 |    ]
1819 |   },
1820 |   {
1821 |    "cell_type": "code",
1822 |    "execution_count": 78,
1823 |    "metadata": {},
1824 |    "outputs": [
1825 |     {
1826 |      "data": {
1827 |       "text/plain": [
1828 |        "True"
1829 |       ]
1830 |      },
1831 |      "execution_count": 78,
1832 |      "metadata": {},
1833 |      "output_type": "execute_result"
1834 |     }
1835 |    ],
1836 |    "source": [
1837 |     "# in case you need to reset the rec system\n",
1838 |     "# seq2seq_inf_rec.set_recsys_annoyobj(recsys_annoyobj)\n",
1839 |     "# seq2seq_inf_rec.set_recsys_data(all_data_df)\n",
1840 |     "\n",
1841 |     "# save object\n",
1842 |     "recsys_annoyobj.save('recsys_annoyobj.pkl')"
1843 |    ]
1844 |   },
1845 |   {
1846 |    "cell_type": "code",
1847 |    "execution_count": null,
1848 |    "metadata": {
1849 |     "collapsed": true
1850 |    },
1851 |    "outputs": [],
1852 |    "source": []
1853 |   }
1854 |  ],
1855 |  "metadata": {
1856 |   "kernelspec": {
1857 |    "display_name": "Python 3",
1858 |    "language": "python",
1859 |    "name": "python3"
1860 |   },
1861 |   "language_info": {
1862 |    "codemirror_mode": {
1863 |     "name": "ipython",
1864 |     "version": 3
1865 |    },
1866 |    "file_extension": ".py",
1867 |    "mimetype": "text/x-python",
1868 |    "name": "python",
1869 |    "nbconvert_exporter": "python",
1870 |    "pygments_lexer": "ipython3",
1871 |    "version": "3.6.2"
1872 |   },
1873 |   "toc": {
1874 |    "nav_menu": {
1875 |     "height": "263px",
1876 |     "width": "352px"
1877 |    },
1878 |    "number_sections": true,
1879 |    "sideBar": true,
1880 |    "skip_h1_title": false,
1881 |    "title_cell": "Table of Contents",
1882 |    "title_sidebar": "Contents",
1883 |    "toc_cell": true,
1884 |    "toc_position": {},
1885 |    "toc_section_display": true,
1886 |    "toc_window_display": false
1887 |   }
1888 |  },
1889 |  "nbformat": 4,
1890 |  "nbformat_minor": 2
1891 | }
1892 | 


--------------------------------------------------------------------------------