├── sql └── GetIssues.sql ├── README.md ├── requirements.txt ├── LICENSE └── notebooks ├── seq2seq_utils.py └── Tutorial.ipynb /sql/GetIssues.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | url as issue_url 3 | -- replace more than one white-space character in a row with a single space 4 | , REGEXP_REPLACE(title, r"\s{2,}", ' ') as issue_title 5 | , REGEXP_REPLACE(body, r"\s{2,}", ' ') as body 6 | 7 | FROM( 8 | SELECT 9 | JSON_EXTRACT(payload, '$.issue.html_url') as url 10 | -- extract the title and body removing parentheses, brackets, and quotes 11 | , LOWER(TRIM(REGEXP_REPLACE(JSON_EXTRACT(payload, '$.issue.title'), r"\\n|\(|\)|\[|\]|#|\*|`", ' '))) as title 12 | , LOWER(TRIM(REGEXP_REPLACE(JSON_EXTRACT(payload, '$.issue.body'), r"\\n|\(|\)|\[|\]|#|\*|`", ' '))) as body 13 | FROM `githubarchive.day.2017*` 14 | WHERE 15 | -- 70 random days in 2017 (because it costs money to query these tables!!) 16 | _TABLE_SUFFIX BETWEEN '0101' and '1231' 17 | and type="IssuesEvent" 18 | -- Only want the issue at a specific point otherwise will have duplicates 19 | and JSON_EXTRACT(payload, '$.action') = "\"opened\"" 20 | ) as tbl 21 | 22 | WHERE 23 | -- the body must be at least 8 words long and the title at least 3 words long 24 | -- this is an arbitrary way to filter out empty or sparse issues 25 | ARRAY_LENGTH(SPLIT(body, ' ')) >= 6 26 | and ARRAY_LENGTH(SPLIT(title, ' ')) >= 3 27 | -- filter out issues that have really long titles or bodies 28 | -- (these are outliers, and will slow tokenization down). 29 | and LENGTH(title) <= 400 30 | and LENGTH(body) <= 2000 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![GitHub license](https://img.shields.io/github/license/hamelsmu/Seq2Seq_Tutorial.svg)](https://github.com/hamelsmu/Seq2Seq_Tutorial/blob/master/LICENSE) 2 | 3 | ## Sequence-to-Sequence Tutorial with Github Issues Data 4 | Code For Medium Article: ["How To Create Data Products That Are Magical Using Sequence-to-Sequence Models"](https://medium.com/@hamelhusain/how-to-create-data-products-that-are-magical-using-sequence-to-sequence-models-703f86a231f8) 5 | 6 | ## Installation 7 | 8 | `pip install -r requirements.txt` 9 | 10 | If you are using the AWS Deep Learning Ubuntu AMI, many of the required dependencies will already be installed, 11 | so you only need to run: 12 | 13 | ``` 14 | source activate tensorflow_p36 15 | pip install ktext annoy nltk pydot 16 | ``` 17 | 18 | See #4 below if you wish to run this tutorial using Docker. 19 | 20 | 21 | ## Resources: 22 | 23 | 1. [Tutorial Notebook](https://nbviewer.jupyter.org/github/hamelsmu/Seq2Seq_Tutorial/blob/master/notebooks/Tutorial.ipynb): The Jupyter notebook that coincides with the Medium post. 24 | 25 | 2. [seq2seq_utils.py](./notebooks/seq2seq_utils.py): convenience functions that are used in the tutorial notebook to make predictions. 26 | 27 | 3. [ktext](https://github.com/hamelsmu/ktext): this library is used in the tutorial to clean data. This library can be installed with `pip`. 28 | 29 | 4. [Nvidia Docker Container](https://hub.docker.com/r/hamelsmu/seq2seq_tutorial/): contains all libraries that are required to run the tutorial. This container is built with Nvidia-Docker v1.0. You can install Nvidia-Docker and run this container like so: 30 | 31 | 32 | ``` 33 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - 34 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID) 35 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 36 | sudo apt-get update 37 | sudo apt-get install nvidia-docker 38 | 39 | sudo nvidia-docker run hamelsmu/seq2seq_tutorial 40 | 41 | ``` 42 | 43 | This should work with both Nvidia-Docker v1.0 and v2.0. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alabaster==0.7.10 2 | altair==1.2.1 3 | anaconda-client==1.6.5 4 | anaconda-navigator==1.6.8 5 | anaconda-project==0.8.0 6 | annoy==1.10.0 7 | asn1crypto==0.22.0 8 | astroid==1.5.3 9 | astropy==2.0.2 10 | Babel==2.5.0 11 | backports.functools-lru-cache==1.4 12 | backports.shutil-get-terminal-size==1.0.0 13 | bcolz==1.1.2 14 | beautifulsoup4==4.6.0 15 | bitarray==0.8.1 16 | bkcharts==0.2 17 | blaze==0.11.3 18 | bleach==1.5.0 19 | bokeh==0.12.7 20 | boto==2.48.0 21 | boto3==1.5.14 22 | botocore==1.8.28 23 | Bottleneck==1.2.1 24 | bz2file==0.98 25 | cachetools==2.0.1 26 | certifi==2017.11.5 27 | cffi==1.10.0 28 | chardet==3.0.4 29 | click==6.7 30 | cloudpickle==0.4.0 31 | clyent==1.2.2 32 | colorama==0.3.9 33 | conda==4.4.6 34 | conda-build==3.0.23 35 | conda-verify==2.0.0 36 | contextlib2==0.5.5 37 | cryptography==2.0.3 38 | cycler==0.10.0 39 | cymem==1.31.2 40 | Cython==0.26.1 41 | cytoolz==0.9.0 42 | dask==0.16.1 43 | datashape==0.5.4 44 | decorator==4.2.1 45 | dill==0.2.7.1 46 | distributed==1.20.2 47 | docopt==0.6.2 48 | docutils==0.14 49 | en-core-web-sm==2.0.0 50 | entrypoints==0.2.3 51 | et-xmlfile==1.0.1 52 | fastcache==1.0.2 53 | fastparquet==0.1.3 54 | filelock==2.0.12 55 | Flask==0.12.2 56 | Flask-Cors==3.0.3 57 | ftfy==4.4.3 58 | future==0.16.0 59 | gensim==3.2.0 60 | gevent==1.2.2 61 | glob2==0.5 62 | gmpy2==2.0.8 63 | graphviz==0.8.1 64 | greenlet==0.4.12 65 | h5py==2.7.1 66 | hdfs==2.1.0 67 | heapdict==1.0.0 68 | html5lib==1.0.1 69 | idna==2.6 70 | ijson==2.3 71 | imageio==2.2.0 72 | imagesize==0.7.1 73 | ipykernel==4.6.1 74 | ipython==6.2.1 75 | ipython-genutils==0.2.0 76 | ipywidgets==7.0.0 77 | isort==4.2.15 78 | isoweek==1.3.3 79 | itsdangerous==0.24 80 | jdcal==1.3 81 | jedi==0.11.0 82 | Jinja2==2.9.6 83 | jmespath==0.9.3 84 | jsonschema==2.6.0 85 | jupyter-client==5.1.0 86 | jupyter-console==5.2.0 87 | jupyter-core==4.3.0 88 | jupyterlab==0.27.0 89 | jupyterlab-launcher==0.4.0 90 | Keras==2.1.2 91 | ktext==0.27 92 | lazy-object-proxy==1.3.1 93 | llvmlite==0.20.0 94 | locket==0.2.0 95 | lxml==3.8.0 96 | Markdown==2.6.9 97 | MarkupSafe==1.0 98 | matplotlib==2.1.0 99 | mccabe==0.6.1 100 | mistune==0.7.4 101 | more-itertools==4.0.1 102 | mpmath==0.19 103 | msgpack==0.5.1 104 | msgpack-numpy==0.4.2 105 | msgpack-python==0.5.1 106 | multipledispatch==0.4.9 107 | multiprocess==0.70.5 108 | murmurhash==0.28.0 109 | navigator-updater==0.1.0 110 | nbconvert==5.3.1 111 | nbformat==4.4.0 112 | networkx==2.0 113 | nltk==3.2.5 114 | nose==1.3.7 115 | notebook==5.0.0 116 | numba==0.35.0+10.g143f70e90 117 | numexpr==2.6.2 118 | numpy==1.14.0 119 | numpydoc==0.7.0 120 | odo==0.5.1 121 | olefile==0.44 122 | openpyxl==2.4.8 123 | packaging==16.8 124 | pandas==0.22.0 125 | pandas-summary==0.0.41 126 | pandocfilters==1.4.2 127 | parso==0.1.0 128 | partd==0.3.8 129 | path.py==10.3.1 130 | pathlib==1.0.1 131 | pathlib2==2.3.0 132 | pathos==0.2.1 133 | patsy==0.4.1 134 | pep8==1.7.0 135 | pexpect==4.3.0 136 | pickleshare==0.7.4 137 | Pillow==4.3.0 138 | pkginfo==1.4.1 139 | plac==0.9.6 140 | ply==3.10 141 | pox==0.2.3 142 | ppft==1.6.4.7.1 143 | preshed==1.0.0 144 | prompt-toolkit==1.0.15 145 | protobuf==3.5.0 146 | psutil==5.2.2 147 | ptyprocess==0.5.2 148 | py==1.4.34 149 | pyarrow==0.8.0 150 | pycodestyle==2.3.1 151 | pycosat==0.6.3 152 | pycparser==2.18 153 | pycrypto==2.6.1 154 | pycurl==7.43.0 155 | pydot==1.2.3 156 | pydot-ng==1.0.0 157 | pyemd==0.4.4 158 | pyflakes==1.5.0 159 | Pygments==2.2.0 160 | PyHive==0.5.0 161 | pylint==1.7.2 162 | pyodbc==4.0.17 163 | pyOpenSSL==17.2.0 164 | pyparsing==2.2.0 165 | Pyphen==0.9.4 166 | PySocks==1.6.7 167 | pytest==3.2.1 168 | python-dateutil==2.6.1 169 | python-Levenshtein==0.12.0 170 | pytz==2017.3 171 | PyWavelets==0.5.2 172 | PyYAML==3.12 173 | pyzmq==16.0.2 174 | QtAwesome==0.4.4 175 | qtconsole==4.3.1 176 | QtPy==1.3.1 177 | regex==2017.4.5 178 | requests==2.18.4 179 | rope==0.10.5 180 | ruamel-yaml==0.11.14 181 | s3transfer==0.1.12 182 | scikit-image==0.13.0 183 | scikit-learn==0.19.1 184 | scipy==1.0.0 185 | seaborn==0.8 186 | simplegeneric==0.8.1 187 | singledispatch==3.4.0.3 188 | six==1.11.0 189 | sklearn-pandas==1.6.0 190 | smart-open==1.5.6 191 | snowballstemmer==1.2.1 192 | sortedcollections==0.5.3 193 | sortedcontainers==1.5.7 194 | spacy==2.0.5 195 | Sphinx==1.6.3 196 | sphinxcontrib-websupport==1.0.1 197 | spyder==3.2.3 198 | SQLAlchemy==1.1.13 199 | statsmodels==0.8.0 200 | sympy==1.1.1 201 | tables==3.4.2 202 | tabulate==0.8.2 203 | tblib==1.3.2 204 | tensorflow-gpu==1.3.0 205 | tensorflow-tensorboard==0.1.8 206 | termcolor==1.1.0 207 | terminado==0.6 208 | testpath==0.3.1 209 | textacy==0.5.0 210 | thinc==6.10.2 211 | thrift==0.10.0 212 | toolz==0.9.0 213 | torch==0.2.0.post4 214 | torchtext==0.2.0 215 | torchvision==0.1.9 216 | tornado==4.5.2 217 | tqdm==4.19.5 218 | traitlets==4.3.2 219 | typing==3.6.2 220 | ujson==1.35 221 | unicodecsv==0.14.1 222 | Unidecode==1.0.22 223 | urllib3==1.22 224 | vega==0.4.4 225 | wcwidth==0.1.7 226 | webencodings==0.5.1 227 | Werkzeug==0.12.2 228 | widgetsnbextension==3.0.2 229 | wrapt==1.10.11 230 | xlrd==1.1.0 231 | XlsxWriter==0.9.8 232 | xlwt==1.3.0 233 | zict==0.1.3 234 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /notebooks/seq2seq_utils.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import tensorflow as tf 3 | from keras import backend as K 4 | from keras.layers import Input 5 | from keras.models import Model 6 | from IPython.display import SVG, display 7 | from keras.utils.vis_utils import model_to_dot 8 | import logging 9 | import numpy as np 10 | import dill as dpickle 11 | from annoy import AnnoyIndex 12 | from tqdm import tqdm, tqdm_notebook 13 | from random import random 14 | from nltk.translate.bleu_score import corpus_bleu 15 | 16 | 17 | def load_text_processor(fname='title_pp.dpkl'): 18 | """ 19 | Load preprocessors from disk. 20 | 21 | Parameters 22 | ---------- 23 | fname: str 24 | file name of ktext.proccessor object 25 | 26 | Returns 27 | ------- 28 | num_tokens : int 29 | size of vocabulary loaded into ktext.processor 30 | pp : ktext.processor 31 | the processor you are trying to load 32 | 33 | Typical Usage: 34 | ------------- 35 | 36 | num_decoder_tokens, title_pp = load_text_processor(fname='title_pp.dpkl') 37 | num_encoder_tokens, body_pp = load_text_processor(fname='body_pp.dpkl') 38 | 39 | """ 40 | # Load files from disk 41 | with open(fname, 'rb') as f: 42 | pp = dpickle.load(f) 43 | 44 | num_tokens = max(pp.id2token.keys()) + 1 45 | print(f'Size of vocabulary for {fname}: {num_tokens:,}') 46 | return num_tokens, pp 47 | 48 | 49 | def load_decoder_inputs(decoder_np_vecs='train_title_vecs.npy'): 50 | """ 51 | Load decoder inputs. 52 | 53 | Parameters 54 | ---------- 55 | decoder_np_vecs : str 56 | filename of serialized numpy.array of decoder input (issue title) 57 | 58 | Returns 59 | ------- 60 | decoder_input_data : numpy.array 61 | The data fed to the decoder as input during training for teacher forcing. 62 | This is the same as `decoder_np_vecs` except the last position. 63 | decoder_target_data : numpy.array 64 | The data that the decoder data is trained to generate (issue title). 65 | Calculated by sliding `decoder_np_vecs` one position forward. 66 | 67 | """ 68 | vectorized_title = np.load(decoder_np_vecs) 69 | # For Decoder Input, you don't need the last word as that is only for prediction 70 | # when we are training using Teacher Forcing. 71 | decoder_input_data = vectorized_title[:, :-1] 72 | 73 | # Decoder Target Data Is Ahead By 1 Time Step From Decoder Input Data (Teacher Forcing) 74 | decoder_target_data = vectorized_title[:, 1:] 75 | 76 | print(f'Shape of decoder input: {decoder_input_data.shape}') 77 | print(f'Shape of decoder target: {decoder_target_data.shape}') 78 | return decoder_input_data, decoder_target_data 79 | 80 | 81 | def load_encoder_inputs(encoder_np_vecs='train_body_vecs.npy'): 82 | """ 83 | Load variables & data that are inputs to encoder. 84 | 85 | Parameters 86 | ---------- 87 | encoder_np_vecs : str 88 | filename of serialized numpy.array of encoder input (issue title) 89 | 90 | Returns 91 | ------- 92 | encoder_input_data : numpy.array 93 | The issue body 94 | doc_length : int 95 | The standard document length of the input for the encoder after padding 96 | the shape of this array will be (num_examples, doc_length) 97 | 98 | """ 99 | vectorized_body = np.load(encoder_np_vecs) 100 | # Encoder input is simply the body of the issue text 101 | encoder_input_data = vectorized_body 102 | doc_length = encoder_input_data.shape[1] 103 | print(f'Shape of encoder input: {encoder_input_data.shape}') 104 | return encoder_input_data, doc_length 105 | 106 | 107 | def viz_model_architecture(model): 108 | """Visualize model architecture in Jupyter notebook.""" 109 | display(SVG(model_to_dot(model).create(prog='dot', format='svg'))) 110 | 111 | 112 | def free_gpu_mem(): 113 | """Attempt to free gpu memory.""" 114 | K.get_session().close() 115 | cfg = K.tf.ConfigProto() 116 | cfg.gpu_options.allow_growth = True 117 | K.set_session(K.tf.Session(config=cfg)) 118 | 119 | 120 | def test_gpu(): 121 | """Run a toy computation task in tensorflow to test GPU.""" 122 | config = tf.ConfigProto() 123 | config.gpu_options.allow_growth = True 124 | session = tf.Session(config=config) 125 | hello = tf.constant('Hello, TensorFlow!') 126 | print(session.run(hello)) 127 | 128 | 129 | def plot_model_training_history(history_object): 130 | """Plots model train vs. validation loss.""" 131 | plt.title('model accuracy') 132 | plt.ylabel('accuracy') 133 | plt.xlabel('epoch') 134 | plt.plot(history_object.history['loss']) 135 | plt.plot(history_object.history['val_loss']) 136 | plt.legend(['train', 'test'], loc='upper left') 137 | plt.show() 138 | 139 | 140 | def extract_encoder_model(model): 141 | """ 142 | Extract the encoder from the original Sequence to Sequence Model. 143 | 144 | Returns a keras model object that has one input (body of issue) and one 145 | output (encoding of issue, which is the last hidden state). 146 | 147 | Input: 148 | ----- 149 | model: keras model object 150 | 151 | Returns: 152 | ----- 153 | keras model object 154 | 155 | """ 156 | encoder_model = model.get_layer('Encoder-Model') 157 | return encoder_model 158 | 159 | 160 | def extract_decoder_model(model): 161 | """ 162 | Extract the decoder from the original model. 163 | 164 | Inputs: 165 | ------ 166 | model: keras model object 167 | 168 | Returns: 169 | ------- 170 | A Keras model object with the following inputs and outputs: 171 | 172 | Inputs of Keras Model That Is Returned: 173 | 1: the embedding index for the last predicted word or the indicator 174 | 2: the last hidden state, or in the case of the first word the hidden state from the encoder 175 | 176 | Outputs of Keras Model That Is Returned: 177 | 1. Prediction (class probabilities) for the next word 178 | 2. The hidden state of the decoder, to be fed back into the decoder at the next time step 179 | 180 | Implementation Notes: 181 | ---------------------- 182 | Must extract relevant layers and reconstruct part of the computation graph 183 | to allow for different inputs as we are not going to use teacher forcing at 184 | inference time. 185 | 186 | """ 187 | # the latent dimension is the same throughout the architecture so we are going to 188 | # cheat and grab the latent dimension of the embedding because that is the same as what is 189 | # output from the decoder 190 | latent_dim = model.get_layer('Decoder-Word-Embedding').output_shape[-1] 191 | 192 | # Reconstruct the input into the decoder 193 | decoder_inputs = model.get_layer('Decoder-Input').input 194 | dec_emb = model.get_layer('Decoder-Word-Embedding')(decoder_inputs) 195 | dec_bn = model.get_layer('Decoder-Batchnorm-1')(dec_emb) 196 | 197 | # Instead of setting the intial state from the encoder and forgetting about it, during inference 198 | # we are not doing teacher forcing, so we will have to have a feedback loop from predictions back into 199 | # the GRU, thus we define this input layer for the state so we can add this capability 200 | gru_inference_state_input = Input(shape=(latent_dim,), name='hidden_state_input') 201 | 202 | # we need to reuse the weights that is why we are getting this 203 | # If you inspect the decoder GRU that we created for training, it will take as input 204 | # 2 tensors -> (1) is the embedding layer output for the teacher forcing 205 | # (which will now be the last step's prediction, and will be _start_ on the first time step) 206 | # (2) is the state, which we will initialize with the encoder on the first time step, but then 207 | # grab the state after the first prediction and feed that back in again. 208 | gru_out, gru_state_out = model.get_layer('Decoder-GRU')([dec_bn, gru_inference_state_input]) 209 | 210 | # Reconstruct dense layers 211 | dec_bn2 = model.get_layer('Decoder-Batchnorm-2')(gru_out) 212 | dense_out = model.get_layer('Final-Output-Dense')(dec_bn2) 213 | decoder_model = Model([decoder_inputs, gru_inference_state_input], 214 | [dense_out, gru_state_out]) 215 | return decoder_model 216 | 217 | 218 | class Seq2Seq_Inference(object): 219 | def __init__(self, 220 | encoder_preprocessor, 221 | decoder_preprocessor, 222 | seq2seq_model): 223 | 224 | self.pp_body = encoder_preprocessor 225 | self.pp_title = decoder_preprocessor 226 | self.seq2seq_model = seq2seq_model 227 | self.encoder_model = extract_encoder_model(seq2seq_model) 228 | self.decoder_model = extract_decoder_model(seq2seq_model) 229 | self.default_max_len_title = self.pp_title.padding_maxlen 230 | self.nn = None 231 | self.rec_df = None 232 | 233 | def generate_issue_title(self, 234 | raw_input_text, 235 | max_len_title=None): 236 | """ 237 | Use the seq2seq model to generate a title given the body of an issue. 238 | 239 | Inputs 240 | ------ 241 | raw_input: str 242 | The body of the issue text as an input string 243 | 244 | max_len_title: int (optional) 245 | The maximum length of the title the model will generate 246 | 247 | """ 248 | if max_len_title is None: 249 | max_len_title = self.default_max_len_title 250 | # get the encoder's features for the decoder 251 | raw_tokenized = self.pp_body.transform([raw_input_text]) 252 | body_encoding = self.encoder_model.predict(raw_tokenized) 253 | # we want to save the encoder's embedding before its updated by decoder 254 | # because we can use that as an embedding for other tasks. 255 | original_body_encoding = body_encoding 256 | state_value = np.array(self.pp_title.token2id['_start_']).reshape(1, 1) 257 | 258 | decoded_sentence = [] 259 | stop_condition = False 260 | while not stop_condition: 261 | preds, st = self.decoder_model.predict([state_value, body_encoding]) 262 | 263 | # We are going to ignore indices 0 (padding) and indices 1 (unknown) 264 | # Argmax will return the integer index corresponding to the 265 | # prediction + 2 b/c we chopped off first two 266 | pred_idx = np.argmax(preds[:, :, 2:]) + 2 267 | 268 | # retrieve word from index prediction 269 | pred_word_str = self.pp_title.id2token[pred_idx] 270 | 271 | if pred_word_str == '_end_' or len(decoded_sentence) >= max_len_title: 272 | stop_condition = True 273 | break 274 | decoded_sentence.append(pred_word_str) 275 | 276 | # update the decoder for the next word 277 | body_encoding = st 278 | state_value = np.array(pred_idx).reshape(1, 1) 279 | 280 | return original_body_encoding, ' '.join(decoded_sentence) 281 | 282 | 283 | def print_example(self, 284 | i, 285 | body_text, 286 | title_text, 287 | url, 288 | threshold): 289 | """ 290 | Prints an example of the model's prediction for manual inspection. 291 | """ 292 | if i: 293 | print('\n\n==============================================') 294 | print(f'============== Example # {i} =================\n') 295 | 296 | if url: 297 | print(url) 298 | 299 | print(f"Issue Body:\n {body_text} \n") 300 | 301 | if title_text: 302 | print(f"Original Title:\n {title_text}") 303 | 304 | emb, gen_title = self.generate_issue_title(body_text) 305 | print(f"\n****** Machine Generated Title (Prediction) ******:\n {gen_title}") 306 | 307 | if self.nn: 308 | # return neighbors and distances 309 | n, d = self.nn.get_nns_by_vector(emb.flatten(), n=4, 310 | include_distances=True) 311 | neighbors = n[1:] 312 | dist = d[1:] 313 | 314 | if min(dist) <= threshold: 315 | cols = ['issue_url', 'issue_title', 'body'] 316 | dfcopy = self.rec_df.iloc[neighbors][cols].copy(deep=True) 317 | dfcopy['dist'] = dist 318 | similar_issues_df = dfcopy.query(f'dist <= {threshold}') 319 | 320 | print("\n**** Similar Issues (using encoder embedding) ****:\n") 321 | display(similar_issues_df) 322 | 323 | 324 | def demo_model_predictions(self, 325 | n, 326 | issue_df, 327 | threshold=1): 328 | """ 329 | Pick n random Issues and display predictions. 330 | 331 | Input: 332 | ------ 333 | n : int 334 | Number of issues to display from issue_df 335 | issue_df : pandas DataFrame 336 | DataFrame that contains two columns: `body` and `issue_title`. 337 | threshold : float 338 | distance threshold for recommendation of similar issues. 339 | 340 | Returns: 341 | -------- 342 | None 343 | Prints the original issue body and the model's prediction. 344 | """ 345 | # Extract body and title from DF 346 | body_text = issue_df.body.tolist() 347 | title_text = issue_df.issue_title.tolist() 348 | url = issue_df.issue_url.tolist() 349 | 350 | demo_list = np.random.randint(low=1, high=len(body_text), size=n) 351 | for i in demo_list: 352 | self.print_example(i, 353 | body_text=body_text[i], 354 | title_text=title_text[i], 355 | url=url[i], 356 | threshold=threshold) 357 | 358 | def prepare_recommender(self, vectorized_array, original_df): 359 | """ 360 | Use the annoy library to build recommender 361 | 362 | Parameters 363 | ---------- 364 | vectorized_array : List[List[int]] 365 | This is the list of list of integers that represents your corpus 366 | that is fed into the seq2seq model for training. 367 | original_df : pandas.DataFrame 368 | This is the original dataframe that has the columns 369 | ['issue_url', 'issue_title', 'body'] 370 | 371 | Returns 372 | ------- 373 | annoy.AnnoyIndex object (see https://github.com/spotify/annoy) 374 | """ 375 | self.rec_df = original_df 376 | emb = self.encoder_model.predict(x=vectorized_array, 377 | batch_size=vectorized_array.shape[0]//200) 378 | 379 | f = emb.shape[1] 380 | self.nn = AnnoyIndex(f) 381 | logging.warning('Adding embeddings') 382 | for i in tqdm(range(len(emb))): 383 | self.nn.add_item(i, emb[i]) 384 | logging.warning('Building trees for similarity lookup.') 385 | self.nn.build(50) 386 | return self.nn 387 | 388 | def set_recsys_data(self, original_df): 389 | self.rec_df = original_df 390 | 391 | def set_recsys_annoyobj(self, annoyobj): 392 | self.nn = annoyobj 393 | 394 | def evaluate_model(self, holdout_bodies, holdout_titles): 395 | """ 396 | Method for calculating BLEU Score. 397 | 398 | Parameters 399 | ---------- 400 | holdout_bodies : List[str] 401 | These are the issue bodies that we want to summarize 402 | holdout_titles : List[str] 403 | This is the ground truth we are trying to predict --> issue titles 404 | 405 | Returns 406 | ------- 407 | bleu : float 408 | The BLEU Score 409 | 410 | """ 411 | actual, predicted = list(), list() 412 | assert len(holdout_bodies) == len(holdout_titles) 413 | num_examples = len(holdout_bodies) 414 | 415 | logging.warning('Generating predictions.') 416 | # step over the whole set TODO: parallelize this 417 | for i in tqdm_notebook(range(num_examples)): 418 | _, yhat = self.generate_issue_title(holdout_bodies[i]) 419 | 420 | actual.append(self.pp_title.process_text([holdout_titles[i]])[0]) 421 | predicted.append(self.pp_title.process_text([yhat])[0]) 422 | # calculate BLEU score 423 | logging.warning('Calculating BLEU.') 424 | 425 | #must be careful with nltk api for corpus_bleu!, 426 | # expects List[List[List[str]]] for ground truth, using List[List[str]] will give you 427 | # erroneous results. 428 | bleu = corpus_bleu([[a] for a in actual], predicted) 429 | return bleu 430 | -------------------------------------------------------------------------------- /notebooks/Tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "collapsed": true 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import pandas as pd\n", 22 | "import logging\n", 23 | "import glob\n", 24 | "from sklearn.model_selection import train_test_split\n", 25 | "pd.set_option('display.max_colwidth', 500)\n", 26 | "logger = logging.getLogger()\n", 27 | "logger.setLevel(logging.WARNING)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "# Process Data" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "Look at filesystem to see files extracted from BigQuery (or Kaggle: https://www.kaggle.com/davidshinn/github-issues/)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 9, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "-rw-r--r-- 1 40294 40294 2.7G Jan 18 2018 github_issues.csv\r\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "!ls -lah | grep github_issues.csv" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "Split data into train and test set and preview data" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 11, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "Train: 1,800,000 rows 3 columns\n", 78 | "Test: 200,000 rows 3 columns\n" 79 | ] 80 | }, 81 | { 82 | "data": { 83 | "text/html": [ 84 | "
\n", 85 | "\n", 98 | "\n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | "
issue_urlissue_titlebody
3165423\"https://github.com/1000hz/bootstrap-validator/issues/574\"uncaught typeerror: f b is not a function when using $ ... .validator 'update'the above error is being thrown when i try and run update via js to include some new fields that have been added dynamically. i'm using backbone.js rendering a script template element to add a new set up fields based on user interaction. the full error message is: uncaught typeerror: f b is not a function at htmlformelement.<anonymous> validator.min.js:9 at function.each jquery.min.js:2 at n.fn.init.each jquery.min.js:2 at n.fn.init.b as validator validator.min.js:9 at n.initskillgroup app.l...
2763145\"https://github.com/quasar-analytics/quasar/issues/2821\"invoke endpoint regressionproblem accures in versions: 21.x.x , 23.x.x and 24.x.x didn't check 22.x.x first query is put to view mount sql select from /test-mount/testdb/flatviz the second one sql select row.seriesone as seriesone, row.seriestwo as seriestwo, min row.measureone as measureone from output_of_first_query as row group by row.seriesone, row.seriestwo order by row.seriesone asc, row.seriestwo asc the third one is sql select from output_of_second_query where seriesone = one-one in 20.14.13 this works as exp...
3882729\"https://github.com/msharov/ustl/issues/79\"build ustl with clang on linuxhi, on ubuntu 14.04 clang 3.4, gcc 4.8.4 and fedora 22 clang 3.5, gcc 5.3.1 : cc=clang cxx=clang++ ./configure --libdir=path/to/libsupc++.a without --libdir it searches for libcxxabi when cc=clang make works fine, make check however shows quite a few diffs. is such configuration supposed to work? thanks!
\n", 128 | "
" 129 | ], 130 | "text/plain": [ 131 | " issue_url \\\n", 132 | "3165423 \"https://github.com/1000hz/bootstrap-validator/issues/574\" \n", 133 | "2763145 \"https://github.com/quasar-analytics/quasar/issues/2821\" \n", 134 | "3882729 \"https://github.com/msharov/ustl/issues/79\" \n", 135 | "\n", 136 | " issue_title \\\n", 137 | "3165423 uncaught typeerror: f b is not a function when using $ ... .validator 'update' \n", 138 | "2763145 invoke endpoint regression \n", 139 | "3882729 build ustl with clang on linux \n", 140 | "\n", 141 | " body \n", 142 | "3165423 the above error is being thrown when i try and run update via js to include some new fields that have been added dynamically. i'm using backbone.js rendering a script template element to add a new set up fields based on user interaction. the full error message is: uncaught typeerror: f b is not a function at htmlformelement. validator.min.js:9 at function.each jquery.min.js:2 at n.fn.init.each jquery.min.js:2 at n.fn.init.b as validator validator.min.js:9 at n.initskillgroup app.l... \n", 143 | "2763145 problem accures in versions: 21.x.x , 23.x.x and 24.x.x didn't check 22.x.x first query is put to view mount sql select from /test-mount/testdb/flatviz the second one sql select row.seriesone as seriesone, row.seriestwo as seriestwo, min row.measureone as measureone from output_of_first_query as row group by row.seriesone, row.seriestwo order by row.seriesone asc, row.seriestwo asc the third one is sql select from output_of_second_query where seriesone = one-one in 20.14.13 this works as exp... \n", 144 | "3882729 hi, on ubuntu 14.04 clang 3.4, gcc 4.8.4 and fedora 22 clang 3.5, gcc 5.3.1 : cc=clang cxx=clang++ ./configure --libdir=path/to/libsupc++.a without --libdir it searches for libcxxabi when cc=clang make works fine, make check however shows quite a few diffs. is such configuration supposed to work? thanks! " 145 | ] 146 | }, 147 | "execution_count": 11, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "#read in data sample 2M rows (for speed of tutorial)\n", 154 | "traindf, testdf = train_test_split(pd.read_csv('github_issues.csv').sample(n=2000000), \n", 155 | " test_size=.10)\n", 156 | "\n", 157 | "\n", 158 | "#print out stats about shape of data\n", 159 | "print(f'Train: {traindf.shape[0]:,} rows {traindf.shape[1]:,} columns')\n", 160 | "print(f'Test: {testdf.shape[0]:,} rows {testdf.shape[1]:,} columns')\n", 161 | "\n", 162 | "# preview data\n", 163 | "traindf.head(3)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "**Convert to lists in preparation for modeling**" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 9, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "'some of the sds alerts do not have clearing alerts. so it always present in alerting directory. these kinds of alerts should be stored in etcd under /alerting/notify, it never goes to alerting/alerts directory and it is not displayed under alerts in ui also. these kinds of alerts are notified via notification channel and deleted via ttl. node_agent should have a logic to handle this in alerting framework.'" 182 | ] 183 | }, 184 | "execution_count": 9, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "train_body_raw = traindf.body.tolist()\n", 191 | "train_title_raw = traindf.issue_title.tolist()\n", 192 | "#preview output of first element\n", 193 | "train_body_raw[0]" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "# Pre-Process Data For Deep Learning\n", 201 | "\n", 202 | "See [this repo](https://github.com/hamelsmu/ktext) for documentation on the ktext package" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 10, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "name": "stderr", 212 | "output_type": "stream", 213 | "text": [ 214 | "/opt/conda/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 215 | " from ._conv import register_converters as _register_converters\n", 216 | "Using TensorFlow backend.\n" 217 | ] 218 | } 219 | ], 220 | "source": [ 221 | "%reload_ext autoreload\n", 222 | "%autoreload 2\n", 223 | "from ktext.preprocess import processor" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 11, 229 | "metadata": {}, 230 | "outputs": [ 231 | { 232 | "name": "stderr", 233 | "output_type": "stream", 234 | "text": [ 235 | "WARNING:root:....tokenizing data\n", 236 | "WARNING:root:(1/3) done. 1738 sec\n", 237 | "WARNING:root:....building corpus\n", 238 | "WARNING:root:(2/3) done. 568 sec\n", 239 | "WARNING:root:....consolidating corpus\n", 240 | "WARNING:root:(3/3) done. 9 sec\n", 241 | "WARNING:root:Finished parsing 1,800,000 documents.\n", 242 | "WARNING:root:...fit is finished, beginning transform\n", 243 | "WARNING:root:done. 733 sec\n" 244 | ] 245 | }, 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | "CPU times: user 22min 17s, sys: 1min 16s, total: 23min 34s\n", 251 | "Wall time: 50min 53s\n" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "%%time\n", 257 | "# Clean, tokenize, and apply padding / truncating such that each document length = 70\n", 258 | "# also, retain only the top 8,000 words in the vocabulary and set the remaining words\n", 259 | "# to 1 which will become common index for rare words \n", 260 | "body_pp = processor(keep_n=8000, padding_maxlen=70)\n", 261 | "train_body_vecs = body_pp.fit_transform(train_body_raw)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "#### Look at one example of processed issue bodies" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 12, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | "\n", 281 | "original string:\n", 282 | " some of the sds alerts do not have clearing alerts. so it always present in alerting directory. these kinds of alerts should be stored in etcd under /alerting/notify, it never goes to alerting/alerts directory and it is not displayed under alerts in ui also. these kinds of alerts are notified via notification channel and deleted via ttl. node_agent should have a logic to handle this in alerting framework. \n", 283 | "\n", 284 | "after pre-processing:\n", 285 | " [37 33 39 1 6 17 29 22 13 6 3 36 25 8 34 23 1 15 3 40 26 33 6 35\n", 286 | " 11 38 23 18 45 1 4 32 2 25 28 20 42 1 4 6 15 9 25 24 29 16 45 6\n", 287 | " 23 44 7 3 40 26 33 6 10 31 46 30 12 9 14 46 43 3 1 35 22 5] \n", 288 | "\n" 289 | ] 290 | } 291 | ], 292 | "source": [ 293 | "print('\\noriginal string:\\n', train_body_raw[0], '\\n')\n", 294 | "print('after pre-processing:\\n', train_body_vecs[0], '\\n')" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 13, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "name": "stderr", 304 | "output_type": "stream", 305 | "text": [ 306 | "WARNING:root:....tokenizing data\n", 307 | "WARNING:root:(1/3) done. 222 sec\n", 308 | "WARNING:root:....building corpus\n", 309 | "WARNING:root:(2/3) done. 35 sec\n", 310 | "WARNING:root:....consolidating corpus\n", 311 | "WARNING:root:(3/3) done. 2 sec\n", 312 | "WARNING:root:Finished parsing 1,800,000 documents.\n", 313 | "WARNING:root:...fit is finished, beginning transform\n", 314 | "WARNING:root:done. 101 sec\n" 315 | ] 316 | } 317 | ], 318 | "source": [ 319 | "# Instantiate a text processor for the titles, with some different parameters\n", 320 | "# append_indicators = True appends the tokens '_start_' and '_end_' to each\n", 321 | "# document\n", 322 | "# padding = 'post' means that zero padding is appended to the end of the \n", 323 | "# of the document (as opposed to the default which is 'pre')\n", 324 | "title_pp = processor(append_indicators=True, keep_n=4500, \n", 325 | " padding_maxlen=12, padding ='post')\n", 326 | "\n", 327 | "# process the title data\n", 328 | "train_title_vecs = title_pp.fit_transform(train_title_raw)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "#### Look at one example of processed issue titles" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 14, 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "name": "stdout", 345 | "output_type": "stream", 346 | "text": [ 347 | "\n", 348 | "original string:\n", 349 | " node_agent should handle sds native alerts also\n", 350 | "after pre-processing:\n", 351 | " [3 1 8 6 1 7 4 5 2 0 0 0]\n" 352 | ] 353 | } 354 | ], 355 | "source": [ 356 | "print('\\noriginal string:\\n', train_title_raw[0])\n", 357 | "print('after pre-processing:\\n', train_title_vecs[0])" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "Serialize all of this to disk for later use" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 15, 370 | "metadata": { 371 | "collapsed": true 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "import dill as dpickle\n", 376 | "import numpy as np\n", 377 | "\n", 378 | "# Save the preprocessor\n", 379 | "with open('body_pp.dpkl', 'wb') as f:\n", 380 | " dpickle.dump(body_pp, f)\n", 381 | "\n", 382 | "with open('title_pp.dpkl', 'wb') as f:\n", 383 | " dpickle.dump(title_pp, f)\n", 384 | "\n", 385 | "# Save the processed data\n", 386 | "np.save('train_title_vecs.npy', train_title_vecs)\n", 387 | "np.save('train_body_vecs.npy', train_body_vecs)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "# Define Model Architecture" 395 | ] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": {}, 400 | "source": [ 401 | "### Load the data from disk into variables" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 16, 407 | "metadata": { 408 | "collapsed": true 409 | }, 410 | "outputs": [], 411 | "source": [ 412 | "from seq2seq_utils import load_decoder_inputs, load_encoder_inputs, load_text_processor" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 17, 418 | "metadata": {}, 419 | "outputs": [ 420 | { 421 | "name": "stdout", 422 | "output_type": "stream", 423 | "text": [ 424 | "Shape of encoder input: (1800000, 70)\n", 425 | "Shape of decoder input: (1800000, 11)\n", 426 | "Shape of decoder target: (1800000, 11)\n" 427 | ] 428 | } 429 | ], 430 | "source": [ 431 | "encoder_input_data, doc_length = load_encoder_inputs('train_body_vecs.npy')\n", 432 | "decoder_input_data, decoder_target_data = load_decoder_inputs('train_title_vecs.npy')" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": 18, 438 | "metadata": {}, 439 | "outputs": [ 440 | { 441 | "name": "stdout", 442 | "output_type": "stream", 443 | "text": [ 444 | "Size of vocabulary for body_pp.dpkl: 8,002\n", 445 | "Size of vocabulary for title_pp.dpkl: 4,502\n" 446 | ] 447 | } 448 | ], 449 | "source": [ 450 | "num_encoder_tokens, body_pp = load_text_processor('body_pp.dpkl')\n", 451 | "num_decoder_tokens, title_pp = load_text_processor('title_pp.dpkl')" 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "metadata": {}, 457 | "source": [ 458 | "### Define Model Architecture" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 19, 464 | "metadata": { 465 | "collapsed": true 466 | }, 467 | "outputs": [], 468 | "source": [ 469 | "%matplotlib inline\n", 470 | "from keras.models import Model\n", 471 | "from keras.layers import Input, LSTM, GRU, Dense, Embedding, Bidirectional, BatchNormalization\n", 472 | "from keras import optimizers" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 20, 478 | "metadata": { 479 | "collapsed": true 480 | }, 481 | "outputs": [], 482 | "source": [ 483 | "#arbitrarly set latent dimension for embedding and hidden units\n", 484 | "latent_dim = 300\n", 485 | "\n", 486 | "##### Define Model Architecture ######\n", 487 | "\n", 488 | "########################\n", 489 | "#### Encoder Model ####\n", 490 | "encoder_inputs = Input(shape=(doc_length,), name='Encoder-Input')\n", 491 | "\n", 492 | "# Word embeding for encoder (ex: Issue Body)\n", 493 | "x = Embedding(num_encoder_tokens, latent_dim, name='Body-Word-Embedding', mask_zero=False)(encoder_inputs)\n", 494 | "x = BatchNormalization(name='Encoder-Batchnorm-1')(x)\n", 495 | "\n", 496 | "# Intermediate GRU layer (optional)\n", 497 | "#x = GRU(latent_dim, name='Encoder-Intermediate-GRU', return_sequences=True)(x)\n", 498 | "#x = BatchNormalization(name='Encoder-Batchnorm-2')(x)\n", 499 | "\n", 500 | "# We do not need the `encoder_output` just the hidden state.\n", 501 | "_, state_h = GRU(latent_dim, return_state=True, name='Encoder-Last-GRU')(x)\n", 502 | "\n", 503 | "# Encapsulate the encoder as a separate entity so we can just \n", 504 | "# encode without decoding if we want to.\n", 505 | "encoder_model = Model(inputs=encoder_inputs, outputs=state_h, name='Encoder-Model')\n", 506 | "\n", 507 | "seq2seq_encoder_out = encoder_model(encoder_inputs)\n", 508 | "\n", 509 | "########################\n", 510 | "#### Decoder Model ####\n", 511 | "decoder_inputs = Input(shape=(None,), name='Decoder-Input') # for teacher forcing\n", 512 | "\n", 513 | "# Word Embedding For Decoder (ex: Issue Titles)\n", 514 | "dec_emb = Embedding(num_decoder_tokens, latent_dim, name='Decoder-Word-Embedding', mask_zero=False)(decoder_inputs)\n", 515 | "dec_bn = BatchNormalization(name='Decoder-Batchnorm-1')(dec_emb)\n", 516 | "\n", 517 | "# Set up the decoder, using `decoder_state_input` as initial state.\n", 518 | "decoder_gru = GRU(latent_dim, return_state=True, return_sequences=True, name='Decoder-GRU')\n", 519 | "decoder_gru_output, _ = decoder_gru(dec_bn, initial_state=seq2seq_encoder_out)\n", 520 | "x = BatchNormalization(name='Decoder-Batchnorm-2')(decoder_gru_output)\n", 521 | "\n", 522 | "# Dense layer for prediction\n", 523 | "decoder_dense = Dense(num_decoder_tokens, activation='softmax', name='Final-Output-Dense')\n", 524 | "decoder_outputs = decoder_dense(x)\n", 525 | "\n", 526 | "########################\n", 527 | "#### Seq2Seq Model ####\n", 528 | "\n", 529 | "#seq2seq_decoder_out = decoder_model([decoder_inputs, seq2seq_encoder_out])\n", 530 | "seq2seq_Model = Model([encoder_inputs, decoder_inputs], decoder_outputs)\n", 531 | "\n", 532 | "\n", 533 | "seq2seq_Model.compile(optimizer=optimizers.Nadam(lr=0.001), loss='sparse_categorical_crossentropy')" 534 | ] 535 | }, 536 | { 537 | "cell_type": "markdown", 538 | "metadata": {}, 539 | "source": [ 540 | "** Examine Model Architecture Summary **" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 21, 546 | "metadata": {}, 547 | "outputs": [ 548 | { 549 | "name": "stdout", 550 | "output_type": "stream", 551 | "text": [ 552 | "__________________________________________________________________________________________________\n", 553 | "Layer (type) Output Shape Param # Connected to \n", 554 | "==================================================================================================\n", 555 | "Decoder-Input (InputLayer) (None, None) 0 \n", 556 | "__________________________________________________________________________________________________\n", 557 | "Decoder-Word-Embedding (Embeddi (None, None, 300) 1350600 Decoder-Input[0][0] \n", 558 | "__________________________________________________________________________________________________\n", 559 | "Encoder-Input (InputLayer) (None, 70) 0 \n", 560 | "__________________________________________________________________________________________________\n", 561 | "Decoder-Batchnorm-1 (BatchNorma (None, None, 300) 1200 Decoder-Word-Embedding[0][0] \n", 562 | "__________________________________________________________________________________________________\n", 563 | "Encoder-Model (Model) (None, 300) 2942700 Encoder-Input[0][0] \n", 564 | "__________________________________________________________________________________________________\n", 565 | "Decoder-GRU (GRU) [(None, None, 300), 540900 Decoder-Batchnorm-1[0][0] \n", 566 | " Encoder-Model[1][0] \n", 567 | "__________________________________________________________________________________________________\n", 568 | "Decoder-Batchnorm-2 (BatchNorma (None, None, 300) 1200 Decoder-GRU[0][0] \n", 569 | "__________________________________________________________________________________________________\n", 570 | "Final-Output-Dense (Dense) (None, None, 4502) 1355102 Decoder-Batchnorm-2[0][0] \n", 571 | "==================================================================================================\n", 572 | "Total params: 6,191,702\n", 573 | "Trainable params: 6,189,902\n", 574 | "Non-trainable params: 1,800\n", 575 | "__________________________________________________________________________________________________\n" 576 | ] 577 | }, 578 | { 579 | "data": { 580 | "image/svg+xml": [ 581 | "\n", 582 | "\n", 583 | "G\n", 584 | "\n", 585 | "\n", 586 | "140465408816968\n", 587 | "\n", 588 | "Decoder-Input: InputLayer\n", 589 | "\n", 590 | "\n", 591 | "140462192858392\n", 592 | "\n", 593 | "Decoder-Word-Embedding: Embedding\n", 594 | "\n", 595 | "\n", 596 | "140465408816968->140462192858392\n", 597 | "\n", 598 | "\n", 599 | "\n", 600 | "\n", 601 | "140454723112688\n", 602 | "\n", 603 | "Decoder-Batchnorm-1: BatchNormalization\n", 604 | "\n", 605 | "\n", 606 | "140462192858392->140454723112688\n", 607 | "\n", 608 | "\n", 609 | "\n", 610 | "\n", 611 | "140462193067792\n", 612 | "\n", 613 | "Encoder-Input: InputLayer\n", 614 | "\n", 615 | "\n", 616 | "140462193062128\n", 617 | "\n", 618 | "Encoder-Model: Model\n", 619 | "\n", 620 | "\n", 621 | "140462193067792->140462193062128\n", 622 | "\n", 623 | "\n", 624 | "\n", 625 | "\n", 626 | "140465622676536\n", 627 | "\n", 628 | "Decoder-GRU: GRU\n", 629 | "\n", 630 | "\n", 631 | "140454723112688->140465622676536\n", 632 | "\n", 633 | "\n", 634 | "\n", 635 | "\n", 636 | "140462193062128->140465622676536\n", 637 | "\n", 638 | "\n", 639 | "\n", 640 | "\n", 641 | "140462190791816\n", 642 | "\n", 643 | "Decoder-Batchnorm-2: BatchNormalization\n", 644 | "\n", 645 | "\n", 646 | "140465622676536->140462190791816\n", 647 | "\n", 648 | "\n", 649 | "\n", 650 | "\n", 651 | "140462190791200\n", 652 | "\n", 653 | "Final-Output-Dense: Dense\n", 654 | "\n", 655 | "\n", 656 | "140462190791816->140462190791200\n", 657 | "\n", 658 | "\n", 659 | "\n", 660 | "\n", 661 | "" 662 | ], 663 | "text/plain": [ 664 | "" 665 | ] 666 | }, 667 | "metadata": {}, 668 | "output_type": "display_data" 669 | } 670 | ], 671 | "source": [ 672 | "from seq2seq_utils import viz_model_architecture\n", 673 | "seq2seq_Model.summary()\n", 674 | "viz_model_architecture(seq2seq_Model)" 675 | ] 676 | }, 677 | { 678 | "cell_type": "markdown", 679 | "metadata": {}, 680 | "source": [ 681 | "# Train Model" 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": 22, 687 | "metadata": {}, 688 | "outputs": [ 689 | { 690 | "name": "stdout", 691 | "output_type": "stream", 692 | "text": [ 693 | "Train on 1584000 samples, validate on 216000 samples\n", 694 | "Epoch 1/7\n", 695 | "1584000/1584000 [==============================] - 265s 167us/step - loss: 2.7234 - val_loss: 2.4321\n" 696 | ] 697 | }, 698 | { 699 | "name": "stderr", 700 | "output_type": "stream", 701 | "text": [ 702 | "/ds/.local/lib/python3.6/site-packages/keras/engine/topology.py:2344: UserWarning: Layer Decoder-GRU was passed non-serializable keyword arguments: {'initial_state': []}. They will not be included in the serialized model (and thus will be missing at deserialization time).\n", 703 | " str(node.arguments) + '. They will not be included '\n" 704 | ] 705 | }, 706 | { 707 | "name": "stdout", 708 | "output_type": "stream", 709 | "text": [ 710 | "Epoch 2/7\n", 711 | "1584000/1584000 [==============================] - 263s 166us/step - loss: 2.3446 - val_loss: 2.3563\n", 712 | "Epoch 3/7\n", 713 | "1584000/1584000 [==============================] - 263s 166us/step - loss: 2.2608 - val_loss: 2.3281\n", 714 | "Epoch 4/7\n", 715 | "1584000/1584000 [==============================] - 263s 166us/step - loss: 2.2117 - val_loss: 2.3161\n", 716 | "Epoch 5/7\n", 717 | "1584000/1584000 [==============================] - 263s 166us/step - loss: 2.1767 - val_loss: 2.3110\n", 718 | "Epoch 6/7\n", 719 | "1584000/1584000 [==============================] - 263s 166us/step - loss: 2.1494 - val_loss: 2.3095\n", 720 | "Epoch 7/7\n", 721 | "1584000/1584000 [==============================] - 265s 167us/step - loss: 2.1268 - val_loss: 2.3124\n" 722 | ] 723 | } 724 | ], 725 | "source": [ 726 | "from keras.callbacks import CSVLogger, ModelCheckpoint\n", 727 | "\n", 728 | "script_name_base = 'tutorial_seq2seq'\n", 729 | "csv_logger = CSVLogger('{:}.log'.format(script_name_base))\n", 730 | "model_checkpoint = ModelCheckpoint('{:}.epoch{{epoch:02d}}-val{{val_loss:.5f}}.hdf5'.format(script_name_base),\n", 731 | " save_best_only=True)\n", 732 | "\n", 733 | "batch_size = 1200\n", 734 | "epochs = 7\n", 735 | "history = seq2seq_Model.fit([encoder_input_data, decoder_input_data], np.expand_dims(decoder_target_data, -1),\n", 736 | " batch_size=batch_size,\n", 737 | " epochs=epochs,\n", 738 | " validation_split=0.12, callbacks=[csv_logger, model_checkpoint])" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": 23, 744 | "metadata": {}, 745 | "outputs": [ 746 | { 747 | "name": "stderr", 748 | "output_type": "stream", 749 | "text": [ 750 | "/ds/.local/lib/python3.6/site-packages/keras/engine/topology.py:2344: UserWarning: Layer Decoder-GRU was passed non-serializable keyword arguments: {'initial_state': []}. They will not be included in the serialized model (and thus will be missing at deserialization time).\n", 751 | " str(node.arguments) + '. They will not be included '\n" 752 | ] 753 | } 754 | ], 755 | "source": [ 756 | "#save model\n", 757 | "seq2seq_Model.save('seq2seq_model_tutorial.h5')" 758 | ] 759 | }, 760 | { 761 | "cell_type": "markdown", 762 | "metadata": {}, 763 | "source": [ 764 | "# See Results On Holdout Set" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": 27, 770 | "metadata": { 771 | "collapsed": true 772 | }, 773 | "outputs": [], 774 | "source": [ 775 | "from seq2seq_utils import Seq2Seq_Inference\n", 776 | "seq2seq_inf = Seq2Seq_Inference(encoder_preprocessor=body_pp,\n", 777 | " decoder_preprocessor=title_pp,\n", 778 | " seq2seq_model=seq2seq_Model)" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 34, 784 | "metadata": { 785 | "scrolled": false 786 | }, 787 | "outputs": [ 788 | { 789 | "name": "stdout", 790 | "output_type": "stream", 791 | "text": [ 792 | "\n", 793 | "\n", 794 | "==============================================\n", 795 | "============== Example # 137237 =================\n", 796 | "\n", 797 | "\"https://github.com/envisionnw/upland/issues/90\"\n", 798 | "Issue Body:\n", 799 | " issue by ncpn https://github.com/ncpn _friday mar 17, 2017 at 19:31 gmt_\n", 800 | "_originally opened as https://github.com/ncpn/upland/issues/90_ ---- check for odd species after completing the plot. compare with list of prior year's species. paper list \n", 801 | "\n", 802 | "Original Title:\n", 803 | " end of plot issues - identify odd species\n", 804 | "\n", 805 | "****** Machine Generated Title (Prediction) ******:\n", 806 | " closed species plot not working\n", 807 | "\n", 808 | "\n", 809 | "==============================================\n", 810 | "============== Example # 132413 =================\n", 811 | "\n", 812 | "\"https://github.com/open-organization-ambassadors/open-org-it-culture/issues/38\"\n", 813 | "Issue Body:\n", 814 | " need to include a specific call to the opensource.com writers list during the announcement part of the book series process. \n", 815 | "\n", 816 | "Original Title:\n", 817 | " update announcement process\n", 818 | "\n", 819 | "****** Machine Generated Title (Prediction) ******:\n", 820 | " add a list of the book series to the book\n", 821 | "\n", 822 | "\n", 823 | "==============================================\n", 824 | "============== Example # 110893 =================\n", 825 | "\n", 826 | "\"https://github.com/arquillian/arquillian-cube/issues/795\"\n", 827 | "Issue Body:\n", 828 | " issue overview add a new property to disable detection of image stream files those ended with -is.yml from target directory. expected behaviour by default cube should not process image stream files if user does not set it. current behaviour cube always try to execute -is.yml files which can cause some problems in most of cases, for example if you are using kuberentes instead of openshift or if you use together fabric8 maven plugin with cube. \n", 829 | "\n", 830 | "Original Title:\n", 831 | " add a new property to disable detection of image stream files\n", 832 | "\n", 833 | "****** Machine Generated Title (Prediction) ******:\n", 834 | " add a way to disable image detection\n", 835 | "\n", 836 | "\n", 837 | "==============================================\n", 838 | "============== Example # 179062 =================\n", 839 | "\n", 840 | "\"https://github.com/TryGhost/Ghost/issues/9299\"\n", 841 | "Issue Body:\n", 842 | " in ghost 1.0 we set out to get rid of incremental ids. we didn't quite achieve it, as the migrations table still uses it, and i believe there is still some hardcoded expectations around the ghost owner id. regarding incremental ids in the migrations table i raised an issue on knex migrator: https://github.com/tryghost/knex-migrator/issues/91 we need to also try to get rid of reliance on ids inside of ghost itself. this issue needs more detail really - raising it as a starting point. \n", 843 | "\n", 844 | "Original Title:\n", 845 | " remove all reliance on incremental ids\n", 846 | "\n", 847 | "****** Machine Generated Title (Prediction) ******:\n", 848 | " incremental migration to oracle db\n", 849 | "\n", 850 | "\n", 851 | "==============================================\n", 852 | "============== Example # 54381 =================\n", 853 | "\n", 854 | "\"https://github.com/googlevr/gvr-unity-sdk/issues/509\"\n", 855 | "Issue Body:\n", 856 | " hi, i'm trying to get the deep link working. i can send the activity, open the app and read dashcode and get booleanextra and all that. so activating the deep link works fine and for example when i call getaction, it returns android.intent.action.view which is correct. the main problem is that getdatastring and getscheme always return null. i'm out of test ideas. do you think its a bug? i have attached the manifest file for your reference. and i'm using gvrintent.getdata that always returns null. islaunchedfromvr and getintenthashcode are working fine. and this is the command line i used to test as an example: ./adb shell am start -w -a android.intent.action.view -d shapevisual://com.shapevisual.app?wl=gfs com.shapevisual.app androidmanifest.xml.txt https://github.com/googlevr/gvr-unity-sdk/files/864522/androidmanifest.xml.txt \n", 857 | "\n", 858 | "Original Title:\n", 859 | " android - deep link - getdatastring always returns null\n", 860 | "\n", 861 | "****** Machine Generated Title (Prediction) ******:\n", 862 | " deep link and null return\n", 863 | "\n", 864 | "\n", 865 | "==============================================\n", 866 | "============== Example # 113341 =================\n", 867 | "\n", 868 | "\"https://github.com/sten626/mirror-match/issues/26\"\n", 869 | "Issue Body:\n", 870 | " right now there is no logging of any kind. read up on proper app logging in angular and add it to the app. \n", 871 | "\n", 872 | "Original Title:\n", 873 | " add logging to app\n", 874 | "\n", 875 | "****** Machine Generated Title (Prediction) ******:\n", 876 | " add logging to app\n", 877 | "\n", 878 | "\n", 879 | "==============================================\n", 880 | "============== Example # 57566 =================\n", 881 | "\n", 882 | "\"https://github.com/convox/praxis/issues/319\"\n", 883 | "Issue Body:\n", 884 | " a pro user has expressed a need for this. \n", 885 | "\n", 886 | "Original Title:\n", 887 | " support for ev green bar ssl certs\n", 888 | "\n", 889 | "****** Machine Generated Title (Prediction) ******:\n", 890 | " add a new user\n", 891 | "\n", 892 | "\n", 893 | "==============================================\n", 894 | "============== Example # 199162 =================\n", 895 | "\n", 896 | "\"https://github.com/ChurchCRM/CRM/issues/2403\"\n", 897 | "Issue Body:\n", 898 | " im presently upgrading to 2.7.2 from 2.7.1, my automated update is not working from my site, so im just uploading the new code and replacing the .htaccess and config.php . is there any other changes that i should be aware of to make sure that the update is successfull ? \n", 899 | "\n", 900 | "Original Title:\n", 901 | " upgrading to 2.7.2\n", 902 | "\n", 903 | "****** Machine Generated Title (Prediction) ******:\n", 904 | " question : how to update the code ?\n", 905 | "\n", 906 | "\n", 907 | "==============================================\n", 908 | "============== Example # 187512 =================\n", 909 | "\n", 910 | "\"https://github.com/keepassxreboot/keepassxc/issues/693\"\n", 911 | "Issue Body:\n", 912 | " i tried to enable and use yubikey on snap version but yubikey doesn't shows on the list of valid devices. i done a comparison with debian package same version 2.2.0 and works fine. so i think the problem could be related to snap access to usb devices. \n", 913 | "\n", 914 | "Original Title:\n", 915 | " yubikey doesn't works on snap version\n", 916 | "\n", 917 | "****** Machine Generated Title (Prediction) ******:\n", 918 | " snap version does n't work on ubuntu * number *\n", 919 | "\n", 920 | "\n", 921 | "==============================================\n", 922 | "============== Example # 18015 =================\n", 923 | "\n", 924 | "\"https://github.com/primefaces/primeng/issues/4456\"\n", 925 | "Issue Body:\n", 926 | " hi folks,

advanced

  • {{file.name}} - {{file.size}} bytes
onuploadhandler event:any { alert 'test' ; for let file of event.files { this.uploadedfiles.push file ; } } onuploadhandler function is not working. what's wrong? i am using primeng - ^5.0.0-rc.0. any idea? \n", 927 | "\n", 928 | "Original Title:\n", 929 | " onupload function is not working!\n", 930 | "\n", 931 | "****** Machine Generated Title (Prediction) ******:\n", 932 | " how to use this with multiple angular - cli * number *\n", 933 | "\n", 934 | "\n", 935 | "==============================================\n", 936 | "============== Example # 153048 =================\n", 937 | "\n", 938 | "\"https://github.com/imabug/raddb/issues/212\"\n", 939 | "Issue Body:\n", 940 | " adding a new test type is broken. this error is produced when the submit button is pressed. httpexception in handler.php line 133: this action is unauthorized. \n", 941 | "\n", 942 | "Original Title:\n", 943 | " error adding new test type\n", 944 | "\n", 945 | "****** Machine Generated Title (Prediction) ******:\n", 946 | " new test issue\n", 947 | "\n", 948 | "\n", 949 | "==============================================\n", 950 | "============== Example # 28004 =================\n", 951 | "\n", 952 | "\"https://github.com/Carthage/Carthage/issues/1936\"\n", 953 | "Issue Body:\n", 954 | " one of the more confusing parts around how carthage installation works currently is that it requires a dylib itself with many more embedded dylibs to be installed alongside the carthage binary carthagekit.framework . if we have support for 1379, would it be possible to statically link carthage's dependencies into the primary carthage binary? if so, it could mean that there would be only one file to install. the primary unanswered question in my mind is whether it's possible to statically link against the swift core dylibs. does anyone know if there's an exposed way to do this? they seem to be present at this path /applications/xcode.app/contents/developer/toolchains/xcodedefault.xctoolchain/usr/lib/swift_static/macosx , so perhaps it is as simple as linking against these .a files. it would be a little strange for carthage to use the non-default flow for embedding built frameworks, but it is a cli rather than an app so perhaps this may be a good choice for this scenario. additionally, this would likely resolve issues where another version of carthagekit.framework steaks into a user's @rpath before the one that they just downloaded, causing a new version of the carthage binary to use the wrong version of carthagekit.framework . thoughts? thanks for reading. \n", 955 | "\n", 956 | "Original Title:\n", 957 | " statically link carthage frameworks into carthage?\n", 958 | "\n", 959 | "****** Machine Generated Title (Prediction) ******:\n", 960 | " add carthage support for carthage\n", 961 | "\n", 962 | "\n", 963 | "==============================================\n", 964 | "============== Example # 131367 =================\n", 965 | "\n", 966 | "\"https://github.com/qlicker/qlicker/issues/341\"\n", 967 | "Issue Body:\n", 968 | " when looking at the course details as a professor, the message add ta to ... shows up when clicking the add student button. \n", 969 | "\n", 970 | "Original Title:\n", 971 | " add student to course displays the wrong message\n", 972 | "\n", 973 | "****** Machine Generated Title (Prediction) ******:\n", 974 | " add ta to ta course\n", 975 | "\n", 976 | "\n", 977 | "==============================================\n", 978 | "============== Example # 82152 =================\n", 979 | "\n", 980 | "\"https://github.com/RetroWoW/RetroWoW/issues/96\"\n", 981 | "Issue Body:\n", 982 | " description : hunter pets are not summoned upon res in battleground current behaviour : hunter pets are not summoned upon res in battleground expected behaviour : spirit guides in battlegrounds should summon/resurrect your current pet when the hunter is resurrected. steps to reproduce the problem : 1. die in a bg 2. get resurected 3. source: http://wowwiki.wikia.com/wiki/patch_1.5.0 \n", 983 | "\n", 984 | "Original Title:\n", 985 | " hunter pets are not summoned upon res in battleground\n", 986 | "\n", 987 | "****** Machine Generated Title (Prediction) ******:\n", 988 | " * number*.2 * number*.5 not possible to be used in a new\n", 989 | "\n", 990 | "\n", 991 | "==============================================\n", 992 | "============== Example # 160809 =================\n", 993 | "\n", 994 | "\"https://github.com/kubernetes/ingress-nginx/issues/1825\"\n", 995 | "Issue Body:\n", 996 | " ie 11 does not support permanent redirect 308 with default headers, so it might not be the best default. it was introduced in this pull request: https://github.com/kubernetes/ingress-nginx/pull/1776 you could also support a fall back mode based on user agent: https://stackoverflow.com/questions/37701100/redirecting-ie-7-and-ie-11-by-useragent-nginx-config it might be possible to get ie 11 to support permanent redirect 308 if the redirect page presented does not trigger compatibility mode, but older versions of ie still won't support 308. \n", 997 | "\n", 998 | "Original Title:\n" 999 | ] 1000 | }, 1001 | { 1002 | "name": "stdout", 1003 | "output_type": "stream", 1004 | "text": [ 1005 | " permanent redirect 308 not supported in ie11\n", 1006 | "\n", 1007 | "****** Machine Generated Title (Prediction) ******:\n", 1008 | " redirect to * number * redirect does not work\n", 1009 | "\n", 1010 | "\n", 1011 | "==============================================\n", 1012 | "============== Example # 197532 =================\n", 1013 | "\n", 1014 | "\"https://github.com/ngrx/platform/issues/49\"\n", 1015 | "Issue Body:\n", 1016 | " export const selectfeature = createfeatureselector 'feature' ; ~~~~~~~~~~~~~~~ error ts4023: exported variable 'selectfeature' has or is using name 'memoizedselector' from external module .../ngrx/modules/store/src/selector but cannot be named. \n", 1017 | "\n", 1018 | "Original Title:\n", 1019 | " memoizedselector needs to be exported as well\n", 1020 | "\n", 1021 | "****** Machine Generated Title (Prediction) ******:\n", 1022 | " export ' ' : ' can not be used in ' module\n", 1023 | "\n", 1024 | "\n", 1025 | "==============================================\n", 1026 | "============== Example # 163719 =================\n", 1027 | "\n", 1028 | "\"https://github.com/aspnet/StaticFiles/issues/211\"\n", 1029 | "Issue Body:\n", 1030 | " staticfiles/src/microsoft.aspnet.staticfiles/fileextensioncontenttypeprovider.cs is missing the outlook .msg mimetype - currently manually doing the following: var provider = new fileextensioncontenttypeprovider ; provider.mappings.add .msg , application/vnd.ms-outlook ; ... but i think it would be good to have it included directly in the code. \n", 1031 | "\n", 1032 | "Original Title:\n", 1033 | " missing .msg mimetype mapping\n", 1034 | "\n", 1035 | "****** Machine Generated Title (Prediction) ******:\n", 1036 | " missing outlook / auto - parsing of the source - code\n", 1037 | "\n", 1038 | "\n", 1039 | "==============================================\n", 1040 | "============== Example # 169328 =================\n", 1041 | "\n", 1042 | "\"https://github.com/epics-modules/autosave/issues/13\"\n", 1043 | "Issue Body:\n", 1044 | " tech talk message as follows: > hello, > > > here at slac, we saw that autosave is failing to recover the data for a waveform with 1 element. for testing purposes, we changed manually nelm to 2 and the recovery succeeded. another test was to manually edit the sav file, adding the keyword @array@ and the recovering succeeded, too.​ > > > i saw the following comment in 5.4.1 release: previously, restoring an array which had been saved with zero or one values failed. also, manual restore including restore by configmenu of any array pv caused a seg fault. . > > > as we are using 5.7.1, i think this problem is already corrected since 5.4.1. the behavior was observed when using epics 3.15. > > > the strange thing is that the same version of autosave seems to be working in epics 3.14, but not in 3.15. > > > i saw that autosave uses ca_element_count from the channel access api. maybe something changed in this function in epics 3.15? > > > thank you for your help. > > > márcio paduan donadio > > system control engineer - slac > \n", 1045 | "\n", 1046 | "Original Title:\n", 1047 | " recovering data from waveform with 1 element\n", 1048 | "\n", 1049 | "****** Machine Generated Title (Prediction) ******:\n", 1050 | " recover the data from the * number *\n", 1051 | "\n", 1052 | "\n", 1053 | "==============================================\n", 1054 | "============== Example # 85076 =================\n", 1055 | "\n", 1056 | "\"https://github.com/kristoferjoseph/flexboxgrid/issues/233\"\n", 1057 | "Issue Body:\n", 1058 | " hello! when i using auto width:
1
2
in chrome, firefox, vivaldi and android devices, all ok - content is transferred as filling: ! screenshot at 15 15-23-31 https://cloud.githubusercontent.com/assets/13396947/22974363/ff5ee854-f392-11e6-91ef-01844d8f655d.png but in safari om macos , displayed content in one row and add horizontal scroll: ! screenshot at 15 15-27-25 https://cloud.githubusercontent.com/assets/13396947/22974432/4ff8d496-f393-11e6-8abe-c04a6029d9ef.png how can i fix it? \n", 1059 | "\n", 1060 | "Original Title:\n", 1061 | " content filling on safari\n", 1062 | "\n", 1063 | "****** Machine Generated Title (Prediction) ******:\n", 1064 | " table not working\n", 1065 | "\n", 1066 | "\n", 1067 | "==============================================\n", 1068 | "============== Example # 13218 =================\n", 1069 | "\n", 1070 | "\"https://github.com/koorellasuresh/UKRegionTest/issues/82803\"\n", 1071 | "Issue Body:\n", 1072 | " first from flow in uk south \n", 1073 | "\n", 1074 | "Original Title:\n", 1075 | " first from flow in uk south\n", 1076 | "\n", 1077 | "****** Machine Generated Title (Prediction) ******:\n", 1078 | " first from flow in uk south\n", 1079 | "\n", 1080 | "\n", 1081 | "==============================================\n", 1082 | "============== Example # 193511 =================\n", 1083 | "\n", 1084 | "\"https://github.com/highcharts/highcharts/issues/7347\"\n", 1085 | "Issue Body:\n", 1086 | " i'm using highstockcharts and recently upgraded to v6.0.3. since then, the tooltips won't be shown anymore as soon as the tooltip is higher than the actual chart. see the minimum example which i've provided. expected behaviour the tooltip should be shown. actual behaviour the tooltip is not shown if the tooltip the height is larger than the actual chart. live demo with steps to reproduce http://jsfiddle.net/n1h3q3sr/ uncomment the part teststring +=
not working anymore to make the tooltip visible. affected browser s chrome / firefox and most probably ie too \n", 1087 | "\n", 1088 | "Original Title:\n", 1089 | " tooltip is not shown anymore if tooltip is larger than the chart\n", 1090 | "\n", 1091 | "****** Machine Generated Title (Prediction) ******:\n", 1092 | " tooltip not shown on * number *\n", 1093 | "\n", 1094 | "\n", 1095 | "==============================================\n", 1096 | "============== Example # 7320 =================\n", 1097 | "\n", 1098 | "\"https://github.com/Criccle/GoogleCombo/issues/1\"\n", 1099 | "Issue Body:\n", 1100 | " unlike google chart for mendix, google combo chart for mendix cannot redraw a chart. only one chart can be drawn only once but no redraw or two charts in a page is possible. thus, this module is useless at all with this condition. \n", 1101 | "\n", 1102 | "Original Title:\n", 1103 | " cannot redraw a chart by google combo chart for mendix\n", 1104 | "\n", 1105 | "****** Machine Generated Title (Prediction) ******:\n", 1106 | " google charts not working\n", 1107 | "\n", 1108 | "\n", 1109 | "==============================================\n", 1110 | "============== Example # 42159 =================\n", 1111 | "\n", 1112 | "\"https://github.com/cviebrock/eloquent-sluggable/issues/337\"\n", 1113 | "Issue Body:\n", 1114 | " hello! i have a model with multiple slug fields setup like this: return 'slug_en' => 'source' => 'name_en' , 'slug_es' => 'source' => 'name_es' , 'slug_fr' => 'source' => 'name_fr' , 'slug_it' => 'source' => 'name_it' , 'slug_de' => 'source' => 'name_de' , ; i want to findbyslug on all of them, i have tried with slugkeyname but no luck. is there something im missing? thank you \n", 1115 | "\n", 1116 | "Original Title:\n", 1117 | " find on multiple slug fields\n", 1118 | "\n", 1119 | "****** Machine Generated Title (Prediction) ******:\n", 1120 | " multiple fields with same name\n", 1121 | "\n", 1122 | "\n", 1123 | "==============================================\n", 1124 | "============== Example # 184774 =================\n", 1125 | "\n", 1126 | "\"https://github.com/hylang/hy/issues/1271\"\n", 1127 | "Issue Body:\n", 1128 | " it was released in 2008, so it's almost 10 years old. also, we don't test it. \n", 1129 | "\n", 1130 | "Original Title:\n", 1131 | " drop support for python 2.6\n", 1132 | "\n", 1133 | "****** Machine Generated Title (Prediction) ******:\n", 1134 | " remove old version from * number *\n", 1135 | "\n", 1136 | "\n", 1137 | "==============================================\n", 1138 | "============== Example # 121668 =================\n", 1139 | "\n", 1140 | "\"https://github.com/MajkiIT/polish-ads-filter/issues/3646\"\n", 1141 | "Issue Body:\n", 1142 | " @majkiit w prebake jest reguła, która psuje logowanie na gg. a najwyraźniej są jeszcze osoby, które korzystają z gg i z listy prebake. więc nie wiem czy warto dać whitelist na nasz filtr czy nie, co o tym sądzisz? https://github.com/azet12/popupblocker/issues/68 issuecomment-329763381 \n", 1143 | "\n", 1144 | "Original Title:\n", 1145 | " gg.pl prebake\n", 1146 | "\n", 1147 | "****** Machine Generated Title (Prediction) ******:\n", 1148 | " problem z login\n", 1149 | "\n", 1150 | "\n", 1151 | "==============================================\n", 1152 | "============== Example # 34871 =================\n", 1153 | "\n", 1154 | "\"https://github.com/WorldDominationArmy/geodk-reqtest-req/issues/1\"\n", 1155 | "Issue Body:\n", 1156 | " afsnit: 3. krav til løsningens overordnede egenskaber relateret: \n", 1157 | "\n", 1158 | "Original Title:\n", 1159 | " krav 1-eksterne kilder til datasupplering\n", 1160 | "\n", 1161 | "****** Machine Generated Title (Prediction) ******:\n", 1162 | " * number * - * number * - * number * -\n", 1163 | "\n", 1164 | "\n", 1165 | "==============================================\n", 1166 | "============== Example # 7978 =================\n", 1167 | "\n", 1168 | "\"https://github.com/blockstack/blockstack-portal/issues/416\"\n", 1169 | "Issue Body:\n", 1170 | " i noticed that gmp is installed by the macos installer script. noticed that the library was not loaded https://github.com/blockstack/blockstack-portal/issues/415 issuecomment-294392702 for albert: library not loaded: /usr/local/opt/gmp/lib/libgmp.10.dylib referenced from: /private/tmp/blockstack-venv/lib/python2.7/site-packages/fastecdsa/curvemath.so reason: image not found he is on macos 10.12. let's see if we can reproduce this error locally. \n", 1171 | "\n", 1172 | "Original Title:\n", 1173 | " testing gmp and libffi installation via script\n", 1174 | "\n", 1175 | "****** Machine Generated Title (Prediction) ******:\n", 1176 | " library not loaded in macos\n", 1177 | "\n", 1178 | "\n", 1179 | "==============================================\n", 1180 | "============== Example # 28099 =================\n", 1181 | "\n", 1182 | "\"https://github.com/EcrituresNumeriques/transformation_jats_erudit/issues/2\"\n", 1183 | "Issue Body:\n", 1184 | " avons-nous une liste définitive des attributs possible de 'fig-type' pour l'extrant de jats? le balisage de mon côté, pour érudit, dépend de la valeur sémantique de l'attribut de cette balise et je voudrais pouvoir styler les différents cas de figures haha , qui sont :
, , , , pour les images et le son. merci. \n", 1185 | "\n", 1186 | "Original Title:\n", 1187 | " attributs possibles pour sous jats\n", 1188 | "\n", 1189 | "****** Machine Generated Title (Prediction) ******:\n", 1190 | " * number * : gestion des dates\n", 1191 | "\n", 1192 | "\n", 1193 | "==============================================\n", 1194 | "============== Example # 24459 =================\n", 1195 | "\n", 1196 | "\"https://github.com/go-gitea/gitea/issues/656\"\n", 1197 | "Issue Body:\n", 1198 | " when adding a new member to an organisation owner team, addteammember does not set watches for the new team member. together with 653 that is pretty confusing behaviour and probably a bug. \n", 1199 | "\n", 1200 | "Original Title:\n", 1201 | " new owner team member does not get watches for org repo's\n" 1202 | ] 1203 | }, 1204 | { 1205 | "name": "stdout", 1206 | "output_type": "stream", 1207 | "text": [ 1208 | "\n", 1209 | "****** Machine Generated Title (Prediction) ******:\n", 1210 | " new member does not set the team member\n", 1211 | "\n", 1212 | "\n", 1213 | "==============================================\n", 1214 | "============== Example # 64152 =================\n", 1215 | "\n", 1216 | "\"https://github.com/linuxboss182/SoftEng-2017/issues/84\"\n", 1217 | "Issue Body:\n", 1218 | " need 3-4 people to present our application to the class on wednesday. applicants must: - not have presented last week - understand how to use the application - be ready to kick ass remember, you have to present at either this wednesday or the next one, so plan accordingly! \n", 1219 | "\n", 1220 | "Original Title:\n", 1221 | " iteration 2 presentation\n", 1222 | "\n", 1223 | "****** Machine Generated Title (Prediction) ******:\n", 1224 | " add a new class to the application\n", 1225 | "\n", 1226 | "\n", 1227 | "==============================================\n", 1228 | "============== Example # 69032 =================\n", 1229 | "\n", 1230 | "\"https://github.com/kartoza/qgis.org.za/issues/184\"\n", 1231 | "Issue Body:\n", 1232 | " i created a form 'contact' and it seems to work but the form labels do not appear on the form so it is a bit useless. please get the labels to appear and merge and release with other improvements asap \n", 1233 | "\n", 1234 | "Original Title:\n", 1235 | " form labels not appearing\n", 1236 | "\n", 1237 | "****** Machine Generated Title (Prediction) ******:\n", 1238 | " form labels not showing up\n", 1239 | "\n", 1240 | "\n", 1241 | "==============================================\n", 1242 | "============== Example # 132252 =================\n", 1243 | "\n", 1244 | "\"https://github.com/NTU-ASH/tree-generator/issues/18\"\n", 1245 | "Issue Body:\n", 1246 | " sort a series of node values within the tree, e.g. -take values from 0-9 up to 15 -sort them into a tree with the middle value as the root and the lowest on the left/highest on the right -perhaps do the same for letters so a is to the left and z is to the right \n", 1247 | "\n", 1248 | "Original Title:\n", 1249 | " binary search tree generation\n", 1250 | "\n", 1251 | "****** Machine Generated Title (Prediction) ******:\n", 1252 | " sort tree nodes\n", 1253 | "\n", 1254 | "\n", 1255 | "==============================================\n", 1256 | "============== Example # 53765 =================\n", 1257 | "\n", 1258 | "\"https://github.com/multiformats/multihash/issues/74\"\n", 1259 | "Issue Body:\n", 1260 | " why not use the existing crypt format? $.$ \n", 1261 | "\n", 1262 | "Original Title:\n", 1263 | " why not use the existing crypt format? $.$\n", 1264 | "\n", 1265 | "****** Machine Generated Title (Prediction) ******:\n", 1266 | " why not use the existing format ?\n", 1267 | "\n", 1268 | "\n", 1269 | "==============================================\n", 1270 | "============== Example # 123370 =================\n", 1271 | "\n", 1272 | "\"https://github.com/PSEBergclubBern/BergclubBern/issues/181\"\n", 1273 | "Issue Body:\n", 1274 | " ich kann bilder einfügen: ! 2017-05-07 14_01_33-tourenbericht anpassen bergclub bern wordpress https://cloud.githubusercontent.com/assets/18282099/25780754/d2260da6-332d-11e7-8350-f46821b300d5.png aber auf der website werden diese nicht angezeigt: ! 2017-05-07 14_00_32-bergclub bern https://cloud.githubusercontent.com/assets/18282099/25780756/defc015c-332d-11e7-982e-e51b758c8179.png \n", 1275 | "\n", 1276 | "Original Title:\n", 1277 | " bilder eines tourenberichts werden nicht angezeigt\n", 1278 | "\n", 1279 | "****** Machine Generated Title (Prediction) ******:\n", 1280 | " website : update to * url *\n", 1281 | "\n", 1282 | "\n", 1283 | "==============================================\n", 1284 | "============== Example # 57636 =================\n", 1285 | "\n", 1286 | "\"https://github.com/postmanlabs/postman-app-support/issues/2996\"\n", 1287 | "Issue Body:\n", 1288 | " welcome to the postman issue tracker. any feature requests / bug reports can be posted here. any security-related bugs should be reported directly to security@getpostman.com version/app information: 1. postman version: 4.10.7 2. app chrome app or mac app : linux app not sure if its also happening on other oss 3. os details: ubuntu 14.06 4. is the interceptor on and enabled in the app: no 5. did you encounter this recently, or has this bug always been there: 6. expected behaviour: explain below steps to repoduce 7. console logs http://blog.getpostman.com/2014/01/27/enabling-chrome-developer-tools-inside-postman/ for the chrome app, view->toggle dev tools for the mac app : 8. screenshots if applicable steps to reproduce the problem: it seems postman ignores the failures if there is 1<= passed test after the failed assertion. i.e: assertion a a=true assertion b=false must fails the test assertion c c=true the final outcome of the postman test must be false because b failed. but postman shows the final results as passed because it looks at c which was true as the last line of the test which is wrong and the test easily ignores any bug and marks the test as successfull. some guidelines: 1. please file newman-related issues at https://github.com/postmanlabs/newman/issues 2. if it’s a cloud-related issue, or you want to include personal information like your username / collection names, mail us at help@getpostman.com 3. if it’s a question anything along the lines of “how do i … in postman” , the answer might lie in our documentation - http://getpostman.com/docs. \n", 1289 | "\n", 1290 | "Original Title:\n", 1291 | " postman is skiping the failed assestions if the last assersion passes\n", 1292 | "\n", 1293 | "****** Machine Generated Title (Prediction) ******:\n", 1294 | " feature request : add support for multiple devices\n", 1295 | "\n", 1296 | "\n", 1297 | "==============================================\n", 1298 | "============== Example # 120461 =================\n", 1299 | "\n", 1300 | "\"https://github.com/libgraviton/gdk-java/issues/23\"\n", 1301 | "Issue Body:\n", 1302 | " with 12 rql support was introduced for string and date fields. since the rql syntax varies depending on the field type, integer and float and boolean are currently not supported, since they get treated as regular string fields. lets have a look at a typical query against a string field _fieldname_ with the value _value_ ?eq fieldname,string:value in this case the string: prefix is not required. it has the same result as ?eq fieldname,value but lets look at another example again a string field ?eq fieldname,string:20 at this point the string: prefix is required, since the graviton rql parser needs to know it's dealing with a string. omitting string: would lead to an empty result on the other hand, if we look at an integer field ?eq integerfieldname,string:20 would lead to an empty result. in this case the query needs to look like ?eq integerfieldname,string:20 the part that needs changing is https://github.com/libgraviton/gdk-java/blob/develop/gdk-core/src/main/java/com/github/libgraviton/gdk/api/query/rql/rql.java l141 where currently every field is always treated as string. \n", 1303 | "\n", 1304 | "Original Title:\n", 1305 | " integer, float and boolean support for rql\n", 1306 | "\n", 1307 | "****** Machine Generated Title (Prediction) ******:\n", 1308 | " support for numeric type\n", 1309 | "\n", 1310 | "\n", 1311 | "==============================================\n", 1312 | "============== Example # 3333 =================\n", 1313 | "\n", 1314 | "\"https://github.com/jpvillaisaza/hangman/issues/15\"\n", 1315 | "Issue Body:\n", 1316 | " losing a game and then restarting shouldn't count as two more games. just one, thanks. \n", 1317 | "\n", 1318 | "Original Title:\n", 1319 | " fix total number of games\n", 1320 | "\n", 1321 | "****** Machine Generated Title (Prediction) ******:\n", 1322 | " game crashes when game is running\n", 1323 | "\n", 1324 | "\n", 1325 | "==============================================\n", 1326 | "============== Example # 133450 =================\n", 1327 | "\n", 1328 | "\"https://github.com/vector-im/riot-meta/issues/28\"\n", 1329 | "Issue Body:\n", 1330 | " placeholder overarching issue to track progress on: general ux polish should probably be decomposed further. \n", 1331 | "\n", 1332 | "Original Title:\n", 1333 | " general ux polish\n", 1334 | "\n", 1335 | "****** Machine Generated Title (Prediction) ******:\n", 1336 | " add more info to the ui\n", 1337 | "\n", 1338 | "\n", 1339 | "==============================================\n", 1340 | "============== Example # 111482 =================\n", 1341 | "\n", 1342 | "\"https://github.com/Viva-con-Agua/drops/issues/21\"\n", 1343 | "Issue Body:\n", 1344 | " currently, the view for defining the roles is very confusing. a search field for searching users has to be implemented and the role selection should be a little bit more user friendly. \n", 1345 | "\n", 1346 | "Original Title:\n", 1347 | " roles definition view\n", 1348 | "\n", 1349 | "****** Machine Generated Title (Prediction) ******:\n", 1350 | " improve search for user roles\n", 1351 | "\n", 1352 | "\n", 1353 | "==============================================\n", 1354 | "============== Example # 154925 =================\n", 1355 | "\n", 1356 | "\"https://github.com/srusskih/SublimeJEDI/issues/228\"\n", 1357 | "Issue Body:\n", 1358 | " i want edit my project config file. according to the readme , by default project config name is .sublime-project , so the project is the folder that holds the project py file? \n", 1359 | "\n", 1360 | "Original Title:\n", 1361 | " how to define a project ?\n", 1362 | "\n", 1363 | "****** Machine Generated Title (Prediction) ******:\n", 1364 | " how to edit project name ?\n", 1365 | "\n", 1366 | "\n", 1367 | "==============================================\n", 1368 | "============== Example # 18851 =================\n", 1369 | "\n", 1370 | "\"https://github.com/climategadgets/servomaster/issues/7\"\n", 1371 | "Issue Body:\n", 1372 | " adafruit dc & stepper motor hat for raspberry pi - mini kit https://www.adafruit.com/product/2348 provides a very reproducible and standard stepper controller solution for raspberry pi, it would be a shame not to support it. this enhancement is much more complicated than 6, though. steppers, unlike servos, do not have inherent limits, and if a stepper is used as a servo, there will have to be solutions put in place to allow limit detection limit switches and torque sensors, to name a couple . in addition, stepper positioning model discrete steps is different from servo positioning model floating point 0 to 1 with adjustable ranges and limits , so some extra work will need to be done. \n", 1373 | "\n", 1374 | "Original Title:\n", 1375 | " implement tb6612 driver for raspberry pi\n", 1376 | "\n", 1377 | "****** Machine Generated Title (Prediction) ******:\n", 1378 | " rpi motor support\n", 1379 | "\n", 1380 | "\n", 1381 | "==============================================\n", 1382 | "============== Example # 174664 =================\n", 1383 | "\n", 1384 | "\"https://github.com/cawilliamson/ansible-gpdpocket/issues/98\"\n", 1385 | "Issue Body:\n", 1386 | " first off, thanks for all the effort going into this, very promising. issue: trying to bootstrap an ubuntu-16.04.3 iso from within an existing ubuntu instance. running into an error, which appears to be when ansible starts getting involved. very possible i'm doing something wrong. e: can not write log is /dev/pts mounted? - posix_openpt 2: no such file or directory + grep -wq -- --nogit + echo 'skip pulling source from git' + cd /usr/src/ansible-gpdpocket + ansible_nocows=1 + ansible-playbook system.yml -e bootstrap=true -v warning : provided hosts list is empty, only localhost is available error! syntax error while loading yaml. the error appears to have been in '/usr/src/ansible-gpdpocket/roles/audio/tasks/main.yml': line 17, column 1, but may be elsewhere in the file depending on the exact syntax problem. the offending line appears to be: - name: create chtrt5645 directory ^ here play recap localhost : ok=23 changed=14 unreachable=0 failed=1 \n", 1387 | "\n", 1388 | "Original Title:\n", 1389 | " syntax error while loading yaml\n", 1390 | "\n", 1391 | "****** Machine Generated Title (Prediction) ******:\n", 1392 | " bootstrap fails to mount in ubuntu\n", 1393 | "\n", 1394 | "\n", 1395 | "==============================================\n", 1396 | "============== Example # 186883 =================\n", 1397 | "\n", 1398 | "\"https://github.com/prettydiff/prettydiff/issues/456\"\n", 1399 | "Issue Body:\n", 1400 | " right now a single language file handles all tasks for a given group of languages. these files need to be broken down into respective pieces: parser beautifier minifier analyzer this is a large architectural effort. fortunately the code is well segmented internally for separation of concerns, so the logic can be broken apart without impact to operational integrity. the challenge is largely administration to ensure all the pieces are included into each of the respective environments and pass data among each other appropriately. \n", 1401 | "\n", 1402 | "Original Title:\n", 1403 | " separate language files into their respective tasks\n", 1404 | "\n", 1405 | "****** Machine Generated Title (Prediction) ******:\n", 1406 | " fix language handling for all languages\n", 1407 | "\n", 1408 | "\n", 1409 | "==============================================\n", 1410 | "============== Example # 151593 =================\n", 1411 | "\n", 1412 | "\"https://github.com/koorellasuresh/UKRegionTest/issues/21568\"\n", 1413 | "Issue Body:\n", 1414 | " first from flow in uk south \n", 1415 | "\n", 1416 | "Original Title:\n", 1417 | " first from flow in uk south\n" 1418 | ] 1419 | }, 1420 | { 1421 | "name": "stdout", 1422 | "output_type": "stream", 1423 | "text": [ 1424 | "\n", 1425 | "****** Machine Generated Title (Prediction) ******:\n", 1426 | " first from flow in uk south\n", 1427 | "\n", 1428 | "\n", 1429 | "==============================================\n", 1430 | "============== Example # 24718 =================\n", 1431 | "\n", 1432 | "\"https://github.com/sensorario/go-tris/issues/34\"\n", 1433 | "Issue Body:\n", 1434 | " move 1 simone : 5 move 2 computer : 2 move 3 simone : 9 move 4 computer : 1 move 5 simone : 3 move 6 computer : 6 move 7 simone : 8 move 8 computer : 7 move 9 simone : 4 \n", 1435 | "\n", 1436 | "Original Title:\n", 1437 | " in this case computer loose\n", 1438 | "\n", 1439 | "****** Machine Generated Title (Prediction) ******:\n", 1440 | " move to * number *\n", 1441 | "\n", 1442 | "\n", 1443 | "==============================================\n", 1444 | "============== Example # 2005 =================\n", 1445 | "\n", 1446 | "\"https://github.com/fossasia/susi_firefoxbot/issues/6\"\n", 1447 | "Issue Body:\n", 1448 | " actual behaviour only text response from the server is shown expected behaviour support different types of responses like images, links, tables etc. would you like to work on the issue ? yes \n", 1449 | "\n", 1450 | "Original Title:\n", 1451 | " support for different types of responses from server\n", 1452 | "\n", 1453 | "****** Machine Generated Title (Prediction) ******:\n", 1454 | " support for different types of response\n", 1455 | "\n", 1456 | "\n", 1457 | "==============================================\n", 1458 | "============== Example # 144769 =================\n", 1459 | "\n", 1460 | "\"https://github.com/reallyenglish/ansible-role-poudriere/issues/8\"\n", 1461 | "Issue Body:\n", 1462 | " the role clones a remote git repository, which takes time to clone. to make the test faster, create a small, but functional repository in the role, and use it for the test. \n", 1463 | "\n", 1464 | "Original Title:\n", 1465 | " create minimal ports tree for the test\n", 1466 | "\n", 1467 | "****** Machine Generated Title (Prediction) ******:\n", 1468 | " add a test to the repo\n", 1469 | "\n", 1470 | "\n", 1471 | "==============================================\n", 1472 | "============== Example # 148842 =================\n", 1473 | "\n", 1474 | "\"https://github.com/felquis/HTJSON/issues/2\"\n", 1475 | "Issue Body:\n", 1476 | " firstly - thanks for making this, i had the same idea. but i would do it slightly differently. exactly 2 differerences. 1. i'd make content an array 2. i'd more the objects inside attr down a level and get rid of it. thus content would be an attribute. for example, instead of : var template = { a : { attr : { href : http://your-domain.com/images/any-image.jpg }, content: { link name } } }; it'd be: var template = a : { href : http://your-domain.com/images/any-image.jpg , content : some text , { img : { src: http://whatever.jpg }, some more text } ; 1. makes it more compact, without losing any document structure information 2. makes it more versatile, and, in fact, makes it complete - it can then encode any html document. \n", 1477 | "\n", 1478 | "Original Title:\n", 1479 | " shouldn't content be an array? is attr really neccessary?\n", 1480 | "\n", 1481 | "****** Machine Generated Title (Prediction) ******:\n", 1482 | " content - type : attribute\n", 1483 | "\n", 1484 | "\n", 1485 | "==============================================\n", 1486 | "============== Example # 83915 =================\n", 1487 | "\n", 1488 | "\"https://github.com/rrdelaney/ava-rethinkdb/issues/3\"\n", 1489 | "Issue Body:\n", 1490 | " when i run the ava-rethinkdb it works but when i ran it through travis ci i get error: spawn rethinkdb enoent is there something i am doing wrong or need to add for ci build? \n", 1491 | "\n", 1492 | "Original Title:\n", 1493 | " error: spawn rethinkdb enoent\n", 1494 | "\n", 1495 | "****** Machine Generated Title (Prediction) ******:\n", 1496 | " spawn enoent on ci\n", 1497 | "\n", 1498 | "\n", 1499 | "==============================================\n", 1500 | "============== Example # 22941 =================\n", 1501 | "\n", 1502 | "\"https://github.com/cartalyst/stripe/issues/90\"\n", 1503 | "Issue Body:\n", 1504 | " i am using your latest release 2.0.9 but that release does not include the payout file. kidnly upload the latest release that has the payout work. \n", 1505 | "\n", 1506 | "Original Title:\n", 1507 | " payout file is missing in latest release.\n", 1508 | "\n", 1509 | "****** Machine Generated Title (Prediction) ******:\n", 1510 | " release * number*.1 missing\n" 1511 | ] 1512 | } 1513 | ], 1514 | "source": [ 1515 | "# this method displays the predictions on random rows of the holdout set\n", 1516 | "seq2seq_inf.demo_model_predictions(n=50, issue_df=testdf)" 1517 | ] 1518 | }, 1519 | { 1520 | "cell_type": "markdown", 1521 | "metadata": {}, 1522 | "source": [ 1523 | "# Feature Extraction Demo" 1524 | ] 1525 | }, 1526 | { 1527 | "cell_type": "code", 1528 | "execution_count": 68, 1529 | "metadata": { 1530 | "collapsed": true 1531 | }, 1532 | "outputs": [], 1533 | "source": [ 1534 | "# Read All 5M data points\n", 1535 | "all_data_df = pd.read_csv('github_issues.csv')\n", 1536 | "# Extract the bodies from this dataframe\n", 1537 | "all_data_bodies = all_data_df['body'].tolist()" 1538 | ] 1539 | }, 1540 | { 1541 | "cell_type": "code", 1542 | "execution_count": 70, 1543 | "metadata": { 1544 | "collapsed": true 1545 | }, 1546 | "outputs": [], 1547 | "source": [ 1548 | "# transform all of the data using the ktext processor\n", 1549 | "all_data_vectorized = body_pp.transform_parallel(all_data_bodies)" 1550 | ] 1551 | }, 1552 | { 1553 | "cell_type": "code", 1554 | "execution_count": 71, 1555 | "metadata": { 1556 | "collapsed": true 1557 | }, 1558 | "outputs": [], 1559 | "source": [ 1560 | "# save transformed data\n", 1561 | "with open('all_data_vectorized.dpkl', 'wb') as f:\n", 1562 | " dpickle.dump(all_data_vectorized, f)" 1563 | ] 1564 | }, 1565 | { 1566 | "cell_type": "code", 1567 | "execution_count": 262, 1568 | "metadata": { 1569 | "collapsed": true 1570 | }, 1571 | "outputs": [], 1572 | "source": [ 1573 | "%reload_ext autoreload\n", 1574 | "%autoreload 2\n", 1575 | "from seq2seq_utils import Seq2Seq_Inference\n", 1576 | "seq2seq_inf_rec = Seq2Seq_Inference(encoder_preprocessor=body_pp,\n", 1577 | " decoder_preprocessor=title_pp,\n", 1578 | " seq2seq_model=seq2seq_Model)\n", 1579 | "recsys_annoyobj = seq2seq_inf_rec.prepare_recommender(all_data_vectorized, all_data_df)" 1580 | ] 1581 | }, 1582 | { 1583 | "cell_type": "markdown", 1584 | "metadata": { 1585 | "collapsed": true 1586 | }, 1587 | "source": [ 1588 | "### Example 1: Issues Installing Python Packages" 1589 | ] 1590 | }, 1591 | { 1592 | "cell_type": "code", 1593 | "execution_count": 223, 1594 | "metadata": {}, 1595 | "outputs": [ 1596 | { 1597 | "name": "stdout", 1598 | "output_type": "stream", 1599 | "text": [ 1600 | "\n", 1601 | "\n", 1602 | "==============================================\n", 1603 | "============== Example # 13563 =================\n", 1604 | "\n", 1605 | "\"https://github.com/bnosac/pattern.nlp/issues/5\"\n", 1606 | "Issue Body:\n", 1607 | " thanks for your package, i can't wait to use it. unfortunately i have issues with the installation. prerequisite is 'first install python version 2.5+ not version 3 '. so this package cant be used with version 3.6 64bit that i have installed? i nevertheless tried to install it using pip, conda is not supported? but got an error: 'syntaxerror: missing parentheses in call to 'print''. besides when i try to run the library in r version 3.3.3. 64 bit i got errors with can_find_python_cmd required_modules = pattern.db : 'error in find_python_cmd......' pattern seems to be written in python but must be used in r, why cant it be used in python? i found another python pattern application that apparently does the same in python: https://pypi.python.org/pypi/pattern how is this related? \n", 1608 | "\n", 1609 | "Original Title:\n", 1610 | " error installation python\n", 1611 | "\n", 1612 | "****** Machine Generated Title (Prediction) ******:\n", 1613 | " install with python * number *\n", 1614 | "\n", 1615 | "**** Similar Issues (using encoder embedding) ****:\n", 1616 | "\n" 1617 | ] 1618 | }, 1619 | { 1620 | "data": { 1621 | "text/html": [ 1622 | "
\n", 1623 | "\n", 1636 | "\n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | "
issue_urlissue_titlebodydist
286906\"https://github.com/scikit-hep/root_numpy/issues/337\"root 6.10/02 and root_numpy compatibilityi am trying to pip install root_pandas and one of the dependency is root_numpy however some weird reasons i am unable to install it even though i can import root in python. i am working on python3.6 as i am more comfortable with it. is root_numpy is not yet compatible with the latest root?0.694671
314005\"https://github.com/andim/noisyopt/issues/4\"joss review: installing dependencies via piphi, i'm trying to install noisyopt in a clean conda environment running python 3.5. running pip install noisyopt does not install the dependencies numpy, scipy . i see that you do include a requires keyword argument in your setup.py file, does this need to be install_requires ? as in https://packaging.python.org/requirements/ . also, not necessary if you don't want to, but i think it would be good to include a list of dependences somewhere in the readme.0.698265
48120\"https://github.com/turi-code/SFrame/issues/389\"python 3.6 compatiblehi: i tried to install sframe using pip and conda but i can not find anything that will work with python 3.6? has sframe been updated to work with python 3.6 yet? thanks, drew0.718715
\n", 1670 | "
" 1671 | ], 1672 | "text/plain": [ 1673 | " issue_url \\\n", 1674 | "286906 \"https://github.com/scikit-hep/root_numpy/issues/337\" \n", 1675 | "314005 \"https://github.com/andim/noisyopt/issues/4\" \n", 1676 | "48120 \"https://github.com/turi-code/SFrame/issues/389\" \n", 1677 | "\n", 1678 | " issue_title \\\n", 1679 | "286906 root 6.10/02 and root_numpy compatibility \n", 1680 | "314005 joss review: installing dependencies via pip \n", 1681 | "48120 python 3.6 compatible \n", 1682 | "\n", 1683 | " body \\\n", 1684 | "286906 i am trying to pip install root_pandas and one of the dependency is root_numpy however some weird reasons i am unable to install it even though i can import root in python. i am working on python3.6 as i am more comfortable with it. is root_numpy is not yet compatible with the latest root? \n", 1685 | "314005 hi, i'm trying to install noisyopt in a clean conda environment running python 3.5. running pip install noisyopt does not install the dependencies numpy, scipy . i see that you do include a requires keyword argument in your setup.py file, does this need to be install_requires ? as in https://packaging.python.org/requirements/ . also, not necessary if you don't want to, but i think it would be good to include a list of dependences somewhere in the readme. \n", 1686 | "48120 hi: i tried to install sframe using pip and conda but i can not find anything that will work with python 3.6? has sframe been updated to work with python 3.6 yet? thanks, drew \n", 1687 | "\n", 1688 | " dist \n", 1689 | "286906 0.694671 \n", 1690 | "314005 0.698265 \n", 1691 | "48120 0.718715 " 1692 | ] 1693 | }, 1694 | "metadata": {}, 1695 | "output_type": "display_data" 1696 | } 1697 | ], 1698 | "source": [ 1699 | "seq2seq_inf_rec.demo_model_predictions(n=1, issue_df=testdf, threshold=1)" 1700 | ] 1701 | }, 1702 | { 1703 | "cell_type": "markdown", 1704 | "metadata": {}, 1705 | "source": [ 1706 | "### Example 2: Issues asking for feature improvements" 1707 | ] 1708 | }, 1709 | { 1710 | "cell_type": "code", 1711 | "execution_count": 226, 1712 | "metadata": {}, 1713 | "outputs": [ 1714 | { 1715 | "name": "stdout", 1716 | "output_type": "stream", 1717 | "text": [ 1718 | "\n", 1719 | "\n", 1720 | "==============================================\n", 1721 | "============== Example # 157322 =================\n", 1722 | "\n", 1723 | "\"https://github.com/Chingu-cohorts/devgaido/issues/89\"\n", 1724 | "Issue Body:\n", 1725 | " right now, your profile link is https://devgaido.com/profile. this is fine, but it would be really cool if there was a way to share your profile with other people. on my portfolio, i have social media buttons to freecodecamp, github, ect. without a custom link, i cannot show-off what i have done on devgaido to future employers. \n", 1726 | "\n", 1727 | "Original Title:\n", 1728 | " feature request: sharable profile.\n", 1729 | "\n", 1730 | "****** Machine Generated Title (Prediction) ******:\n", 1731 | " add a link to your profile\n", 1732 | "\n", 1733 | "**** Similar Issues (using encoder embedding) ****:\n", 1734 | "\n" 1735 | ] 1736 | }, 1737 | { 1738 | "data": { 1739 | "text/html": [ 1740 | "
\n", 1741 | "\n", 1754 | "\n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | "
issue_urlissue_titlebodydist
250423\"https://github.com/ParabolInc/action/issues/1379\"integrations list view discoverabilityissue - enhancement i was initially confused by the link to my account copy; seeing github in the integrations list made me think it had already been set up . i realize now that i had to allow parabol to post as me. i think that link to my account could use a tooltip explaining what link means, and why you'd want to do so. <img width= 728 alt= screen shot 2017-09-29 at 10 52 05 am src= https://user-images.githubusercontent.com/2146312/31024786-2fd39c46-a50e-11e7-9f2a-6d4a5ed2baeb.png >0.748828
222304\"https://github.com/viosey/hexo-theme-material/issues/166\"allow us to use sns-share for githubi'd love to be able to add a link at the bottom of the page for my github account. however, the sns-share option doesn't currently seem to be able to do this.0.774398
153327\"https://github.com/tobykurien/GoogleApps/issues/31\"drive provide download abilitysometimes people share files via g drive. provided a link this app can show some info about the files but doesn't show the download button. i hope that it can be fixed and users would be able to download files with this app.0.778953
\n", 1788 | "
" 1789 | ], 1790 | "text/plain": [ 1791 | " issue_url \\\n", 1792 | "250423 \"https://github.com/ParabolInc/action/issues/1379\" \n", 1793 | "222304 \"https://github.com/viosey/hexo-theme-material/issues/166\" \n", 1794 | "153327 \"https://github.com/tobykurien/GoogleApps/issues/31\" \n", 1795 | "\n", 1796 | " issue_title \\\n", 1797 | "250423 integrations list view discoverability \n", 1798 | "222304 allow us to use sns-share for github \n", 1799 | "153327 drive provide download ability \n", 1800 | "\n", 1801 | " body \\\n", 1802 | "250423 issue - enhancement i was initially confused by the link to my account copy; seeing github in the integrations list made me think it had already been set up . i realize now that i had to allow parabol to post as me. i think that link to my account could use a tooltip explaining what link means, and why you'd want to do so. screen \n", 1803 | "222304 i'd love to be able to add a link at the bottom of the page for my github account. however, the sns-share option doesn't currently seem to be able to do this. \n", 1804 | "153327 sometimes people share files via g drive. provided a link this app can show some info about the files but doesn't show the download button. i hope that it can be fixed and users would be able to download files with this app. \n", 1805 | "\n", 1806 | " dist \n", 1807 | "250423 0.748828 \n", 1808 | "222304 0.774398 \n", 1809 | "153327 0.778953 " 1810 | ] 1811 | }, 1812 | "metadata": {}, 1813 | "output_type": "display_data" 1814 | } 1815 | ], 1816 | "source": [ 1817 | "seq2seq_inf_rec.demo_model_predictions(n=1, issue_df=testdf, threshold=1)" 1818 | ] 1819 | }, 1820 | { 1821 | "cell_type": "code", 1822 | "execution_count": 78, 1823 | "metadata": {}, 1824 | "outputs": [ 1825 | { 1826 | "data": { 1827 | "text/plain": [ 1828 | "True" 1829 | ] 1830 | }, 1831 | "execution_count": 78, 1832 | "metadata": {}, 1833 | "output_type": "execute_result" 1834 | } 1835 | ], 1836 | "source": [ 1837 | "# incase you need to reset the rec system\n", 1838 | "# seq2seq_inf_rec.set_recsys_annoyobj(recsys_annoyobj)\n", 1839 | "# seq2seq_inf_rec.set_recsys_data(all_data_df)\n", 1840 | "\n", 1841 | "# save object\n", 1842 | "recsys_annoyobj.save('recsys_annoyobj.pkl')" 1843 | ] 1844 | }, 1845 | { 1846 | "cell_type": "code", 1847 | "execution_count": null, 1848 | "metadata": { 1849 | "collapsed": true 1850 | }, 1851 | "outputs": [], 1852 | "source": [] 1853 | } 1854 | ], 1855 | "metadata": { 1856 | "kernelspec": { 1857 | "display_name": "Python 3", 1858 | "language": "python", 1859 | "name": "python3" 1860 | }, 1861 | "language_info": { 1862 | "codemirror_mode": { 1863 | "name": "ipython", 1864 | "version": 3 1865 | }, 1866 | "file_extension": ".py", 1867 | "mimetype": "text/x-python", 1868 | "name": "python", 1869 | "nbconvert_exporter": "python", 1870 | "pygments_lexer": "ipython3", 1871 | "version": "3.6.2" 1872 | }, 1873 | "toc": { 1874 | "nav_menu": { 1875 | "height": "263px", 1876 | "width": "352px" 1877 | }, 1878 | "number_sections": true, 1879 | "sideBar": true, 1880 | "skip_h1_title": false, 1881 | "title_cell": "Table of Contents", 1882 | "title_sidebar": "Contents", 1883 | "toc_cell": true, 1884 | "toc_position": {}, 1885 | "toc_section_display": true, 1886 | "toc_window_display": false 1887 | } 1888 | }, 1889 | "nbformat": 4, 1890 | "nbformat_minor": 2 1891 | } 1892 | --------------------------------------------------------------------------------