├── .gitignore ├── LICENSE ├── README.md ├── docs ├── Graph NLU- Natural Language Understanding with Python and Neo4j.pdf └── IPA_Memory_Dan_Kondratyuk_2017.04.30.pdf ├── notebooks ├── babi_dialogue_ridge.ipynb ├── dynamic_memory_1.ipynb ├── dynamic_memory_2.ipynb ├── dynamic_memory_3.ipynb ├── dynamic_memory_4.ipynb ├── resources │ ├── qa1_single-supporting-fact_test.txt │ ├── qa1_single-supporting-fact_train.txt │ ├── qa2_two-supporting-facts_test.txt │ ├── qa2_two-supporting-facts_train.txt │ ├── qa3_three-supporting-facts_test.txt │ ├── qa3_three-supporting-facts_train.txt │ ├── qa6_yes-no-questions_test.txt │ ├── qa6_yes-no-questions_train.txt │ ├── restaurants_props.pkl │ └── utts_refs.pkl └── screenshots │ ├── dialog-system.png │ ├── global-and-local-list.png │ ├── local-list.png │ ├── mary-john-example.png │ ├── prezzo.png │ ├── qa2-multiple-list.png │ ├── simple-relation.png │ ├── state-graph-1.png │ ├── state-graph-2.png │ └── v4-mary.png └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Vim template 3 | # swap 4 | [._]*.s[a-w][a-z] 5 | [._]s[a-w][a-z] 6 | # session 7 | Session.vim 8 | # temporary 9 | .netrwhist 10 | *~ 11 | # auto-generated tag files 12 | tags 13 | 14 | ### Java template 15 | *.class 16 | 17 | # Mobile Tools for Java (J2ME) 18 | .mtj.tmp/ 19 | 20 | # Package Files # 21 | *.jar 22 | *.war 23 | *.ear 24 | 25 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 26 | hs_err_pid* 27 | 28 | ### Maven template 29 | target/ 30 | pom.xml.tag 31 | pom.xml.releaseBackup 32 | pom.xml.versionsBackup 33 | pom.xml.next 34 | release.properties 35 | dependency-reduced-pom.xml 36 | buildNumber.properties 37 | .mvn/timing.properties 38 | 39 | ### VisualStudioCode template 40 | .vscode 41 | 42 | ### Gradle template 43 | .gradle 44 | build/ 45 | 46 | # Ignore Gradle GUI config 47 | 
gradle-app.setting 48 | 49 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) 50 | !gradle-wrapper.jar 51 | 52 | # Cache of project 53 | .gradletasknamecache 54 | 55 | # # Work around https://youtrack.jetbrains.com/issue/IDEA-116898 56 | # gradle/wrapper/gradle-wrapper.properties 57 | 58 | ### JetBrains template 59 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 60 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 61 | 62 | # User-specific stuff: 63 | .idea/workspace.xml 64 | .idea/tasks.xml 65 | .idea/dictionaries 66 | .idea/vcs.xml 67 | .idea/jsLibraryMappings.xml 68 | 69 | # Sensitive or high-churn files: 70 | .idea/dataSources.ids 71 | .idea/dataSources.xml 72 | .idea/dataSources.local.xml 73 | .idea/sqlDataSources.xml 74 | .idea/dynamic.xml 75 | .idea/uiDesigner.xml 76 | 77 | # Gradle: 78 | .idea/gradle.xml 79 | .idea/libraries 80 | 81 | # Mongo Explorer plugin: 82 | .idea/mongoSettings.xml 83 | 84 | ## File-based project format: 85 | *.iws 86 | 87 | ## Plugin-specific files: 88 | 89 | # IntelliJ 90 | /out/ 91 | .idea 92 | 93 | # mpeltonen/sbt-idea plugin 94 | .idea_modules/ 95 | 96 | # JIRA plugin 97 | atlassian-ide-plugin.xml 98 | 99 | # Crashlytics plugin (for Android Studio and IntelliJ) 100 | com_crashlytics_export_strings.xml 101 | crashlytics.properties 102 | crashlytics-build.properties 103 | fabric.properties 104 | 105 | ### Windows template 106 | # Windows image file caches 107 | Thumbs.db 108 | ehthumbs.db 109 | 110 | # Folder config file 111 | Desktop.ini 112 | 113 | # Recycle Bin used on file shares 114 | $RECYCLE.BIN/ 115 | 116 | # Windows Installer files 117 | *.cab 118 | *.msi 119 | *.msm 120 | *.msp 121 | 122 | # Windows shortcuts 123 | *.lnk 124 | 125 | ### SublimeText template 126 | # cache files for sublime text 127 | *.tmlanguage.cache 128 | *.tmPreferences.cache 129 | *.stTheme.cache 130 | 131 | # workspace files are 
user-specific 132 | *.sublime-workspace 133 | 134 | # project files should be checked into the repository, unless a significant 135 | # proportion of contributors will probably not be using SublimeText 136 | # *.sublime-project 137 | 138 | # sftp configuration file 139 | sftp-config.json 140 | 141 | # Package control specific files 142 | Package Control.last-run 143 | Package Control.ca-list 144 | Package Control.ca-bundle 145 | Package Control.system-ca-bundle 146 | Package Control.cache/ 147 | Package Control.ca-certs/ 148 | bh_unicode_properties.cache 149 | 150 | # Sublime-github package stores a github token in this file 151 | # https://packagecontrol.io/packages/sublime-github 152 | GitHub.sublime-settings 153 | 154 | ### Linux template 155 | # temporary files which can be created if a process still has a handle open of a deleted file 156 | .fuse_hidden* 157 | 158 | # KDE directory preferences 159 | .directory 160 | 161 | # Linux trash folder which might appear on any partition or disk 162 | .Trash-* 163 | 164 | ### Eclipse template 165 | 166 | .metadata 167 | bin/ 168 | tmp/ 169 | *.tmp 170 | *.bak 171 | *.swp 172 | *~.nib 173 | local.properties 174 | .settings/ 175 | .loadpath 176 | .recommenders 177 | 178 | # Eclipse Core 179 | .project 180 | 181 | # External tool builders 182 | .externalToolBuilders/ 183 | 184 | # Locally stored "Eclipse launch configurations" 185 | *.launch 186 | 187 | # PyDev specific (Python IDE for Eclipse) 188 | *.pydevproject 189 | 190 | # CDT-specific (C/C++ Development Tooling) 191 | .cproject 192 | 193 | # JDT-specific (Eclipse Java Development Tools) 194 | .classpath 195 | 196 | # Java annotation processor (APT) 197 | .factorypath 198 | 199 | # PDT-specific (PHP Development Tools) 200 | .buildpath 201 | 202 | # sbteclipse plugin 203 | .target 204 | 205 | # Tern plugin 206 | .tern-project 207 | 208 | # TeXlipse plugin 209 | .texlipse 210 | 211 | # STS (Spring Tool Suite) 212 | .springBeans 213 | 214 | # Code Recommenders 215 | 
.recommenders/ 216 | 217 | ### Project Specific 218 | 219 | .ipynb_checkpoints -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Dan Kondratyuk 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graph NLU 2 | 3 | [![Preview Image](/notebooks/screenshots/qa2-multiple-list.png)](notebooks/dynamic_memory_1.ipynb) 4 | 5 | ## Motivation :bar_chart: 6 | 7 | Graph NLU uses graph databases as a means to represent natural language relationships flexibly and dynamically. 
8 | 9 | The primary motivation for this project is to develop a way to understand natural language dialog in an interactive setting by remembering previous dialog states. [Virtual assistants](https://en.wikipedia.org/wiki/Virtual_assistant_(artificial_intelligence)) like Siri, Google Assistant, and Alexa have the common problem that they behave like amnesiacs, i.e., they do not remember much about previous interactions. 10 | 11 | One proposal to get around the memory problem is by representing the previous dialog states using a persistent graph. Because graphs offer a powerful and interpretable way of encoding high-level representations of entities and their associated relationships, an attractive proposition is to leverage them in processing natural language. Graph databases (e.g., [Neo4j](https://neo4j.com/)) offer a rich suite of tools to quickly construct such graphs and persist them over the long term. 12 | 13 | This project is in its research phase, hence all code in this repository is exploratory. The supplied Jupyter (iPython) notebooks do the following: 14 | 15 | 1. Examine several dialog domains 16 | 1. Explain some of the design considerations for using graphs to process natural language 17 | 1. Define models for solving a dialog domain 18 | 1. Evaluate these models for accuracy and usefulness 19 | 20 | Explanations behind each code snippet are given where possible. [Read the research paper (PDF)](docs/IPA_Memory_Dan_Kondratyuk_2017.04.30.pdf) discussing a more detailed approach to the personal assistant memory problem. 
21 | 22 | ## Getting Started :traffic_light: 23 | 24 | - [Video talk overview (YouTube)](https://www.youtube.com/watch?v=mTCqQ2e08Q8) 25 | - [Video talk slides (PDF)](docs/Graph%20NLU-%20Natural%20Language%20Understanding%20with%20Python%20and%20Neo4j.pdf) 26 | 27 | Get an introduction to this project by viewing the supplied Jupyter notebooks in GitHub under the `notebooks` directory: 28 | 29 | - [dynamic_memory_1](notebooks/dynamic_memory_1.ipynb) - Evaluates the bAbI QA tasks using Neo4j queries 30 | 31 | ## Running the Code :snake: 32 | 33 | The Python code uses the Neo4j graph database to store and query natural language relationships. In addition, several processing steps will require popular Python data processing tools like `pandas`, `numpy`, `sklearn`, and `nltk`. 34 | 35 | ### Prerequisites 36 | 37 | 1. Make sure these are on your system: 38 | 39 | - [Python](https://www.python.org/downloads/) (3.5+) 40 | - [Neo4j](https://neo4j.com/download/community-edition/) (3.1+) 41 | 42 | 2. Install the python packages in `requirements.txt` if you don't have them already. 43 | 44 | ```bash 45 | pip install -r ./requirements.txt 46 | ``` 47 | 48 | ### Running Jupyter Notebooks 49 | 50 | 3. Clone the repository. 51 | 52 | ```bash 53 | git clone https://github.com/Hyperparticle/graph-nlu.git 54 | cd ./graph-nlu/notebooks 55 | ``` 56 | 57 | 4. Run the iPython notebooks with Jupyter. 58 | 59 | ```bash 60 | jupyter notebook 61 | ``` 62 | 63 | 5. Get an introduction to the project with [dynamic_memory_1](notebooks/dynamic_memory_1.ipynb). 64 | 65 | ## Contributing :mega: 66 | 67 | Interested in the project? We'd love to hear your ideas! Open a [GitHub issue](https://github.com/Hyperparticle/graph-nlu/issues) with your comments. 68 | 69 | ## About :clipboard: 70 | 71 | Created by [Dan Kondratyuk](https://hyperparticle.com/about/), a member of [Speech, Language & Interactive Machines (SLIM)](http://coen.boisestate.edu/slim/) at Boise State University. 
72 | -------------------------------------------------------------------------------- /docs/Graph NLU- Natural Language Understanding with Python and Neo4j.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/docs/Graph NLU- Natural Language Understanding with Python and Neo4j.pdf -------------------------------------------------------------------------------- /docs/IPA_Memory_Dan_Kondratyuk_2017.04.30.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/docs/IPA_Memory_Dan_Kondratyuk_2017.04.30.pdf -------------------------------------------------------------------------------- /notebooks/dynamic_memory_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Memory Representation in Dialogue Systems\n", 11 | "\n", 12 | "The following notebook is the result of an NLP project that explores the question, \"How could interaction be stored in memory, and how can that information be leveraged for further use?\" \n", 13 | "\n", 14 | "[Dialog systems](https://en.wikipedia.org/wiki/Dialog_system) can be quite useful, but have difficulty keeping track of concepts and entities dynamically. Commercial implementations among the likes of Siri, Google Assistant, and Alexa are great for performing simple tasks, but fall short when remembering ad-hoc relationships that regularly present themselves in conversation. 
For more information on dialogue systems, graph databases, and ontologies as they relate to this project, see the white paper entitled [IPA_Memory](/files/docs/IPA_Memory_Dan_Kondratyuk_2017.04.30.pdf) under the `docs` directory of this repository.\n", 15 | "\n", 16 | "To enhance the capabilities of dialogue systems, this notebook will provide a simple software implementation of a model that is intended to be dynamic, incremental, flexible, and interpretable. By forming high-level concepts that evolve over time, this model will evaluate the dialogue system's ability to understand user input. This notebook will show how such a system can update its internal state based on natural language facts, and retrieve results based on natural language questions. See the white paper for more details on the rationale behind these design decisions.\n", 17 | "\n", 18 | "The code below is written in Python, and uses a [Neo4j Graph Database](https://neo4j.com/product/) to provide non-volatile storage and efficient querying capabilities.\n", 19 | "\n", 20 | "The test corpus is supplied by the [bAbI Tasks Data 1-20 (v1.2)](https://research.fb.com/downloads/babi/). It contains sequences of English sentences to provide the system knowledge of a simple domain involving characters moving to different rooms and interacting with objects. 
Questions are inserted periodically to evaluate that the system is keeping track of these relationships accurately.\n", 21 | "\n", 22 | "## Prerequisites to Running this Notebook\n", 23 | "- [Python](https://www.python.org/downloads/) (3.5+)\n", 24 | "- Python packages (install via pip): `pandas`, `numpy`, `nltk`, `scikit-learn`, `neo4j-driver`\n", 25 | "- [Neo4j](https://neo4j.com/download/) (3.1+)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "deletable": true, 32 | "editable": true 33 | }, 34 | "source": [ 35 | "# Part 1: bAbI QA 1\n", 36 | "\n", 37 | "## Process the Text\n", 38 | "\n", 39 | "### Import DataFrames\n", 40 | "First we will use `pandas` to import `qa1_single-supporting-fact_train.txt` from our corpus into a DataFrame called `data`. Every line in this document represents one sentence, which will be split using `nltk`'s word tokenizer." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 1, 46 | "metadata": { 47 | "collapsed": true, 48 | "deletable": true, 49 | "editable": true 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "# Import the necessary packages\n", 54 | "import pandas as pd\n", 55 | "import numpy as np\n", 56 | "import nltk\n", 57 | "from sklearn.metrics import accuracy_score" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 2, 63 | "metadata": { 64 | "collapsed": false, 65 | "deletable": true, 66 | "editable": true 67 | }, 68 | "outputs": [ 69 | { 70 | "name": "stdout", 71 | "output_type": "stream", 72 | "text": [ 73 | "showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml\n" 74 | ] 75 | }, 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "True" 80 | ] 81 | }, 82 | "execution_count": 2, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "# Download NLTK packages\n", 89 | "# An OS window should pop up for you to download the appropriate packages\n", 90 | "# Select all-nltk and click on the 
download button. Once download has finished exit the window and continue.\n", 91 | "nltk.download()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 3, 97 | "metadata": { 98 | "collapsed": false, 99 | "deletable": true, 100 | "editable": true 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "# Read the bAbI data as CSV\n", 105 | "filename = 'resources/qa1_single-supporting-fact_train.txt'\n", 106 | "data_qa1 = pd.read_csv(filename, delimiter='\\t', names=['sentence', 'answer', 'factid'])\n", 107 | "data_qa1 = data_qa1.fillna('')" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": { 113 | "deletable": true, 114 | "editable": true 115 | }, 116 | "source": [ 117 | "The cell below shows what the input data looks like. Every `sentence` in this frame can either be a factual statement, or a question about the preceeding statements. Each statement describes four characters moving between six different rooms. The questions periodically ask the room in which a person is currently in, and the objective is to answer them all correctly, matching the corresponding `answer` column (it is blank if the sentence is a statement). The `factid` column indicates the index of the supporting facts for each answer, but we won't be needing it.\n", 118 | "\n", 119 | "Due to the nature of the model, training will not be necessary to answer each question. Therefore, the entire document will be used for test evaluation." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 4, 125 | "metadata": { 126 | "collapsed": false, 127 | "deletable": true, 128 | "editable": true 129 | }, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/html": [ 134 | "
\n", 135 | "\n", 148 | "\n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | "
sentenceanswerfactid
01 Mary moved to the bathroom.
12 John went to the hallway.
23 Where is Mary?bathroom1
34 Daniel went back to the hallway.
45 Sandra moved to the garden.
56 Where is Daniel?hallway4
\n", 196 | "
" 197 | ], 198 | "text/plain": [ 199 | " sentence answer factid\n", 200 | "0 1 Mary moved to the bathroom. \n", 201 | "1 2 John went to the hallway. \n", 202 | "2 3 Where is Mary? bathroom 1\n", 203 | "3 4 Daniel went back to the hallway. \n", 204 | "4 5 Sandra moved to the garden. \n", 205 | "5 6 Where is Daniel? hallway 4" 206 | ] 207 | }, 208 | "execution_count": 4, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "data_qa1[:6]" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | "deletable": true, 221 | "editable": true 222 | }, 223 | "source": [ 224 | "Next, we process this data frame by splitting the sentences and tagging each sentence by its type (statement or question). " 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 5, 230 | "metadata": { 231 | "collapsed": false, 232 | "deletable": true, 233 | "editable": true 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "# Tag each sentence as a statement (S) or question (Q)\n", 238 | "tag_sentence = lambda row: 'S' if row.answer == '' else 'Q'\n", 239 | "data_qa1['type'] = data_qa1.apply(tag_sentence, axis=1)\n", 240 | "\n", 241 | "# Use NLTK to tokenize the sentences into arrays of words\n", 242 | "# If you get an error here, make sure you have downloaded the NLTK packages above\n", 243 | "tokenize = lambda row: nltk.word_tokenize(row.sentence)[1:]\n", 244 | "data_qa1.sentence = data_qa1.apply(tokenize, axis=1)\n", 245 | "\n", 246 | "# Drop the factid column, as we won't need it\n", 247 | "data_qa1 = data_qa1.drop('factid', axis=1)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 6, 253 | "metadata": { 254 | "collapsed": false, 255 | "deletable": true, 256 | "editable": true 257 | }, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/html": [ 262 | "
\n", 263 | "\n", 276 | "\n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | "
sentenceanswertype
0[Mary, moved, to, the, bathroom, .]S
1[John, went, to, the, hallway, .]S
2[Where, is, Mary, ?]bathroomQ
3[Daniel, went, back, to, the, hallway, .]S
4[Sandra, moved, to, the, garden, .]S
5[Where, is, Daniel, ?]hallwayQ
\n", 324 | "
" 325 | ], 326 | "text/plain": [ 327 | " sentence answer type\n", 328 | "0 [Mary, moved, to, the, bathroom, .] S\n", 329 | "1 [John, went, to, the, hallway, .] S\n", 330 | "2 [Where, is, Mary, ?] bathroom Q\n", 331 | "3 [Daniel, went, back, to, the, hallway, .] S\n", 332 | "4 [Sandra, moved, to, the, garden, .] S\n", 333 | "5 [Where, is, Daniel, ?] hallway Q" 334 | ] 335 | }, 336 | "execution_count": 6, 337 | "metadata": {}, 338 | "output_type": "execute_result" 339 | } 340 | ], 341 | "source": [ 342 | "data_qa1[:6]" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": { 348 | "deletable": true, 349 | "editable": true 350 | }, 351 | "source": [ 352 | "We further split the `data_qa1` DataFrame into `statements` and `questions` DataFrames for easy access to all statements and questions respectively." 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 7, 358 | "metadata": { 359 | "collapsed": true, 360 | "deletable": true, 361 | "editable": true 362 | }, 363 | "outputs": [], 364 | "source": [ 365 | "# Create a DataFrame with just the statements\n", 366 | "def statements(df):\n", 367 | " return df[df.type == 'S'] \\\n", 368 | " .reset_index(drop=True) \\\n", 369 | " .drop('answer', axis=1) \\\n", 370 | " .drop('type', axis=1)\n", 371 | "\n", 372 | "# Create a DataFrame with just the questions\n", 373 | "def questions(df):\n", 374 | " return df[df.type == 'Q'] \\\n", 375 | " .reset_index(drop=True) \\\n", 376 | " .drop('type', axis=1)" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 8, 382 | "metadata": { 383 | "collapsed": false, 384 | "deletable": true, 385 | "editable": true 386 | }, 387 | "outputs": [ 388 | { 389 | "data": { 390 | "text/html": [ 391 | "
\n", 392 | "\n", 405 | "\n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | "
sentence
0[Mary, moved, to, the, bathroom, .]
1[John, went, to, the, hallway, .]
2[Daniel, went, back, to, the, hallway, .]
3[Sandra, moved, to, the, garden, .]
\n", 431 | "
" 432 | ], 433 | "text/plain": [ 434 | " sentence\n", 435 | "0 [Mary, moved, to, the, bathroom, .]\n", 436 | "1 [John, went, to, the, hallway, .]\n", 437 | "2 [Daniel, went, back, to, the, hallway, .]\n", 438 | "3 [Sandra, moved, to, the, garden, .]" 439 | ] 440 | }, 441 | "execution_count": 8, 442 | "metadata": {}, 443 | "output_type": "execute_result" 444 | } 445 | ], 446 | "source": [ 447 | "statements(data_qa1)[:4]" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 9, 453 | "metadata": { 454 | "collapsed": false, 455 | "deletable": true, 456 | "editable": true 457 | }, 458 | "outputs": [ 459 | { 460 | "data": { 461 | "text/html": [ 462 | "
\n", 463 | "\n", 476 | "\n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | "
sentenceanswer
0[Where, is, Mary, ?]bathroom
1[Where, is, Daniel, ?]hallway
\n", 497 | "
" 498 | ], 499 | "text/plain": [ 500 | " sentence answer\n", 501 | "0 [Where, is, Mary, ?] bathroom\n", 502 | "1 [Where, is, Daniel, ?] hallway" 503 | ] 504 | }, 505 | "execution_count": 9, 506 | "metadata": {}, 507 | "output_type": "execute_result" 508 | } 509 | ], 510 | "source": [ 511 | "questions(data_qa1)[:2]" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": { 517 | "deletable": true, 518 | "editable": true 519 | }, 520 | "source": [ 521 | "### Extract Entities\n", 522 | "Next, we will extract the relevant entities from each statement and question so that we can more easily reason with these sentences.\n", 523 | "\n", 524 | "#### POS Tagging\n", 525 | "To process each sentence and produce a useful statement or question object, all that is necessary (for this dataset) is to use a part-of-speech tagger. The generated frame below displays the tagged list of (token, word) pairs." 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 10, 531 | "metadata": { 532 | "collapsed": false, 533 | "deletable": true, 534 | "editable": true 535 | }, 536 | "outputs": [], 537 | "source": [ 538 | "# Tag each token as a part of speech\n", 539 | "pos_tag = lambda row: nltk.pos_tag(row.sentence)\n", 540 | "data_qa1['tag'] = data_qa1.apply(pos_tag, axis=1)" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 11, 546 | "metadata": { 547 | "collapsed": false, 548 | "deletable": true, 549 | "editable": true 550 | }, 551 | "outputs": [ 552 | { 553 | "data": { 554 | "text/html": [ 555 | "
\n", 556 | "\n", 569 | "\n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | "
sentencetag
0[Mary, moved, to, the, bathroom, .][(Mary, NNP), (moved, VBD), (to, TO), (the, DT...
1[John, went, to, the, hallway, .][(John, NNP), (went, VBD), (to, TO), (the, DT)...
2[Where, is, Mary, ?][(Where, WRB), (is, VBZ), (Mary, NNP), (?, .)]
3[Daniel, went, back, to, the, hallway, .][(Daniel, NNP), (went, VBD), (back, RB), (to, ...
4[Sandra, moved, to, the, garden, .][(Sandra, NNP), (moved, VBD), (to, TO), (the, ...
\n", 605 | "
" 606 | ], 607 | "text/plain": [ 608 | " sentence \\\n", 609 | "0 [Mary, moved, to, the, bathroom, .] \n", 610 | "1 [John, went, to, the, hallway, .] \n", 611 | "2 [Where, is, Mary, ?] \n", 612 | "3 [Daniel, went, back, to, the, hallway, .] \n", 613 | "4 [Sandra, moved, to, the, garden, .] \n", 614 | "\n", 615 | " tag \n", 616 | "0 [(Mary, NNP), (moved, VBD), (to, TO), (the, DT... \n", 617 | "1 [(John, NNP), (went, VBD), (to, TO), (the, DT)... \n", 618 | "2 [(Where, WRB), (is, VBZ), (Mary, NNP), (?, .)] \n", 619 | "3 [(Daniel, NNP), (went, VBD), (back, RB), (to, ... \n", 620 | "4 [(Sandra, NNP), (moved, VBD), (to, TO), (the, ... " 621 | ] 622 | }, 623 | "execution_count": 11, 624 | "metadata": {}, 625 | "output_type": "execute_result" 626 | } 627 | ], 628 | "source": [ 629 | "data_qa1[['sentence', 'tag']][:5]" 630 | ] 631 | }, 632 | { 633 | "cell_type": "markdown", 634 | "metadata": { 635 | "deletable": true, 636 | "editable": true 637 | }, 638 | "source": [ 639 | "#### Statements\n", 640 | "Due to the simplicity of the data, each statement can be thought of as a `(subject, relation, object)` triple. We would like to define a function called `extract_statement`, that when given a sequence of statement tokens, produces this triple. For instance,\n", 641 | "```\n", 642 | "extract_statement([Mary, moved, to, the, bathroom, .]) = (Mary, moved, bathroom).\n", 643 | "```\n", 644 | "This allows one to construct a graph of relationships between objects, as we will see in the next sections. \n", 645 | "\n", 646 | "We can use the POS tags in the sentence to achieve this. If there is a word tagged as a proper noun, it is the subject, if there's a verb, it is the relation, and if there's a simple noun, it is the object." 
647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 12, 652 | "metadata": { 653 | "collapsed": true, 654 | "deletable": true, 655 | "editable": true 656 | }, 657 | "outputs": [], 658 | "source": [ 659 | "def extract_statement(tags):\n", 660 | " '''Extracts a (subject, relation, object) triple from each statement based on the POS tags'''\n", 661 | " subject, relation, obj = '', '', ''\n", 662 | " for word,tag in tags:\n", 663 | " if tag == 'NNP':\n", 664 | " subject = word\n", 665 | " elif tag == 'VBD' or word == 'journeyed': # TODO: 'journeyed' is tagged improperly\n", 666 | " relation = word\n", 667 | " if tag == 'NNP' or tag == 'NN':\n", 668 | " obj = word\n", 669 | " return (subject, relation, obj)" 670 | ] 671 | }, 672 | { 673 | "cell_type": "markdown", 674 | "metadata": { 675 | "deletable": true, 676 | "editable": true 677 | }, 678 | "source": [ 679 | "#### Questions\n", 680 | "To test the graph, we would like to define another function `extract_question`, that when given a sequence of question tokens, returns the entity that the question is asking for.\n", 681 | "```\n", 682 | "extract_question([Where, is, Mary, ?]) = Mary\n", 683 | "```\n", 684 | "\n", 685 | "The result is the subject we are querying for, whose query should return us a room to answer the question." 
686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": 13, 691 | "metadata": { 692 | "collapsed": true, 693 | "deletable": true, 694 | "editable": true 695 | }, 696 | "outputs": [], 697 | "source": [ 698 | "def extract_question(tags):\n", 699 | " '''Extracts the entity under discussion from each question based on the POS tags'''\n", 700 | " entityUnderDiscussion = ''\n", 701 | " # This will find the last noun in the sentence\n", 702 | " for word,tag in tags:\n", 703 | " if tag == 'NNP' or tag == 'NN':\n", 704 | " entityUnderDiscussion = word\n", 705 | " return entityUnderDiscussion" 706 | ] 707 | }, 708 | { 709 | "cell_type": "markdown", 710 | "metadata": { 711 | "deletable": true, 712 | "editable": true 713 | }, 714 | "source": [ 715 | "Then, call the appropriate function on each `DataFrame` row to extract the corresponding info." 716 | ] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "execution_count": 14, 721 | "metadata": { 722 | "collapsed": true, 723 | "deletable": true, 724 | "editable": true 725 | }, 726 | "outputs": [], 727 | "source": [ 728 | "def extract(row):\n", 729 | " '''Extracts the appropriate data given a processed DataFrame row'''\n", 730 | " if row.type == 'S':\n", 731 | " return extract_statement(row.tag)\n", 732 | " else:\n", 733 | " return extract_question(row.tag)" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": 15, 739 | "metadata": { 740 | "collapsed": false, 741 | "deletable": true, 742 | "editable": true 743 | }, 744 | "outputs": [], 745 | "source": [ 746 | "data_qa1['extracted'] = data_qa1.apply(extract, axis=1)" 747 | ] 748 | }, 749 | { 750 | "cell_type": "code", 751 | "execution_count": 16, 752 | "metadata": { 753 | "collapsed": false, 754 | "deletable": true, 755 | "editable": true 756 | }, 757 | "outputs": [ 758 | { 759 | "data": { 760 | "text/html": [ 761 | "
\n", 762 | "\n", 775 | "\n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | "
sentenceextracted
0[Mary, moved, to, the, bathroom, .](Mary, moved, bathroom)
1[John, went, to, the, hallway, .](John, went, hallway)
2[Where, is, Mary, ?]Mary
3[Daniel, went, back, to, the, hallway, .](Daniel, went, hallway)
4[Sandra, moved, to, the, garden, .](Sandra, moved, garden)
\n", 811 | "
" 812 | ], 813 | "text/plain": [ 814 | " sentence extracted\n", 815 | "0 [Mary, moved, to, the, bathroom, .] (Mary, moved, bathroom)\n", 816 | "1 [John, went, to, the, hallway, .] (John, went, hallway)\n", 817 | "2 [Where, is, Mary, ?] Mary\n", 818 | "3 [Daniel, went, back, to, the, hallway, .] (Daniel, went, hallway)\n", 819 | "4 [Sandra, moved, to, the, garden, .] (Sandra, moved, garden)" 820 | ] 821 | }, 822 | "execution_count": 16, 823 | "metadata": {}, 824 | "output_type": "execute_result" 825 | } 826 | ], 827 | "source": [ 828 | "data_qa1[['sentence', 'extracted']][:5]" 829 | ] 830 | }, 831 | { 832 | "cell_type": "markdown", 833 | "metadata": { 834 | "deletable": true, 835 | "editable": true 836 | }, 837 | "source": [ 838 | "Voila, extraction is complete." 839 | ] 840 | }, 841 | { 842 | "cell_type": "markdown", 843 | "metadata": { 844 | "deletable": true, 845 | "editable": true 846 | }, 847 | "source": [ 848 | "### Debug Functions\n", 849 | "\n", 850 | "These are handy debugging functions that we will use for evaluation." 851 | ] 852 | }, 853 | { 854 | "cell_type": "markdown", 855 | "metadata": { 856 | "deletable": true, 857 | "editable": true 858 | }, 859 | "source": [ 860 | "This function finds all statements that refer to a person." 861 | ] 862 | }, 863 | { 864 | "cell_type": "code", 865 | "execution_count": 17, 866 | "metadata": { 867 | "collapsed": true, 868 | "deletable": true, 869 | "editable": true 870 | }, 871 | "outputs": [], 872 | "source": [ 873 | "def person_statements(person):\n", 874 | " '''Get all statements that refer to the specified person'''\n", 875 | " stat = statements(data_qa1)\n", 876 | " return stat[stat.extracted.map(lambda t: t[0] == person)]" 877 | ] 878 | }, 879 | { 880 | "cell_type": "markdown", 881 | "metadata": { 882 | "deletable": true, 883 | "editable": true 884 | }, 885 | "source": [ 886 | "For instance, we can find all statements that refer to Sandra." 
887 | ] 888 | }, 889 | { 890 | "cell_type": "code", 891 | "execution_count": 18, 892 | "metadata": { 893 | "collapsed": false, 894 | "deletable": true, 895 | "editable": true 896 | }, 897 | "outputs": [ 898 | { 899 | "data": { 900 | "text/html": [ 901 | "
\n", 902 | "\n", 915 | "\n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | "
sentencetagextracted
3[Sandra, moved, to, the, garden, .][(Sandra, NNP), (moved, VBD), (to, TO), (the, ...(Sandra, moved, garden)
5[Sandra, journeyed, to, the, bathroom, .][(Sandra, NNP), (journeyed, VBD), (to, TO), (t...(Sandra, journeyed, bathroom)
10[Sandra, travelled, to, the, office, .][(Sandra, NNP), (travelled, VBD), (to, TO), (t...(Sandra, travelled, office)
\n", 945 | "
" 946 | ], 947 | "text/plain": [ 948 | " sentence \\\n", 949 | "3 [Sandra, moved, to, the, garden, .] \n", 950 | "5 [Sandra, journeyed, to, the, bathroom, .] \n", 951 | "10 [Sandra, travelled, to, the, office, .] \n", 952 | "\n", 953 | " tag \\\n", 954 | "3 [(Sandra, NNP), (moved, VBD), (to, TO), (the, ... \n", 955 | "5 [(Sandra, NNP), (journeyed, VBD), (to, TO), (t... \n", 956 | "10 [(Sandra, NNP), (travelled, VBD), (to, TO), (t... \n", 957 | "\n", 958 | " extracted \n", 959 | "3 (Sandra, moved, garden) \n", 960 | "5 (Sandra, journeyed, bathroom) \n", 961 | "10 (Sandra, travelled, office) " 962 | ] 963 | }, 964 | "execution_count": 18, 965 | "metadata": {}, 966 | "output_type": "execute_result" 967 | } 968 | ], 969 | "source": [ 970 | "person_statements('Sandra')[:3]" 971 | ] 972 | }, 973 | { 974 | "cell_type": "markdown", 975 | "metadata": { 976 | "deletable": true, 977 | "editable": true 978 | }, 979 | "source": [ 980 | "This function finds the `n` most recent statements that refer to a person." 981 | ] 982 | }, 983 | { 984 | "cell_type": "code", 985 | "execution_count": 19, 986 | "metadata": { 987 | "collapsed": true, 988 | "deletable": true, 989 | "editable": true 990 | }, 991 | "outputs": [], 992 | "source": [ 993 | "def person_statements_recent(person, n=5):\n", 994 | " '''Get the n most recent statements that refer to the specified person in reverse chronological order'''\n", 995 | " return person_statements(person)[-n:].iloc[::-1]" 996 | ] 997 | }, 998 | { 999 | "cell_type": "markdown", 1000 | "metadata": { 1001 | "deletable": true, 1002 | "editable": true 1003 | }, 1004 | "source": [ 1005 | "For instance, we can find the 3 most recent statements Daniel has been referred in." 1006 | ] 1007 | }, 1008 | { 1009 | "cell_type": "code", 1010 | "execution_count": 20, 1011 | "metadata": { 1012 | "collapsed": false, 1013 | "deletable": true, 1014 | "editable": true 1015 | }, 1016 | "outputs": [ 1017 | { 1018 | "data": { 1019 | "text/html": [ 1020 | "
\n", 1021 | "\n", 1034 | "\n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | "
sentencetagextracted
1999[Daniel, went, to, the, garden, .][(Daniel, NNP), (went, VBD), (to, TO), (the, D...(Daniel, went, garden)
1996[Daniel, travelled, to, the, kitchen, .][(Daniel, NNP), (travelled, VBD), (to, TO), (t...(Daniel, travelled, kitchen)
1992[Daniel, moved, to, the, office, .][(Daniel, NNP), (moved, VBD), (to, TO), (the, ...(Daniel, moved, office)
\n", 1064 | "
" 1065 | ], 1066 | "text/plain": [ 1067 | " sentence \\\n", 1068 | "1999 [Daniel, went, to, the, garden, .] \n", 1069 | "1996 [Daniel, travelled, to, the, kitchen, .] \n", 1070 | "1992 [Daniel, moved, to, the, office, .] \n", 1071 | "\n", 1072 | " tag \\\n", 1073 | "1999 [(Daniel, NNP), (went, VBD), (to, TO), (the, D... \n", 1074 | "1996 [(Daniel, NNP), (travelled, VBD), (to, TO), (t... \n", 1075 | "1992 [(Daniel, NNP), (moved, VBD), (to, TO), (the, ... \n", 1076 | "\n", 1077 | " extracted \n", 1078 | "1999 (Daniel, went, garden) \n", 1079 | "1996 (Daniel, travelled, kitchen) \n", 1080 | "1992 (Daniel, moved, office) " 1081 | ] 1082 | }, 1083 | "execution_count": 20, 1084 | "metadata": {}, 1085 | "output_type": "execute_result" 1086 | } 1087 | ], 1088 | "source": [ 1089 | "person_statements_recent('Daniel', n=3)" 1090 | ] 1091 | }, 1092 | { 1093 | "cell_type": "markdown", 1094 | "metadata": { 1095 | "deletable": true, 1096 | "editable": true 1097 | }, 1098 | "source": [ 1099 | "## Build the Graph\n", 1100 | "Once we have processed the data into triples, we can build graphs from them. Below we have defined a couple functions to reset the database and run queries. We will use Neo4j's Python driver to accomplish this. Note that if the URL or auth credentials of your Neo4j server are different, you will need to change them below." 
1101 | ] 1102 | }, 1103 | { 1104 | "cell_type": "code", 1105 | "execution_count": 21, 1106 | "metadata": { 1107 | "collapsed": false, 1108 | "deletable": true, 1109 | "editable": true 1110 | }, 1111 | "outputs": [], 1112 | "source": [ 1113 | "from neo4j.v1 import GraphDatabase, basic_auth" 1114 | ] 1115 | }, 1116 | { 1117 | "cell_type": "code", 1118 | "execution_count": 22, 1119 | "metadata": { 1120 | "collapsed": false, 1121 | "deletable": true, 1122 | "editable": true 1123 | }, 1124 | "outputs": [], 1125 | "source": [ 1126 | "# Create a neo4j session\n", 1127 | "# NOTE: Make sure that URL/credentials are correct and that Neo4j is running\n", 1128 | "driver = GraphDatabase.driver('bolt://localhost:7687', auth=basic_auth('neo4j', 'neo4j'))" 1129 | ] 1130 | }, 1131 | { 1132 | "cell_type": "code", 1133 | "execution_count": 23, 1134 | "metadata": { 1135 | "collapsed": true, 1136 | "deletable": true, 1137 | "editable": true 1138 | }, 1139 | "outputs": [], 1140 | "source": [ 1141 | "# WARNING: This function will clear the database when run!\n", 1142 | "# Make sure all important data is backed up before continuing\n", 1143 | "def reset_db():\n", 1144 | " '''Remove all nodes and relationships from the database'''\n", 1145 | " session = driver.session()\n", 1146 | " session.run('MATCH (n) DETACH DELETE n')" 1147 | ] 1148 | }, 1149 | { 1150 | "cell_type": "code", 1151 | "execution_count": 24, 1152 | "metadata": { 1153 | "collapsed": true, 1154 | "deletable": true, 1155 | "editable": true 1156 | }, 1157 | "outputs": [], 1158 | "source": [ 1159 | "def create(query, n=0):\n", 1160 | " '''Given a query, create a graph based on each triple in the extracted statements'''\n", 1161 | " session = driver.session()\n", 1162 | " stat = statements(data_qa1)\n", 1163 | " n = len(stat) if n <= 0 else n # Run the first n statements if specified\n", 1164 | " for subject,relation,obj in stat[:n].extracted:\n", 1165 | " session.run(query, subject=subject, relation=relation, obj=obj)" 1166 | ] 
1167 | }, 1168 | { 1169 | "cell_type": "markdown", 1170 | "metadata": { 1171 | "deletable": true, 1172 | "editable": true 1173 | }, 1174 | "source": [ 1175 | "### V1: Direct relationships\n", 1176 | "One of the first impulses when building the graph may be to represent the subject and object as nodes, and the relations as edges between them." 1177 | ] 1178 | }, 1179 | { 1180 | "cell_type": "code", 1181 | "execution_count": 25, 1182 | "metadata": { 1183 | "collapsed": false, 1184 | "deletable": true, 1185 | "editable": true 1186 | }, 1187 | "outputs": [], 1188 | "source": [ 1189 | "reset_db() # This will clear the database!" 1190 | ] 1191 | }, 1192 | { 1193 | "cell_type": "code", 1194 | "execution_count": 26, 1195 | "metadata": { 1196 | "collapsed": false, 1197 | "deletable": true, 1198 | "editable": true 1199 | }, 1200 | "outputs": [], 1201 | "source": [ 1202 | "# Create a direct relationship between subject and object\n", 1203 | "v1_query = '''\n", 1204 | " MERGE (s:SUBJECT {name: $subject}) \n", 1205 | " MERGE (o:OBJECT {name: $obj}) \n", 1206 | " MERGE (s)-[r:RELATION {name: $relation}]->(o)\n", 1207 | "'''\n", 1208 | "\n", 1209 | "create(v1_query)" 1210 | ] 1211 | }, 1212 | { 1213 | "cell_type": "markdown", 1214 | "metadata": { 1215 | "deletable": true, 1216 | "editable": true 1217 | }, 1218 | "source": [ 1219 | "Run the query below and see what the graph looks like. Pop open a new tab in the Neo4j browser (default http://localhost:7474/browser/) and run the query:\n", 1220 | "```\n", 1221 | "MATCH (n) RETURN n LIMIT 50\n", 1222 | "```\n", 1223 | "The graph is a reasonable first start, as the relations point each person to where they have been. But this poses a potential problem: how do we know where each person is right now, or where they have been previously? All we can know from the graph is which rooms a person has been in, because they may have visited them all multiple times." 
1224 | ] 1225 | }, 1226 | { 1227 | "cell_type": "markdown", 1228 | "metadata": { 1229 | "deletable": true, 1230 | "editable": true 1231 | }, 1232 | "source": [ 1233 | "" 1234 | ] 1235 | }, 1236 | { 1237 | "cell_type": "markdown", 1238 | "metadata": { 1239 | "deletable": true, 1240 | "editable": true 1241 | }, 1242 | "source": [ 1243 | "### V2: Nodes for relationships\n", 1244 | "One approach is to form a linked list of \"events\". Each event corresponds to a person updating the room that they are in. Since we chose edges to be our relations, we cannot form edges between relations. To alleviate this, we can transform the relation to a node, and draw two edges to form a 3-node triple." 1245 | ] 1246 | }, 1247 | { 1248 | "cell_type": "code", 1249 | "execution_count": 27, 1250 | "metadata": { 1251 | "collapsed": true, 1252 | "deletable": true, 1253 | "editable": true 1254 | }, 1255 | "outputs": [], 1256 | "source": [ 1257 | "reset_db() # This will clear the database!" 1258 | ] 1259 | }, 1260 | { 1261 | "cell_type": "code", 1262 | "execution_count": 28, 1263 | "metadata": { 1264 | "collapsed": true, 1265 | "deletable": true, 1266 | "editable": true 1267 | }, 1268 | "outputs": [], 1269 | "source": [ 1270 | "# Represent each relation as a node\n", 1271 | "v2_query = '''\n", 1272 | " MERGE (s:SUBJECT {name: $subject})\n", 1273 | " MERGE (o:OBJECT {name: $obj})\n", 1274 | " CREATE (s)-[:R0]->(r:RELATION {name: $relation})-[:R1]->(o)\n", 1275 | "'''\n", 1276 | "\n", 1277 | "create(v2_query)" 1278 | ] 1279 | }, 1280 | { 1281 | "cell_type": "markdown", 1282 | "metadata": { 1283 | "deletable": true, 1284 | "editable": true 1285 | }, 1286 | "source": [ 1287 | "Run the query again and see what changed. This is better, since we can see how often a room has been visited, but still doesn't solve the question as to which room a person is in at any given time." 
1288 | ] 1289 | }, 1290 | { 1291 | "cell_type": "markdown", 1292 | "metadata": { 1293 | "deletable": true, 1294 | "editable": true 1295 | }, 1296 | "source": [ 1297 | "### V3: Linked list of relationships\n", 1298 | "The final step is to build the linked list based on the order in which the relations were created. This will allow us to not only find the room a person is in right now, but produce a list of rooms that they were in, in the order that they were visited." 1299 | ] 1300 | }, 1301 | { 1302 | "cell_type": "code", 1303 | "execution_count": 29, 1304 | "metadata": { 1305 | "collapsed": false, 1306 | "deletable": true, 1307 | "editable": true 1308 | }, 1309 | "outputs": [], 1310 | "source": [ 1311 | "reset_db()" 1312 | ] 1313 | }, 1314 | { 1315 | "cell_type": "code", 1316 | "execution_count": 30, 1317 | "metadata": { 1318 | "collapsed": false, 1319 | "deletable": true, 1320 | "editable": true 1321 | }, 1322 | "outputs": [], 1323 | "source": [ 1324 | "# Represent each relation as a node, ordered by a linked list (per subject)\n", 1325 | "v3_query = '''\n", 1326 | " MERGE (s:SUBJECT {name: $subject})\n", 1327 | " MERGE (o:OBJECT {name: $obj})\n", 1328 | " \n", 1329 | " WITH s,o\n", 1330 | " \n", 1331 | " // Create an new relation between the subject and object\n", 1332 | " CREATE (s)-[:R0]->(r:RELATION {name: $relation})-[:R1]->(o)\n", 1333 | " CREATE (s)-[h:HEAD]->(r) // Make the newly created relation the head of the list\n", 1334 | " \n", 1335 | " WITH s,r,o,h\n", 1336 | " \n", 1337 | " // Find the previous head of the list (if none exist, this query will terminate here)\n", 1338 | " MATCH (s)-[h_prev:HEAD]->(r_prev:RELATION)\n", 1339 | " WHERE h_prev <> h\n", 1340 | " \n", 1341 | " // Complete the link, remove the previous head pointer\n", 1342 | " CREATE (r_prev)-[:NEXT]->(r)\n", 1343 | " DELETE h_prev\n", 1344 | "'''" 1345 | ] 1346 | }, 1347 | { 1348 | "cell_type": "code", 1349 | "execution_count": 31, 1350 | "metadata": { 1351 | "collapsed": true, 1352 | 
"deletable": true, 1353 | "editable": true 1354 | }, 1355 | "outputs": [], 1356 | "source": [ 1357 | "session = driver.session()\n", 1358 | "# Create an index for faster access\n", 1359 | "session.run('CREATE INDEX ON :SUBJECT(name)')\n", 1360 | "session.run('CREATE INDEX ON :RELATION(name)')\n", 1361 | "session.run('CREATE INDEX ON :OBJECT(name)')\n", 1362 | "create(v3_query)" 1363 | ] 1364 | }, 1365 | { 1366 | "cell_type": "markdown", 1367 | "metadata": { 1368 | "deletable": true, 1369 | "editable": true 1370 | }, 1371 | "source": [ 1372 | "Check the new graph out and see what changed. It's helpful to change the colors of the nodes and edges to visualize this better." 1373 | ] 1374 | }, 1375 | { 1376 | "cell_type": "markdown", 1377 | "metadata": { 1378 | "deletable": true, 1379 | "editable": true 1380 | }, 1381 | "source": [ 1382 | "" 1383 | ] 1384 | }, 1385 | { 1386 | "cell_type": "markdown", 1387 | "metadata": { 1388 | "deletable": true, 1389 | "editable": true 1390 | }, 1391 | "source": [ 1392 | "## Query the Graph\n", 1393 | "Now we can ask the graph useful questions.\n", 1394 | "#### Find the room a person is in" 1395 | ] 1396 | }, 1397 | { 1398 | "cell_type": "code", 1399 | "execution_count": 32, 1400 | "metadata": { 1401 | "collapsed": true, 1402 | "deletable": true, 1403 | "editable": true 1404 | }, 1405 | "outputs": [], 1406 | "source": [ 1407 | "def find_person(person):\n", 1408 | " '''Find the room a person is currently in'''\n", 1409 | " query = '''\n", 1410 | " MATCH (s:SUBJECT {name:$name})-[:HEAD]->(r:RELATION)-->(o:OBJECT)\n", 1411 | " RETURN s AS subject, r AS relation, o AS obj\n", 1412 | " '''\n", 1413 | " return session.run(query, name=person)" 1414 | ] 1415 | }, 1416 | { 1417 | "cell_type": "markdown", 1418 | "metadata": { 1419 | "deletable": true, 1420 | "editable": true 1421 | }, 1422 | "source": [ 1423 | "Using the graph-querying function above we can ask, \"Where is Mary?\"" 1424 | ] 1425 | }, 1426 | { 1427 | "cell_type": "code", 1428 | 
"execution_count": 33, 1429 | "metadata": { 1430 | "collapsed": false, 1431 | "deletable": true, 1432 | "editable": true 1433 | }, 1434 | "outputs": [ 1435 | { 1436 | "name": "stdout", 1437 | "output_type": "stream", 1438 | "text": [ 1439 | "kitchen\n" 1440 | ] 1441 | } 1442 | ], 1443 | "source": [ 1444 | "# Note: If this is run less than a second after creating the knowledge graph, \n", 1445 | "# the Python driver may cause a race condition where the graph \n", 1446 | "# isn't finished updating, which could give you the wrong answer.\n", 1447 | "session = driver.session()\n", 1448 | "record = find_person('Mary').single()\n", 1449 | "print(record['obj'].get('name'))" 1450 | ] 1451 | }, 1452 | { 1453 | "cell_type": "markdown", 1454 | "metadata": { 1455 | "deletable": true, 1456 | "editable": true 1457 | }, 1458 | "source": [ 1459 | "According to the graph, Mary is in the kitchen. We can verify that this is true with the debug function below, and we can see the corresponding sentence that generated the relationship as well." 1460 | ] 1461 | }, 1462 | { 1463 | "cell_type": "code", 1464 | "execution_count": 34, 1465 | "metadata": { 1466 | "collapsed": false, 1467 | "deletable": true, 1468 | "editable": true 1469 | }, 1470 | "outputs": [ 1471 | { 1472 | "data": { 1473 | "text/html": [ 1474 | "
\n", 1475 | "\n", 1488 | "\n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | "
sentencetagextracted
1994[Mary, journeyed, to, the, kitchen, .][(Mary, NNP), (journeyed, VBD), (to, TO), (the...(Mary, journeyed, kitchen)
\n", 1506 | "
" 1507 | ], 1508 | "text/plain": [ 1509 | " sentence \\\n", 1510 | "1994 [Mary, journeyed, to, the, kitchen, .] \n", 1511 | "\n", 1512 | " tag \\\n", 1513 | "1994 [(Mary, NNP), (journeyed, VBD), (to, TO), (the... \n", 1514 | "\n", 1515 | " extracted \n", 1516 | "1994 (Mary, journeyed, kitchen) " 1517 | ] 1518 | }, 1519 | "execution_count": 34, 1520 | "metadata": {}, 1521 | "output_type": "execute_result" 1522 | } 1523 | ], 1524 | "source": [ 1525 | "person_statements_recent('Mary', n=1)" 1526 | ] 1527 | }, 1528 | { 1529 | "cell_type": "markdown", 1530 | "metadata": { 1531 | "deletable": true, 1532 | "editable": true 1533 | }, 1534 | "source": [ 1535 | "#### Find the rooms a person has been in (reverse chronological order)" 1536 | ] 1537 | }, 1538 | { 1539 | "cell_type": "code", 1540 | "execution_count": 35, 1541 | "metadata": { 1542 | "collapsed": false, 1543 | "deletable": true, 1544 | "editable": true 1545 | }, 1546 | "outputs": [], 1547 | "source": [ 1548 | "def find_person_history(person, n=100):\n", 1549 | " '''Find the list of rooms a person was in, ordered by recency'''\n", 1550 | " length = str(n) if n >= 1 else ''\n", 1551 | " \n", 1552 | " query = '''\n", 1553 | " MATCH (s:SUBJECT {name:$name})-[:HEAD]->(r:RELATION)-->(o:OBJECT)\n", 1554 | " MATCH (s)-->(r_prev:RELATION)-[k*1..%s]->(r), (r_prev)-->(o_prev:OBJECT)\n", 1555 | " \n", 1556 | " WITH size(k) AS dist, r, o, r_prev, o_prev\n", 1557 | " ORDER BY size(k)\n", 1558 | " \n", 1559 | " WITH r, o, r_prev, o_prev\n", 1560 | " RETURN [r.name] + collect(r_prev.name) AS relation, [o.name] + collect(o_prev.name) AS obj\n", 1561 | " '''\n", 1562 | " query = query % length\n", 1563 | " \n", 1564 | " session = driver.session()\n", 1565 | " record = session.run(query, name=person).single()\n", 1566 | " history = list(zip(record['relation'], record['obj']))[:-1]\n", 1567 | " \n", 1568 | " return history" 1569 | ] 1570 | }, 1571 | { 1572 | "cell_type": "markdown", 1573 | "metadata": { 1574 | "deletable": true, 1575 
| "editable": true 1576 | }, 1577 | "source": [ 1578 | "A more advanced question that we get for free based on the graph structure is, \"Where has John been recently?\"" 1579 | ] 1580 | }, 1581 | { 1582 | "cell_type": "code", 1583 | "execution_count": 36, 1584 | "metadata": { 1585 | "collapsed": false, 1586 | "deletable": true, 1587 | "editable": true, 1588 | "scrolled": true 1589 | }, 1590 | "outputs": [ 1591 | { 1592 | "data": { 1593 | "text/plain": [ 1594 | "[('went', 'bedroom'),\n", 1595 | " ('went', 'garden'),\n", 1596 | " ('went', 'office'),\n", 1597 | " ('journeyed', 'bedroom'),\n", 1598 | " ('travelled', 'hallway')]" 1599 | ] 1600 | }, 1601 | "execution_count": 36, 1602 | "metadata": {}, 1603 | "output_type": "execute_result" 1604 | } 1605 | ], 1606 | "source": [ 1607 | "find_person_history('John', n=5)" 1608 | ] 1609 | }, 1610 | { 1611 | "cell_type": "markdown", 1612 | "metadata": { 1613 | "deletable": true, 1614 | "editable": true 1615 | }, 1616 | "source": [ 1617 | "Verify that John has been to to those places, in that order." 1618 | ] 1619 | }, 1620 | { 1621 | "cell_type": "code", 1622 | "execution_count": 37, 1623 | "metadata": { 1624 | "collapsed": false, 1625 | "deletable": true, 1626 | "editable": true 1627 | }, 1628 | "outputs": [ 1629 | { 1630 | "data": { 1631 | "text/html": [ 1632 | "
\n", 1633 | "\n", 1646 | "\n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | "
sentencetagextracted
1995[John, went, back, to, the, bedroom, .][(John, NNP), (went, VBD), (back, RB), (to, TO...(John, went, bedroom)
1989[John, went, back, to, the, garden, .][(John, NNP), (went, VBD), (back, RB), (to, TO...(John, went, garden)
1986[John, went, back, to, the, office, .][(John, NNP), (went, VBD), (back, RB), (to, TO...(John, went, office)
1982[John, journeyed, to, the, bedroom, .][(John, NNP), (journeyed, NN), (to, TO), (the,...(John, journeyed, bedroom)
1979[John, travelled, to, the, hallway, .][(John, NNP), (travelled, VBD), (to, TO), (the...(John, travelled, hallway)
\n", 1688 | "
" 1689 | ], 1690 | "text/plain": [ 1691 | " sentence \\\n", 1692 | "1995 [John, went, back, to, the, bedroom, .] \n", 1693 | "1989 [John, went, back, to, the, garden, .] \n", 1694 | "1986 [John, went, back, to, the, office, .] \n", 1695 | "1982 [John, journeyed, to, the, bedroom, .] \n", 1696 | "1979 [John, travelled, to, the, hallway, .] \n", 1697 | "\n", 1698 | " tag \\\n", 1699 | "1995 [(John, NNP), (went, VBD), (back, RB), (to, TO... \n", 1700 | "1989 [(John, NNP), (went, VBD), (back, RB), (to, TO... \n", 1701 | "1986 [(John, NNP), (went, VBD), (back, RB), (to, TO... \n", 1702 | "1982 [(John, NNP), (journeyed, NN), (to, TO), (the,... \n", 1703 | "1979 [(John, NNP), (travelled, VBD), (to, TO), (the... \n", 1704 | "\n", 1705 | " extracted \n", 1706 | "1995 (John, went, bedroom) \n", 1707 | "1989 (John, went, garden) \n", 1708 | "1986 (John, went, office) \n", 1709 | "1982 (John, journeyed, bedroom) \n", 1710 | "1979 (John, travelled, hallway) " 1711 | ] 1712 | }, 1713 | "execution_count": 37, 1714 | "metadata": {}, 1715 | "output_type": "execute_result" 1716 | } 1717 | ], 1718 | "source": [ 1719 | "person_statements_recent('John', n=5)" 1720 | ] 1721 | }, 1722 | { 1723 | "cell_type": "markdown", 1724 | "metadata": { 1725 | "deletable": true, 1726 | "editable": true 1727 | }, 1728 | "source": [ 1729 | "#### Find the history of visitors for a room" 1730 | ] 1731 | }, 1732 | { 1733 | "cell_type": "code", 1734 | "execution_count": 38, 1735 | "metadata": { 1736 | "collapsed": true, 1737 | "deletable": true, 1738 | "editable": true 1739 | }, 1740 | "outputs": [], 1741 | "source": [ 1742 | "def find_room_visitors(room):\n", 1743 | " '''Find the list of visitors a room has, ordered by recency'''\n", 1744 | " \n", 1745 | " query = '''\n", 1746 | " MATCH (r:RELATION)-->(o:OBJECT {name:$name})\n", 1747 | " RETURN count(r) AS count\n", 1748 | " '''\n", 1749 | " \n", 1750 | " session = driver.session()\n", 1751 | " record = session.run(query, name=room).single()\n", 1752 | " 
\n", 1753 | " return record['count']" 1754 | ] 1755 | }, 1756 | { 1757 | "cell_type": "markdown", 1758 | "metadata": { 1759 | "deletable": true, 1760 | "editable": true 1761 | }, 1762 | "source": [ 1763 | "Just for fun, we can find out how many times a room has been visited. \"How many times has the office been visited?\"" 1764 | ] 1765 | }, 1766 | { 1767 | "cell_type": "code", 1768 | "execution_count": 39, 1769 | "metadata": { 1770 | "collapsed": false, 1771 | "deletable": true, 1772 | "editable": true 1773 | }, 1774 | "outputs": [ 1775 | { 1776 | "data": { 1777 | "text/plain": [ 1778 | "334" 1779 | ] 1780 | }, 1781 | "execution_count": 39, 1782 | "metadata": {}, 1783 | "output_type": "execute_result" 1784 | } 1785 | ], 1786 | "source": [ 1787 | "find_room_visitors('office')" 1788 | ] 1789 | }, 1790 | { 1791 | "cell_type": "markdown", 1792 | "metadata": { 1793 | "collapsed": true, 1794 | "deletable": true, 1795 | "editable": true 1796 | }, 1797 | "source": [ 1798 | "## Calculate an Accuracy Score" 1799 | ] 1800 | }, 1801 | { 1802 | "cell_type": "code", 1803 | "execution_count": 41, 1804 | "metadata": { 1805 | "collapsed": false, 1806 | "deletable": true, 1807 | "editable": true 1808 | }, 1809 | "outputs": [], 1810 | "source": [ 1811 | "def get_answers(row):\n", 1812 | " '''Given an input row merge the statement in the graph, \n", 1813 | " or query the graph if it is a question'''\n", 1814 | " if row.type == 'S':\n", 1815 | " subject,relation,obj = row.extracted\n", 1816 | " session.run(v3_query, subject=subject, relation=relation, obj=obj)\n", 1817 | " return ''\n", 1818 | " elif row.type == 'Q':\n", 1819 | " person = row.extracted\n", 1820 | " # WARNING: do not consume the result (e.g., call .consume() or .single()) \n", 1821 | " # until the entire iteration is done.\n", 1822 | " # Failure to do so may cause the queries to be VERY slow!\n", 1823 | " return find_person(person)" 1824 | ] 1825 | }, 1826 | { 1827 | "cell_type": "markdown", 1828 | "metadata": { 1829 | 
"deletable": true, 1830 | "editable": true 1831 | }, 1832 | "source": [ 1833 | "Start all over, and run through the entire dataset." 1834 | ] 1835 | }, 1836 | { 1837 | "cell_type": "code", 1838 | "execution_count": 42, 1839 | "metadata": { 1840 | "collapsed": false, 1841 | "deletable": true, 1842 | "editable": true 1843 | }, 1844 | "outputs": [], 1845 | "source": [ 1846 | "reset_db()" 1847 | ] 1848 | }, 1849 | { 1850 | "cell_type": "code", 1851 | "execution_count": 43, 1852 | "metadata": { 1853 | "collapsed": false, 1854 | "deletable": true, 1855 | "editable": true 1856 | }, 1857 | "outputs": [], 1858 | "source": [ 1859 | "session = driver.session()\n", 1860 | "results = data_qa1.apply(get_answers, axis=1)\n", 1861 | "results = [x for x in results if x != '']\n", 1862 | "predicted = [result.single()['obj'].get('name') for result in results]" 1863 | ] 1864 | }, 1865 | { 1866 | "cell_type": "markdown", 1867 | "metadata": { 1868 | "deletable": true, 1869 | "editable": true 1870 | }, 1871 | "source": [ 1872 | "The `predicted` array contains the predicted answer to each question.`" 1873 | ] 1874 | }, 1875 | { 1876 | "cell_type": "code", 1877 | "execution_count": 44, 1878 | "metadata": { 1879 | "collapsed": false, 1880 | "deletable": true, 1881 | "editable": true 1882 | }, 1883 | "outputs": [ 1884 | { 1885 | "data": { 1886 | "text/plain": [ 1887 | "['bathroom', 'hallway', 'hallway', 'office', 'bathroom']" 1888 | ] 1889 | }, 1890 | "execution_count": 44, 1891 | "metadata": {}, 1892 | "output_type": "execute_result" 1893 | } 1894 | ], 1895 | "source": [ 1896 | "predicted[:5]" 1897 | ] 1898 | }, 1899 | { 1900 | "cell_type": "markdown", 1901 | "metadata": { 1902 | "deletable": true, 1903 | "editable": true 1904 | }, 1905 | "source": [ 1906 | "The `actual` array contains the actual answers to all questions." 
1907 | ] 1908 | }, 1909 | { 1910 | "cell_type": "code", 1911 | "execution_count": 45, 1912 | "metadata": { 1913 | "collapsed": false, 1914 | "deletable": true, 1915 | "editable": true 1916 | }, 1917 | "outputs": [], 1918 | "source": [ 1919 | "actual = list(data_qa1[data_qa1.type == 'Q'].answer)" 1920 | ] 1921 | }, 1922 | { 1923 | "cell_type": "code", 1924 | "execution_count": 46, 1925 | "metadata": { 1926 | "collapsed": false, 1927 | "deletable": true, 1928 | "editable": true 1929 | }, 1930 | "outputs": [ 1931 | { 1932 | "data": { 1933 | "text/plain": [ 1934 | "['bathroom', 'hallway', 'hallway', 'office', 'bathroom']" 1935 | ] 1936 | }, 1937 | "execution_count": 46, 1938 | "metadata": {}, 1939 | "output_type": "execute_result" 1940 | } 1941 | ], 1942 | "source": [ 1943 | "actual[:5]" 1944 | ] 1945 | }, 1946 | { 1947 | "cell_type": "code", 1948 | "execution_count": 47, 1949 | "metadata": { 1950 | "collapsed": false, 1951 | "deletable": true, 1952 | "editable": true 1953 | }, 1954 | "outputs": [ 1955 | { 1956 | "data": { 1957 | "text/plain": [ 1958 | "1.0" 1959 | ] 1960 | }, 1961 | "execution_count": 47, 1962 | "metadata": {}, 1963 | "output_type": "execute_result" 1964 | } 1965 | ], 1966 | "source": [ 1967 | "accuracy_score(actual, predicted)" 1968 | ] 1969 | }, 1970 | { 1971 | "cell_type": "markdown", 1972 | "metadata": { 1973 | "collapsed": true, 1974 | "deletable": true, 1975 | "editable": true 1976 | }, 1977 | "source": [ 1978 | "And just like that, we get an accuracy of 100%. Of course, this dataset is very simple (and machine generated), so it should be of no surprise. But one notable achievement is that the graph we created can generalize to any statements of the form, `(subject, relation, object)`." 
1979 | ] 1980 | } 1981 | ], 1982 | "metadata": { 1983 | "kernelspec": { 1984 | "display_name": "Python 3", 1985 | "language": "python", 1986 | "name": "python3" 1987 | }, 1988 | "language_info": { 1989 | "codemirror_mode": { 1990 | "name": "ipython", 1991 | "version": 3 1992 | }, 1993 | "file_extension": ".py", 1994 | "mimetype": "text/x-python", 1995 | "name": "python", 1996 | "nbconvert_exporter": "python", 1997 | "pygments_lexer": "ipython3", 1998 | "version": "3.6.1" 1999 | } 2000 | }, 2001 | "nbformat": 4, 2002 | "nbformat_minor": 2 2003 | } 2004 | -------------------------------------------------------------------------------- /notebooks/dynamic_memory_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Memory Representation in Dialogue Systems (Part 2)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "deletable": true, 17 | "editable": true 18 | }, 19 | "source": [ 20 | "This notebook is part 2 of the dynamic memory representation series. See part 1 to get started." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "deletable": true, 27 | "editable": true 28 | }, 29 | "source": [ 30 | "## Process the Text\n", 31 | "As with part 1, part 2 will perform the same evaluation as part 1, except with bAbI tasks QA2, Two Supporting Facts. In QA1, there were two types of entities: `person`s and `room`s. In QA2, there is one additional entity type: `item`s. Each dialogue provides a sequence of statements that indicate persons going to different rooms as before, and also items that persons may have acquired or released. The key insight is that objects move into rooms with the person that last acquired them, and stay in rooms once released. 
This requires the system to make the distinction between rooms and items, and also between acquiring and releasing actions.\n", 32 | "\n", 33 | "The first step is to import `resources/qa2_two-supporting-facts_train.txt` into `data`. Text processing is exactly the same as before: tokenize and POS tag the sentences." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 1, 39 | "metadata": { 40 | "collapsed": true, 41 | "deletable": true, 42 | "editable": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import pandas as pd\n", 47 | "import numpy as np\n", 48 | "import nltk\n", 49 | "from sklearn.metrics import accuracy_score" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": { 56 | "collapsed": false, 57 | "deletable": true, 58 | "editable": true 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "# Read the bAbI data as CSV\n", 63 | "filename = 'resources/qa2_two-supporting-facts_train.txt'\n", 64 | "data = pd.read_csv(filename, delimiter='\\t', names=['sentence', 'answer', 'factid'])\n", 65 | "data = data.fillna('')\n", 66 | "\n", 67 | "# Tag each sentence as a statement or question\n", 68 | "tag_sentence = lambda row: 'S' if row.answer == '' else 'Q'\n", 69 | "data['type'] = data.apply(tag_sentence, axis=1)\n", 70 | "\n", 71 | "# Use NLTK to tokenize the sentences into arrays of words\n", 72 | "tokenize = lambda row: nltk.word_tokenize(row.sentence)[1:]\n", 73 | "data.sentence = data.apply(tokenize, axis=1)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 3, 79 | "metadata": { 80 | "collapsed": true, 81 | "deletable": true, 82 | "editable": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "# Create a DataFrame with just the statements\n", 87 | "def statements():\n", 88 | " return data[data.type == 'S'] \\\n", 89 | " .reset_index(drop=True) \\\n", 90 | " .drop('answer', axis=1) \\\n", 91 | " .drop('factid', axis=1) \\\n", 92 | " .drop('type', axis=1)\n", 93 | "\n", 94 | 
"# Create a DataFrame with just the questions\n", 95 | "def questions():\n", 96 | " return data[data.type == 'Q'] \\\n", 97 | " .reset_index(drop=True) \\\n", 98 | " .drop('type', axis=1)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 4, 104 | "metadata": { 105 | "collapsed": true, 106 | "deletable": true, 107 | "editable": true 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "# Tag each token as a part of speech\n", 112 | "pos_tag = lambda row: nltk.pos_tag(row.sentence)\n", 113 | "data['tag'] = data.apply(pos_tag, axis=1)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 5, 119 | "metadata": { 120 | "collapsed": true, 121 | "deletable": true, 122 | "editable": true 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "def extract_statement(tags):\n", 127 | " '''Extracts a (subject, relation, object) triple from each statement based on the POS tags'''\n", 128 | " subject, relation, obj = '', '', ''\n", 129 | " for word,tag in tags:\n", 130 | " if tag == 'NNP':\n", 131 | " subject = word\n", 132 | " elif tag == 'VBD' or word == 'journeyed': # TODO: 'journeyed' is tagged improperly\n", 133 | " relation = word\n", 134 | " elif tag == 'NN':\n", 135 | " obj = word\n", 136 | " return (subject, relation, obj)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 6, 142 | "metadata": { 143 | "collapsed": true, 144 | "deletable": true, 145 | "editable": true 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "def extract_question(tags):\n", 150 | " '''Extracts the entity under discussion from each question based on the POS tags'''\n", 151 | " eud = ''\n", 152 | " for word,tag in tags:\n", 153 | " if tag == 'NNP' or tag == 'NN':\n", 154 | " eud = word\n", 155 | " return eud" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 7, 161 | "metadata": { 162 | "collapsed": true, 163 | "deletable": true, 164 | "editable": true 165 | }, 166 | "outputs": [], 167 | 
"source": [ 168 | "def extract(row):\n", 169 | " '''Extracts the appropriate data given a processed DataFrame row'''\n", 170 | " if row.type == 'S':\n", 171 | " return extract_statement(row.tag)\n", 172 | " else: \n", 173 | " return extract_question(row.tag)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 8, 179 | "metadata": { 180 | "collapsed": true, 181 | "deletable": true, 182 | "editable": true 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "data['extracted'] = data.apply(extract, axis=1)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": { 192 | "deletable": true, 193 | "editable": true 194 | }, 195 | "source": [ 196 | "## Define the Graph" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 11, 202 | "metadata": { 203 | "collapsed": true, 204 | "deletable": true, 205 | "editable": true 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "from neo4j.v1 import GraphDatabase, basic_auth" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 12, 215 | "metadata": { 216 | "collapsed": true, 217 | "deletable": true, 218 | "editable": true 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "# Create a neo4j session\n", 223 | "# NOTE: Make sure that URL/credentials are correct and that Neo4j is running\n", 224 | "driver = GraphDatabase.driver('bolt://localhost:7687', auth=basic_auth('neo4j', 'neo4j'))" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 13, 230 | "metadata": { 231 | "collapsed": true, 232 | "deletable": true, 233 | "editable": true 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "# WARNING: This will clear the database when run!\n", 238 | "def reset_db():\n", 239 | " '''Remove all nodes and relationships from the database'''\n", 240 | " session = driver.session()\n", 241 | " session.run('MATCH (n) DETACH DELETE n')" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 14, 247 | 
"metadata": { 248 | "collapsed": true, 249 | "deletable": true, 250 | "editable": true 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "def create(query, start=0, end=0):\n", 255 | " '''Create a graph based on each triple in the extracted statements'''\n", 256 | " session = driver.session()\n", 257 | " stat = statements()\n", 258 | " end = len(stat) if end <= start else end\n", 259 | " for subject,relation,obj in stat[start:end].extracted:\n", 260 | " session.run(query, subject=subject, relation=relation, obj=obj)" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": { 266 | "deletable": true, 267 | "editable": true 268 | }, 269 | "source": [ 270 | "This is the point where QA2 starts to be different from QA1. The query generating the knowledge graph needs to be altered slightly to encode information about the ordering of events relative to objects as well as subjects.\n", 271 | "\n", 272 | "In QA1, a linked list was constructed to keep track of events relative to a character; the `NEXT` edge type indicated the next event that the _person_ acted upon. This was all that was necessary, since the questions asked directly about the most recent event that corresponded to a particular person.\n", 273 | "\n", 274 | "In QA2, questions ask about the item a room is in, which requires a way to keep track of the last person who interacted with it. As such, it is not enough to know the order in which a person performed actions, but it is also necessary to know the order in which an item was handled. The most recent interaction indicates the person who interacted with that object last, and that can be used to find the room based on their visit history.\n", 275 | "\n", 276 | "Thus, the v4 graph query will create three types of lists.\n", 277 | "1. The first list is the global list of events indicated by the `NEXT` edge type.\n", 278 | "2. The second list is a `person`'s list of events indicated by the `S_NEXT` (next subject) edge type.\n", 279 | "3. 
The third list is an `item`'s list of events indicated by the `O_NEXT` (next object) edge type.\n", 280 | "Each list has a `HEAD` edge that points to the most recent event relative to their respective lists." 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": { 287 | "collapsed": true, 288 | "deletable": true, 289 | "editable": true 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "v4_query = '''\n", 294 | " /// 1. Create Nodes\n", 295 | " MERGE (global:GLOBAL {name:'global'}) // Find/create the global entity\n", 296 | " MERGE (subject:SUBJECT {name:$subject}) // Find/create the subject and object\n", 297 | " MERGE (object:OBJECT {name:$obj})\n", 298 | "\n", 299 | " /// 2. Create a new relation between the subject and object\n", 300 | " CREATE (subject)-[:R_BEGIN]->(relation:RELATION {name:$relation})-[:R_END]->(object)\n", 301 | "\n", 302 | " /// 3. Create head pointers to the newly created relation\n", 303 | " CREATE (global)-[globalHead:HEAD]->(relation)\n", 304 | " CREATE (subject)-[subjectHead:HEAD]->(relation)\n", 305 | " CREATE (object)-[objectHead:HEAD]->(relation)\n", 306 | "\n", 307 | " WITH global,subject,relation,object,subjectHead,objectHead,globalHead\n", 308 | "\n", 309 | " /// 4. Link the existing global list with the new head node\n", 310 | " // Find the previous global head of the list (if none exist, this query will terminate here)\n", 311 | " MATCH (global)-[prevGlobalHead:HEAD]->(prevGlobalRelation:RELATION) WHERE prevGlobalRelation <> relation\n", 312 | " CREATE (prevGlobalRelation)-[:NEXT]->(relation) // Complete the link\n", 313 | " DELETE prevGlobalHead // Remove the previous head pointer\n", 314 | "\n", 315 | " WITH subject,relation,object,subjectHead,objectHead\n", 316 | "\n", 317 | " /// 5. 
Link the existing subject list with the new head node\n", 318 | " // Find the previous subject head of the list (if none exist, this query will terminate here)\n", 319 | " MATCH (subject)-[prevSubjectHead:HEAD]->(prevSubjectRelation:RELATION) WHERE prevSubjectRelation <> relation\n", 320 | " CREATE (prevSubjectRelation)-[:S_NEXT]->(relation) // Complete the link\n", 321 | " DELETE prevSubjectHead // Remove the previous head pointer\n", 322 | "\n", 323 | " WITH subject,relation,object,objectHead\n", 324 | "\n", 325 | " /// 6. Link the existing object list with the new head node\n", 326 | " // Find the previous subject head of the list (if none exist, this query will terminate here)\n", 327 | " MATCH (object)-[prevObjectHead:HEAD]->(prevObjectRelation:RELATION) WHERE prevObjectRelation <> relation\n", 328 | " CREATE (prevObjectRelation)-[:O_NEXT]->(relation) // Complete the link\n", 329 | " DELETE prevObjectHead // Remove the previous head pointer\n", 330 | "'''" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 15, 336 | "metadata": { 337 | "collapsed": false, 338 | "deletable": true, 339 | "editable": true 340 | }, 341 | "outputs": [], 342 | "source": [ 343 | "# Represent each relation as a node, ordered by multiple linked lists\n", 344 | "def build_v4_graph(start=0, end=0):\n", 345 | " reset_db()\n", 346 | " \n", 347 | " session = driver.session()\n", 348 | " \n", 349 | " # Create an index for faster access\n", 350 | " session.run('CREATE INDEX ON :SUBJECT(name)')\n", 351 | " session.run('CREATE INDEX ON :RELATION(name)')\n", 352 | " session.run('CREATE INDEX ON :OBJECT(name)')\n", 353 | " \n", 354 | " create(v4_query, start=start, end=end)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 16, 360 | "metadata": { 361 | "collapsed": false, 362 | "deletable": true, 363 | "editable": true 364 | }, 365 | "outputs": [], 366 | "source": [ 367 | "all_actions = sorted(list(set(x[1] for x in data.extracted if x != '' 
and x[1] != '')))" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 17, 373 | "metadata": { 374 | "collapsed": true, 375 | "deletable": true, 376 | "editable": true 377 | }, 378 | "outputs": [], 379 | "source": [ 380 | "movement_actions = ['journeyed', 'moved', 'travelled', 'went']\n", 381 | "acquire_actions = ['got', 'grabbed', 'picked', 'took']\n", 382 | "release_actions = ['discarded', 'dropped', 'left', 'put']" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 18, 388 | "metadata": { 389 | "collapsed": true, 390 | "deletable": true, 391 | "editable": true 392 | }, 393 | "outputs": [], 394 | "source": [ 395 | "def find_last_person(obj):\n", 396 | " '''Finds the last person in contact with the object'''\n", 397 | " query = '''\n", 398 | " MATCH (:OBJECT {name:$name})-[:HEAD]->(relation:RELATION)<-[:R_BEGIN]-(subject:SUBJECT)\n", 399 | " RETURN relation.name AS relation, subject.name AS subject\n", 400 | " '''\n", 401 | " return session.run(query, name=obj)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 20, 407 | "metadata": { 408 | "collapsed": true, 409 | "deletable": true, 410 | "editable": true 411 | }, 412 | "outputs": [], 413 | "source": [ 414 | "def find_object_location(obj):\n", 415 | " query = '''\n", 416 | " // Find the last person in contact with the object\n", 417 | " MATCH (:OBJECT {name:$obj})-[:HEAD]->(relation:RELATION)<-[:R_BEGIN]-(subject:SUBJECT)\n", 418 | "\n", 419 | " // Acquire\n", 420 | " MATCH (subject)-[:HEAD]->(head_relation:RELATION)\n", 421 | " \n", 422 | " MATCH p=(head_relation)<-[next:S_NEXT *1..20]-(prevRelation:RELATION)\n", 423 | " WHERE prevRelation.name IN $movement\n", 424 | " WITH size(next) as dist, p, relation\n", 425 | " ORDER BY dist\n", 426 | " WITH filter(n IN nodes(p) WHERE n.name IN $movement)[0] AS shortest, relation\n", 427 | " MATCH (shortest)-[:R_END]->(object_acquire:OBJECT)\n", 428 | " \n", 429 | " WITH relation, 
object_acquire\n", 430 | "\n", 431 | " // Release\n", 432 | " MATCH p=(relation)<-[next:S_NEXT *1..20]-(prevRelation:RELATION)\n", 433 | " WHERE prevRelation.name IN $movement\n", 434 | " WITH size(next) as dist, p, object_acquire, relation\n", 435 | " ORDER BY dist\n", 436 | " WITH filter(n IN nodes(p) WHERE n.name IN $movement)[0] AS shortest, object_acquire, relation\n", 437 | " MATCH (shortest)-[:R_END]->(object_release:OBJECT)\n", 438 | "\n", 439 | " RETURN DISTINCT object_acquire.name AS acquire, object_release.name AS release, relation.name AS relation\n", 440 | " '''\n", 441 | " return session.run(query, obj=obj, movement=movement_actions)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 21, 447 | "metadata": { 448 | "collapsed": true, 449 | "deletable": true, 450 | "editable": true 451 | }, 452 | "outputs": [], 453 | "source": [ 454 | "## 98.6% Accuracy\n", 455 | "\n", 456 | "# def find_object_location(obj):\n", 457 | "# query = '''\n", 458 | "# // Find the last person in contact with the object\n", 459 | "# MATCH (:OBJECT {name:$obj})-[:HEAD]->(relation:RELATION)<-[:R_BEGIN]-(subject:SUBJECT)\n", 460 | "\n", 461 | "# // Acquire\n", 462 | "# MATCH (subject)-[:HEAD]->(:RELATION)-[:R_END]->(object_acquire:OBJECT)\n", 463 | "\n", 464 | "# // Release\n", 465 | "# MATCH p=(relation)<-[next:S_NEXT *1..20]-(prevRelation:RELATION)\n", 466 | "# WHERE prevRelation.name IN $movement\n", 467 | "# WITH size(next) as dist, p, object_acquire, relation\n", 468 | "# ORDER BY dist\n", 469 | "# WITH filter(n IN nodes(p) WHERE n.name IN $movement)[0] AS shortest, object_acquire, relation\n", 470 | "# MATCH (shortest)-[:R_END]->(object_release:OBJECT)\n", 471 | "\n", 472 | "# RETURN DISTINCT object_acquire.name AS acquire, object_release.name AS release, relation.name AS relation\n", 473 | "# '''\n", 474 | "# return session.run(query, obj=obj, movement=movement_actions)" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 
24, 480 | "metadata": { 481 | "collapsed": false, 482 | "deletable": true, 483 | "editable": true 484 | }, 485 | "outputs": [ 486 | { 487 | "data": { 488 | "text/plain": [ 489 | "" 490 | ] 491 | }, 492 | "execution_count": 24, 493 | "metadata": {}, 494 | "output_type": "execute_result" 495 | } 496 | ], 497 | "source": [ 498 | "build_v4_graph(start=0, end=6)\n", 499 | "\n", 500 | "session = driver.session()\n", 501 | "find_object_location('football').single()" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": 25, 507 | "metadata": { 508 | "collapsed": false, 509 | "deletable": true, 510 | "editable": true 511 | }, 512 | "outputs": [ 513 | { 514 | "data": { 515 | "text/plain": [ 516 | "" 517 | ] 518 | }, 519 | "execution_count": 25, 520 | "metadata": {}, 521 | "output_type": "execute_result" 522 | } 523 | ], 524 | "source": [ 525 | "session = driver.session()\n", 526 | "find_object_location('football').single()" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": { 532 | "deletable": true, 533 | "editable": true 534 | }, 535 | "source": [ 536 | "## Build the Graph" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": 26, 542 | "metadata": { 543 | "collapsed": false, 544 | "deletable": true, 545 | "editable": true 546 | }, 547 | "outputs": [], 548 | "source": [ 549 | "build_v4_graph()" 550 | ] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "metadata": { 555 | "deletable": true, 556 | "editable": true 557 | }, 558 | "source": [ 559 | "" 560 | ] 561 | }, 562 | { 563 | "cell_type": "markdown", 564 | "metadata": { 565 | "deletable": true, 566 | "editable": true 567 | }, 568 | "source": [ 569 | "## Calcualte an Accuracy Score" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 27, 575 | "metadata": { 576 | "collapsed": true, 577 | "deletable": true, 578 | "editable": true 579 | }, 580 | "outputs": [], 581 | "source": [ 582 | "def get_answers(row):\n", 583 | " '''Given 
an input row merge the statement in the graph, \n", 584 | " or query the graph if it is a question'''\n", 585 | " if row.type == 'S':\n", 586 | " subject,relation,obj = row.extracted\n", 587 | " session.run(v4_query, subject=subject, relation=relation, obj=obj)\n", 588 | " return ''\n", 589 | " elif row.type == 'Q':\n", 590 | " obj = row.extracted\n", 591 | " # WARNING: do not consume the result (e.g., call .consume() or .single()) \n", 592 | " # until the entire iteration is done.\n", 593 | " # Failure to do so may cause the queries to be VERY slow!\n", 594 | " return find_object_location(obj)" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": 28, 600 | "metadata": { 601 | "collapsed": true, 602 | "deletable": true, 603 | "editable": true 604 | }, 605 | "outputs": [], 606 | "source": [ 607 | "def traverse(result):\n", 608 | " if result['relation'] in acquire_actions:\n", 609 | " return result['acquire']\n", 610 | " else:\n", 611 | " return result['release']" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 30, 617 | "metadata": { 618 | "collapsed": false, 619 | "deletable": true, 620 | "editable": true 621 | }, 622 | "outputs": [], 623 | "source": [ 624 | "reset_db()" 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": 31, 630 | "metadata": { 631 | "collapsed": false, 632 | "deletable": true, 633 | "editable": true, 634 | "scrolled": true 635 | }, 636 | "outputs": [], 637 | "source": [ 638 | "session = driver.session()\n", 639 | "results = data.apply(get_answers, axis=1)\n", 640 | "results = [x for x in results if x != '']\n", 641 | "predicted = [traverse(result.single()) for result in results]" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": 32, 647 | "metadata": { 648 | "collapsed": false, 649 | "deletable": true, 650 | "editable": true 651 | }, 652 | "outputs": [ 653 | { 654 | "data": { 655 | "text/plain": [ 656 | "['garden', 'garden', 'hallway', 
'hallway', 'kitchen']" 657 | ] 658 | }, 659 | "execution_count": 32, 660 | "metadata": {}, 661 | "output_type": "execute_result" 662 | } 663 | ], 664 | "source": [ 665 | "predicted[:5]" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 33, 671 | "metadata": { 672 | "collapsed": true, 673 | "deletable": true, 674 | "editable": true 675 | }, 676 | "outputs": [], 677 | "source": [ 678 | "actual = list(questions().answer)" 679 | ] 680 | }, 681 | { 682 | "cell_type": "code", 683 | "execution_count": 34, 684 | "metadata": { 685 | "collapsed": false, 686 | "deletable": true, 687 | "editable": true 688 | }, 689 | "outputs": [ 690 | { 691 | "data": { 692 | "text/plain": [ 693 | "['garden', 'garden', 'hallway', 'hallway', 'kitchen']" 694 | ] 695 | }, 696 | "execution_count": 34, 697 | "metadata": {}, 698 | "output_type": "execute_result" 699 | } 700 | ], 701 | "source": [ 702 | "actual[:5]" 703 | ] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "execution_count": 35, 708 | "metadata": { 709 | "collapsed": false, 710 | "deletable": true, 711 | "editable": true 712 | }, 713 | "outputs": [ 714 | { 715 | "data": { 716 | "text/plain": [ 717 | "1.0" 718 | ] 719 | }, 720 | "execution_count": 35, 721 | "metadata": {}, 722 | "output_type": "execute_result" 723 | } 724 | ], 725 | "source": [ 726 | "accuracy_score(actual, predicted)" 727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "execution_count": 36, 732 | "metadata": { 733 | "collapsed": false, 734 | "deletable": true, 735 | "editable": true 736 | }, 737 | "outputs": [], 738 | "source": [ 739 | "def find_incorrect(actual, predicted):\n", 740 | " z = list(zip(actual, predicted))\n", 741 | " return [(i, x[0], x[1]) for i,x in enumerate(z) if x[0] != x[1]]" 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": null, 747 | "metadata": { 748 | "collapsed": true, 749 | "deletable": true, 750 | "editable": true 751 | }, 752 | "outputs": [], 753 | "source": [] 754 | } 755 | ], 
756 | "metadata": { 757 | "kernelspec": { 758 | "display_name": "Python 3", 759 | "language": "python", 760 | "name": "python3" 761 | }, 762 | "language_info": { 763 | "codemirror_mode": { 764 | "name": "ipython", 765 | "version": 3 766 | }, 767 | "file_extension": ".py", 768 | "mimetype": "text/x-python", 769 | "name": "python", 770 | "nbconvert_exporter": "python", 771 | "pygments_lexer": "ipython3", 772 | "version": "3.6.1" 773 | } 774 | }, 775 | "nbformat": 4, 776 | "nbformat_minor": 2 777 | } 778 | -------------------------------------------------------------------------------- /notebooks/dynamic_memory_3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Memory Representation in Dialogue Systems (Part 3)\n", 8 | "\n", 9 | "Under construction, will update with explanations when finished.\n", 10 | "\n", 11 | "## Import" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import pandas as pd\n", 23 | "import numpy as np\n", 24 | "import nltk\n", 25 | "from sklearn.metrics import accuracy_score\n", 26 | "from neo4j.v1 import GraphDatabase, basic_auth\n", 27 | "from collections import defaultdict" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "(414, 52256)" 41 | ] 42 | }, 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "refs_utts = pd.read_pickle('resources/utts_refs.pkl')\n", 50 | "props = pd.read_pickle('resources/restaurants_props.pkl')\n", 51 | "len(refs_utts), len(props)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": { 58 | "collapsed": false 59 | }, 
60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/html": [ 64 | "
\n", 65 | "\n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | "
textbotoindmaskgidtarget
0[i, want, a, moderately, priced, restaurant, i...api_call R_cuisine west moderatetrn2True2prezzo
2[cheap, restaurant, in, the, north, part, of, ...api_call R_cuisine north cheaptrn2True11da_vinci_pizzeria
3[cheap, restaurant, in, the, south, part, of, ...api_call R_cuisine south cheaptrn2True12the_lucky_star
4[cheap, restaurant, serving, indian, food]api_call indian R_location cheaptrn2True15the_gandhi
5[thai, food]api_call thai R_location R_pricetrn2True22bangkok_city
\n", 131 | "
" 132 | ], 133 | "text/plain": [ 134 | " text \\\n", 135 | "0 [i, want, a, moderately, priced, restaurant, i... \n", 136 | "2 [cheap, restaurant, in, the, north, part, of, ... \n", 137 | "3 [cheap, restaurant, in, the, south, part, of, ... \n", 138 | "4 [cheap, restaurant, serving, indian, food] \n", 139 | "5 [thai, food] \n", 140 | "\n", 141 | " bot o ind mask gid target \n", 142 | "0 api_call R_cuisine west moderate trn 2 True 2 prezzo \n", 143 | "2 api_call R_cuisine north cheap trn 2 True 11 da_vinci_pizzeria \n", 144 | "3 api_call R_cuisine south cheap trn 2 True 12 the_lucky_star \n", 145 | "4 api_call indian R_location cheap trn 2 True 15 the_gandhi \n", 146 | "5 api_call thai R_location R_price trn 2 True 22 bangkok_city " 147 | ] 148 | }, 149 | "execution_count": 3, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "refs_utts[:5]" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 4, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/html": [ 168 | "
\n", 169 | "\n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | "
rnameattr_keyattr_value
3saint_johns_chop_houseR_cuisinebritish
4saint_johns_chop_houseR_locationwest
7saint_johns_chop_houseR_pricemoderate
10prezzoR_cuisineitalian
11prezzoR_locationwest
\n", 211 | "
" 212 | ], 213 | "text/plain": [ 214 | " rname attr_key attr_value\n", 215 | "3 saint_johns_chop_house R_cuisine british\n", 216 | "4 saint_johns_chop_house R_location west\n", 217 | "7 saint_johns_chop_house R_price moderate\n", 218 | "10 prezzo R_cuisine italian\n", 219 | "11 prezzo R_location west" 220 | ] 221 | }, 222 | "execution_count": 4, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "props[:5]" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## Process Text" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 5, 241 | "metadata": { 242 | "collapsed": true 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "stemmer = nltk.stem.snowball.EnglishStemmer()\n", 247 | "\n", 248 | "def stem(sentence):\n", 249 | " return [stemmer.stem(w) for w in sentence]" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 6, 255 | "metadata": { 256 | "collapsed": false 257 | }, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/plain": [ 262 | "414" 263 | ] 264 | }, 265 | "execution_count": 6, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "test = pd.DataFrame()\n", 272 | "test['text'] = [stem(s) for s in refs_utts.text]\n", 273 | "test['frame'] = [tuple(stem(f.split()[1:])) for f in refs_utts.bot]\n", 274 | "len(test)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 7, 280 | "metadata": { 281 | "collapsed": false 282 | }, 283 | "outputs": [ 284 | { 285 | "data": { 286 | "text/plain": [ 287 | "405" 288 | ] 289 | }, 290 | "execution_count": 7, 291 | "metadata": {}, 292 | "output_type": "execute_result" 293 | } 294 | ], 295 | "source": [ 296 | "# Remove poorly formatted frames\n", 297 | "test = test[test.frame.map(len) == 3]\n", 298 | "len(test)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 8, 304 | 
"metadata": { 305 | "collapsed": false 306 | }, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/html": [ 311 | "
\n", 312 | "\n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | "
textframe
0[i, want, a, moder, price, restaur, in, the, w...(r_cuisin, west, moder)
1[cheap, restaur, in, the, north, part, of, town](r_cuisin, north, cheap)
2[cheap, restaur, in, the, south, part, of, town](r_cuisin, south, cheap)
3[cheap, restaur, serv, indian, food](indian, r_locat, cheap)
4[thai, food](thai, r_locat, r_price)
\n", 348 | "
" 349 | ], 350 | "text/plain": [ 351 | " text frame\n", 352 | "0 [i, want, a, moder, price, restaur, in, the, w... (r_cuisin, west, moder)\n", 353 | "1 [cheap, restaur, in, the, north, part, of, town] (r_cuisin, north, cheap)\n", 354 | "2 [cheap, restaur, in, the, south, part, of, town] (r_cuisin, south, cheap)\n", 355 | "3 [cheap, restaur, serv, indian, food] (indian, r_locat, cheap)\n", 356 | "4 [thai, food] (thai, r_locat, r_price)" 357 | ] 358 | }, 359 | "execution_count": 8, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "test[:5]" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 9, 371 | "metadata": { 372 | "collapsed": true 373 | }, 374 | "outputs": [], 375 | "source": [ 376 | "knowledge = pd.DataFrame()\n", 377 | "knowledge['restaurant'] = props.rname.copy()\n", 378 | "knowledge['key'] = [stemmer.stem(s) for s in props.attr_key]\n", 379 | "knowledge['value'] = [stemmer.stem(s) for s in props.attr_value]" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 11, 385 | "metadata": { 386 | "collapsed": false 387 | }, 388 | "outputs": [ 389 | { 390 | "data": { 391 | "text/html": [ 392 | "
\n", 393 | "\n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | "
restaurantkeyvalue
3saint_johns_chop_houser_cuisinbritish
4saint_johns_chop_houser_locatwest
7saint_johns_chop_houser_pricemoder
10prezzor_cuisinitalian
11prezzor_locatwest
\n", 435 | "
" 436 | ], 437 | "text/plain": [ 438 | " restaurant key value\n", 439 | "3 saint_johns_chop_house r_cuisin british\n", 440 | "4 saint_johns_chop_house r_locat west\n", 441 | "7 saint_johns_chop_house r_price moder\n", 442 | "10 prezzo r_cuisin italian\n", 443 | "11 prezzo r_locat west" 444 | ] 445 | }, 446 | "execution_count": 11, 447 | "metadata": {}, 448 | "output_type": "execute_result" 449 | } 450 | ], 451 | "source": [ 452 | "knowledge[:5]" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 11, 458 | "metadata": { 459 | "collapsed": false 460 | }, 461 | "outputs": [], 462 | "source": [ 463 | "# A dictionary of keys to the list of values they can take\n", 464 | "# In this instance, keys form mutually exclusive lists of values\n", 465 | "types = knowledge[['key', 'value']] \\\n", 466 | " .groupby('key') \\\n", 467 | " .aggregate(lambda x: tuple(set(x))) \\\n", 468 | " .reset_index() \\\n", 469 | " .set_index('key') \\\n", 470 | " .value \\\n", 471 | " .to_dict()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 12, 477 | "metadata": { 478 | "collapsed": false 479 | }, 480 | "outputs": [ 481 | { 482 | "data": { 483 | "text/plain": [ 484 | "('asian_orient', 'vietnames', 'lebanes', 'african', 'thai')" 485 | ] 486 | }, 487 | "execution_count": 12, 488 | "metadata": {}, 489 | "output_type": "execute_result" 490 | } 491 | ], 492 | "source": [ 493 | "types['r_cuisin'][:5]" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 13, 499 | "metadata": { 500 | "collapsed": false 501 | }, 502 | "outputs": [ 503 | { 504 | "data": { 505 | "text/plain": [ 506 | "('centr', 'south', 'west', 'east', 'north')" 507 | ] 508 | }, 509 | "execution_count": 13, 510 | "metadata": {}, 511 | "output_type": "execute_result" 512 | } 513 | ], 514 | "source": [ 515 | "types['r_locat']" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 14, 521 | "metadata": { 522 | "collapsed": false 523 | }, 
524 | "outputs": [ 525 | { 526 | "data": { 527 | "text/plain": [ 528 | "('expens', 'moder', 'cheap')" 529 | ] 530 | }, 531 | "execution_count": 14, 532 | "metadata": {}, 533 | "output_type": "execute_result" 534 | } 535 | ], 536 | "source": [ 537 | "types['r_price']" 538 | ] 539 | }, 540 | { 541 | "cell_type": "markdown", 542 | "metadata": {}, 543 | "source": [ 544 | "## Create Knowledge Graph" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 15, 550 | "metadata": { 551 | "collapsed": true 552 | }, 553 | "outputs": [], 554 | "source": [ 555 | "# Create a neo4j session\n", 556 | "driver = GraphDatabase.driver('bolt://localhost:7687', auth=basic_auth('neo4j', 'neo4j'))" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 16, 562 | "metadata": { 563 | "collapsed": true 564 | }, 565 | "outputs": [], 566 | "source": [ 567 | "# WARNING: This will clear the database when run!\n", 568 | "def reset_db():\n", 569 | " session = driver.session()\n", 570 | " session.run('MATCH (n) DETACH DELETE n')" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 17, 576 | "metadata": { 577 | "collapsed": false 578 | }, 579 | "outputs": [], 580 | "source": [ 581 | "reset_db()" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 18, 587 | "metadata": { 588 | "collapsed": false 589 | }, 590 | "outputs": [], 591 | "source": [ 592 | "session = driver.session()\n", 593 | "\n", 594 | "for i,row in knowledge.iterrows():\n", 595 | " subject, relation, obj = row.restaurant, row.key, row.value\n", 596 | " session.run('''\n", 597 | " MERGE (s:SUBJECT {name: $subject}) \n", 598 | " MERGE (o:OBJECT {name: $obj}) \n", 599 | " MERGE (s)-[r:RELATION {name: $relation}]->(o)\n", 600 | " ''', { \n", 601 | " 'subject': subject,\n", 602 | " 'relation': relation,\n", 603 | " 'obj': obj\n", 604 | " })" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": {}, 610 | "source": [ 611 | "## 
Test\n", 612 | "#### Baseline\n", 613 | "The baseline accuracy is the slot accuracy, calculated by the assumption of not knowing any frame values for any of the sentences." 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 19, 619 | "metadata": { 620 | "collapsed": false 621 | }, 622 | "outputs": [ 623 | { 624 | "data": { 625 | "text/plain": [ 626 | "('r_cuisin', 'r_locat', 'r_price')" 627 | ] 628 | }, 629 | "execution_count": 19, 630 | "metadata": {}, 631 | "output_type": "execute_result" 632 | } 633 | ], 634 | "source": [ 635 | "dont_know = tuple(types.keys())\n", 636 | "dont_know" 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": 20, 642 | "metadata": { 643 | "collapsed": false 644 | }, 645 | "outputs": [], 646 | "source": [ 647 | "base_predicted = list(dont_know) * len(test)\n", 648 | "base_actual = [w for frame in test.frame for w in frame]" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 21, 654 | "metadata": { 655 | "collapsed": false 656 | }, 657 | "outputs": [ 658 | { 659 | "data": { 660 | "text/plain": [ 661 | "0.45267489711934156" 662 | ] 663 | }, 664 | "execution_count": 21, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "accuracy_score(base_actual, base_predicted)" 671 | ] 672 | }, 673 | { 674 | "cell_type": "markdown", 675 | "metadata": {}, 676 | "source": [ 677 | "#### Accuracy" 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": 91, 683 | "metadata": { 684 | "collapsed": false 685 | }, 686 | "outputs": [], 687 | "source": [ 688 | "# Cache properties from DB\n", 689 | "# Running this query will obtain all properties at this point in time\n", 690 | "def get_properties():\n", 691 | " session = driver.session()\n", 692 | " return session.run('''\n", 693 | " MATCH ()-[r:RELATION]->(o:OBJECT) \n", 694 | " RETURN collect(distinct o.name) AS properties\n", 695 | " ''').single()['properties']" 696 | 
] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 92, 701 | "metadata": { 702 | "collapsed": true 703 | }, 704 | "outputs": [], 705 | "source": [ 706 | "# def get_types():\n", 707 | "# session = driver.session()\n", 708 | "# result = session.run('''\n", 709 | "# MATCH ()-[r:RELATION]->(o:OBJECT) \n", 710 | "# RETURN collect(distinct [r.name, o.name]) AS pair\n", 711 | "# ''').single()[0]\n", 712 | " \n", 713 | "# g_types = defaultdict(lambda: [])\n", 714 | "# for k,v in result:\n", 715 | "# g_types[k].append(v)\n", 716 | "# return g_types" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 115, 722 | "metadata": { 723 | "collapsed": false 724 | }, 725 | "outputs": [], 726 | "source": [ 727 | "properties = set(get_properties())" 728 | ] 729 | }, 730 | { 731 | "cell_type": "code", 732 | "execution_count": 116, 733 | "metadata": { 734 | "collapsed": true 735 | }, 736 | "outputs": [], 737 | "source": [ 738 | "# Hotword listener\n", 739 | "def is_hotword(word):\n", 740 | " return word in properties" 741 | ] 742 | }, 743 | { 744 | "cell_type": "code", 745 | "execution_count": 117, 746 | "metadata": { 747 | "collapsed": false 748 | }, 749 | "outputs": [ 750 | { 751 | "data": { 752 | "text/plain": [ 753 | "(True, False)" 754 | ] 755 | }, 756 | "execution_count": 117, 757 | "metadata": {}, 758 | "output_type": "execute_result" 759 | } 760 | ], 761 | "source": [ 762 | "is_hotword('british'), is_hotword('python')" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": 122, 768 | "metadata": { 769 | "collapsed": true 770 | }, 771 | "outputs": [], 772 | "source": [ 773 | "# Issue DB queries\n", 774 | "def find_slot(prop):\n", 775 | " return session.run('''\n", 776 | " MATCH (s:SUBJECT)-[r:RELATION]->(o:OBJECT {name:$name}) \n", 777 | " RETURN collect(distinct [r.name, o.name]) AS properties\n", 778 | " ''', {\n", 779 | " 'name': prop\n", 780 | " })\n", 781 | "\n", 782 | "def extract(result):\n", 783 | " return 
result.single()['properties'][0]" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": 123, 789 | "metadata": { 790 | "collapsed": false 791 | }, 792 | "outputs": [ 793 | { 794 | "data": { 795 | "text/plain": [ 796 | "['r_locat', 'west']" 797 | ] 798 | }, 799 | "execution_count": 123, 800 | "metadata": {}, 801 | "output_type": "execute_result" 802 | } 803 | ], 804 | "source": [ 805 | "session = driver.session()\n", 806 | "extract(find_slot('west'))" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": 183, 812 | "metadata": { 813 | "collapsed": false 814 | }, 815 | "outputs": [], 816 | "source": [ 817 | "session = driver.session()\n", 818 | "all_slots = [[find_slot(word) for word in sentence if is_hotword(word)] for sentence in test.text]\n", 819 | "extracted_slots = [[tuple(extract(slot)) for slot in slots] for slots in all_slots]\n", 820 | "test['slots'] = extracted_slots" 821 | ] 822 | }, 823 | { 824 | "cell_type": "code", 825 | "execution_count": 184, 826 | "metadata": { 827 | "collapsed": true 828 | }, 829 | "outputs": [], 830 | "source": [ 831 | "def to_frame(slots):\n", 832 | " frame = list(dont_know)\n", 833 | " s = dict(slots)\n", 834 | " \n", 835 | " for i,x in enumerate(frame):\n", 836 | " if x in s.keys():\n", 837 | " frame[i] = s[x]\n", 838 | " \n", 839 | " return tuple(frame)" 840 | ] 841 | }, 842 | { 843 | "cell_type": "code", 844 | "execution_count": 185, 845 | "metadata": { 846 | "collapsed": false 847 | }, 848 | "outputs": [], 849 | "source": [ 850 | "test['predicted'] = [to_frame(slot) for slot in test.slots]" 851 | ] 852 | }, 853 | { 854 | "cell_type": "code", 855 | "execution_count": 186, 856 | "metadata": { 857 | "collapsed": false 858 | }, 859 | "outputs": [ 860 | { 861 | "data": { 862 | "text/html": [ 863 | "
\n", 864 | "\n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | "
textframeslotspredicted
0[i, want, a, moder, price, restaur, in, the, w...(r_cuisin, west, moder)[(r_price, moder), (r_locat, west)](r_cuisin, west, moder)
1[cheap, restaur, in, the, north, part, of, town](r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
2[cheap, restaur, in, the, south, part, of, town](r_cuisin, south, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
3[cheap, restaur, serv, indian, food](indian, r_locat, cheap)[(r_price, cheap), (r_cuisin, indian)](indian, r_locat, cheap)
4[thai, food](thai, r_locat, r_price)[(r_cuisin, thai)](thai, r_locat, r_price)
\n", 912 | "
" 913 | ], 914 | "text/plain": [ 915 | " text \\\n", 916 | "0 [i, want, a, moder, price, restaur, in, the, w... \n", 917 | "1 [cheap, restaur, in, the, north, part, of, town] \n", 918 | "2 [cheap, restaur, in, the, south, part, of, town] \n", 919 | "3 [cheap, restaur, serv, indian, food] \n", 920 | "4 [thai, food] \n", 921 | "\n", 922 | " frame slots \\\n", 923 | "0 (r_cuisin, west, moder) [(r_price, moder), (r_locat, west)] \n", 924 | "1 (r_cuisin, north, cheap) [(r_price, cheap), (r_locat, north)] \n", 925 | "2 (r_cuisin, south, cheap) [(r_price, cheap), (r_locat, south)] \n", 926 | "3 (indian, r_locat, cheap) [(r_price, cheap), (r_cuisin, indian)] \n", 927 | "4 (thai, r_locat, r_price) [(r_cuisin, thai)] \n", 928 | "\n", 929 | " predicted \n", 930 | "0 (r_cuisin, west, moder) \n", 931 | "1 (r_cuisin, north, cheap) \n", 932 | "2 (r_cuisin, south, cheap) \n", 933 | "3 (indian, r_locat, cheap) \n", 934 | "4 (thai, r_locat, r_price) " 935 | ] 936 | }, 937 | "execution_count": 186, 938 | "metadata": {}, 939 | "output_type": "execute_result" 940 | } 941 | ], 942 | "source": [ 943 | "test[:5]" 944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": 173, 949 | "metadata": { 950 | "collapsed": false 951 | }, 952 | "outputs": [], 953 | "source": [ 954 | "predicted = [w for frame in test.predicted for w in frame]\n", 955 | "actual = [w for frame in test.frame for w in frame]" 956 | ] 957 | }, 958 | { 959 | "cell_type": "code", 960 | "execution_count": 187, 961 | "metadata": { 962 | "collapsed": false 963 | }, 964 | "outputs": [ 965 | { 966 | "data": { 967 | "text/plain": [ 968 | "0.96954732510288066" 969 | ] 970 | }, 971 | "execution_count": 187, 972 | "metadata": {}, 973 | "output_type": "execute_result" 974 | } 975 | ], 976 | "source": [ 977 | "accuracy_score(actual, predicted)" 978 | ] 979 | }, 980 | { 981 | "cell_type": "code", 982 | "execution_count": 193, 983 | "metadata": { 984 | "collapsed": false 985 | }, 986 | "outputs": [ 987 | { 988 | 
"name": "stdout", 989 | "output_type": "stream", 990 | "text": [ 991 | " | r |\n", 992 | " | _ r r |\n", 993 | " | c _ _ e |\n", 994 | " | u l c p m n s x |\n", 995 | " | i o h r o e o w o p |\n", 996 | " | s c e i d a r e u e |\n", 997 | " | i a a c e s t s t n |\n", 998 | " | n t p e r t h t h s |\n", 999 | "---------+-----------------------------------------------------------------------+\n", 1000 | "r_cuisin | <18.9%> . . . . . . . . . |\n", 1001 | " r_locat | . <14.7%> . . . . . 0.1% 0.6% . |\n", 1002 | " cheap | . . <11.9%> 0.2% . . . . . . |\n", 1003 | " r_price | . . 0.1% <10.5%> 0.2% . . . . 0.1% |\n", 1004 | " moder | . . . 0.2% <8.2%> . . . . . |\n", 1005 | " east | . . . . . <6.4%> . . . . |\n", 1006 | " north | . . . . . . <5.1%> . . . |\n", 1007 | " west | . 0.2% . . . . . <3.3%> . . |\n", 1008 | " south | . . . . . . . . <3.0%> . |\n", 1009 | " expens | . . . . . . . . . <1.9%>|\n", 1010 | "---------+-----------------------------------------------------------------------+\n", 1011 | "(row = reference; col = test)\n", 1012 | "\n" 1013 | ] 1014 | } 1015 | ], 1016 | "source": [ 1017 | "cm = nltk.ConfusionMatrix(actual, predicted)\n", 1018 | "print(cm.pretty_format(sort_by_count=True, show_percents=True, truncate=10))" 1019 | ] 1020 | }, 1021 | { 1022 | "cell_type": "code", 1023 | "execution_count": 207, 1024 | "metadata": { 1025 | "collapsed": false 1026 | }, 1027 | "outputs": [ 1028 | { 1029 | "data": { 1030 | "text/html": [ 1031 | "
\n", 1032 | "\n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | 
" \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 
| " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 
1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | "
textframeslotspredicted
1[cheap, restaur, in, the, north, part, of, town](r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
2[cheap, restaur, in, the, south, part, of, town](r_cuisin, south, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
3[cheap, restaur, serv, indian, food](indian, r_locat, cheap)[(r_price, cheap), (r_cuisin, indian)](indian, r_locat, cheap)
7[im, look, for, a, cheap, restaur, in, the, no...(r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
10[cheap, restaur](r_cuisin, r_locat, cheap)[(r_price, cheap)](r_cuisin, r_locat, cheap)
12[i, want, a, cheap, restaur, in, the, west, pa...(r_cuisin, west, cheap)[(r_price, cheap), (r_locat, west)](r_cuisin, west, cheap)
14[i, am, look, for, a, cheap, restaur, in, the,...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
15[im, look, for, a, cheap, restaur, serv, inter...(intern, r_locat, cheap)[(r_price, cheap), (r_cuisin, intern)](intern, r_locat, cheap)
17[look, for, a, cheap, restaur, in, the, south,...(r_cuisin, south, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
20[look, for, someth, cheap, in, the, north, sid...(r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
33[im, look, for, a, cheap, restaur, in, the, so...(r_cuisin, r_locat, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
40[i, want, a, cheap, restaur, in, the, south, p...(r_cuisin, r_locat, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
44[i, need, a, cheap, restaur, in, the, south, p...(r_cuisin, south, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
46[i, want, a, cheap, restaur, in, the, east, pa...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
57[breath, id, like, a, cheap, restaur, in, the,...(r_cuisin, south, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
61[im, look, for, a, cheap, restaur, in, the, we...(r_cuisin, west, cheap)[(r_price, cheap), (r_locat, west)](r_cuisin, west, cheap)
62[i, would, like, to, find, a, cheap, restaur, ...(r_cuisin, south, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
68[cheap, restaur, in, the, north, part, of, town](r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
71[i, would, like, a, cheap, restaur, in, the, n...(r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
72[i, would, like, a, cheap, restaur, in, the, w...(r_cuisin, west, cheap)[(r_price, cheap), (r_locat, west)](r_cuisin, west, cheap)
76[im, look, for, a, cheap, restaur, in, the, we...(r_cuisin, west, cheap)[(r_price, cheap), (r_locat, west)](r_cuisin, west, cheap)
82[im, look, for, a, cheap, restaur, in, the, so...(r_cuisin, south, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
83[im, look, for, a, cheap, restaur, in, the, ea...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
84[cheap, restaur, serv, spanish, food](spanish, r_locat, cheap)[(r_price, cheap), (r_cuisin, spanish)](spanish, r_locat, cheap)
86[im, look, for, a, cheap, restaur, in, the, no...(r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
89[im, look, for, a, cheap, restaur, in, the, we...(r_cuisin, west, cheap)[(r_price, cheap), (r_locat, west)](r_cuisin, west, cheap)
92[im, look, for, a, cheap, restaur, in, the, ea...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
96[i, need, a, cheap, restaur, in, the, west, pa...(r_cuisin, west, cheap)[(r_price, cheap), (r_locat, west)](r_cuisin, west, cheap)
98[im, look, for, a, cheap, restaur, in, the, no...(r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
99[im, look, for, a, cheap, restaur, in, the, no...(r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
...............
337[look, for, a, cheap, restaur, in, the, east, ...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
340[cheap, restaur, west, part, of, town](r_cuisin, west, cheap)[(r_price, cheap), (r_locat, west)](r_cuisin, west, cheap)
343[im, look, for, a, cheap, restaur, and, it, sh...(r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
346[im, look, for, a, cheap, restaur, in, the, we...(r_cuisin, west, cheap)[(r_price, cheap), (r_locat, west)](r_cuisin, west, cheap)
350[cheap, restaur, south, part, of, town](r_cuisin, south, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
351[iam, look, for, a, cheap, restaur, and, it, s...(r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
352[uh, i, want, a, cheap, restaur, and, it, shou...(r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
353[im, look, for, a, cheap, restaur, in, the, so...(r_cuisin, south, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
354[i, would, like, a, cheap, restaur, in, the, s...(r_cuisin, r_locat, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
356[can, i, have, a, cheap, restaur, in, the, wes...(r_cuisin, west, cheap)[(r_price, cheap), (r_locat, west)](r_cuisin, west, cheap)
358[i, am, look, for, a, cheap, restaur, in, the,...(r_cuisin, west, cheap)[(r_price, cheap), (r_locat, west)](r_cuisin, west, cheap)
360[a, want, a, cheap, restaur, in, the, north, p...(r_cuisin, north, cheap)[(r_price, cheap), (r_locat, north)](r_cuisin, north, cheap)
365[cheap, restaur, in, th, east, part, of, town](r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
366[im, look, for, a, cheap, restaur, and, it, sh...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
369[cheap, restaur, in, th, east, part, of, town](r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
371[cheap, restaur, in, the, east, part, of, town](r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
372[im, look, for, a, cheap, restaur, in, the, ea...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
374[look, for, someth, cheap, on, the, east, part...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
377[im, look, for, a, cheap, restaur, serv, medit...(mediterranean, r_locat, cheap)[(r_price, cheap), (r_cuisin, mediterranean)](mediterranean, r_locat, cheap)
378[im, look, for, a, cheap, restaur, in, the, so...(r_cuisin, south, cheap)[(r_price, cheap), (r_locat, south)](r_cuisin, south, cheap)
384[im, look, for, a, cheap, restaur, that, serv,...(vietnames, r_locat, cheap)[(r_price, cheap), (r_cuisin, vietnames)](vietnames, r_locat, cheap)
389[im, look, for, a, cheap, restaur, in, the, ea...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
393[im, look, for, a, cheap, restaur, in, the, ea...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
396[cheap, restaur, east, part, of, town](r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
398[look, for, a, cheap, restaur, in, the, east, ...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
400[cheap, restaur, on, the, east, part, of, town](r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
406[cheap, restaur, in, the, east, part, of, town](r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
410[i, need, a, cheap, restaur, in, the, east, pa...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
412[cheap, restaur, east, part, of, town](r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
413[im, look, for, a, cheap, restaur, in, the, ea...(r_cuisin, east, cheap)[(r_price, cheap), (r_locat, east)](r_cuisin, east, cheap)
\n", 1472 | "

145 rows × 4 columns

\n", 1473 | "
" 1474 | ], 1475 | "text/plain": [ 1476 | " text \\\n", 1477 | "1 [cheap, restaur, in, the, north, part, of, town] \n", 1478 | "2 [cheap, restaur, in, the, south, part, of, town] \n", 1479 | "3 [cheap, restaur, serv, indian, food] \n", 1480 | "7 [im, look, for, a, cheap, restaur, in, the, no... \n", 1481 | "10 [cheap, restaur] \n", 1482 | "12 [i, want, a, cheap, restaur, in, the, west, pa... \n", 1483 | "14 [i, am, look, for, a, cheap, restaur, in, the,... \n", 1484 | "15 [im, look, for, a, cheap, restaur, serv, inter... \n", 1485 | "17 [look, for, a, cheap, restaur, in, the, south,... \n", 1486 | "20 [look, for, someth, cheap, in, the, north, sid... \n", 1487 | "33 [im, look, for, a, cheap, restaur, in, the, so... \n", 1488 | "40 [i, want, a, cheap, restaur, in, the, south, p... \n", 1489 | "44 [i, need, a, cheap, restaur, in, the, south, p... \n", 1490 | "46 [i, want, a, cheap, restaur, in, the, east, pa... \n", 1491 | "57 [breath, id, like, a, cheap, restaur, in, the,... \n", 1492 | "61 [im, look, for, a, cheap, restaur, in, the, we... \n", 1493 | "62 [i, would, like, to, find, a, cheap, restaur, ... \n", 1494 | "68 [cheap, restaur, in, the, north, part, of, town] \n", 1495 | "71 [i, would, like, a, cheap, restaur, in, the, n... \n", 1496 | "72 [i, would, like, a, cheap, restaur, in, the, w... \n", 1497 | "76 [im, look, for, a, cheap, restaur, in, the, we... \n", 1498 | "82 [im, look, for, a, cheap, restaur, in, the, so... \n", 1499 | "83 [im, look, for, a, cheap, restaur, in, the, ea... \n", 1500 | "84 [cheap, restaur, serv, spanish, food] \n", 1501 | "86 [im, look, for, a, cheap, restaur, in, the, no... \n", 1502 | "89 [im, look, for, a, cheap, restaur, in, the, we... \n", 1503 | "92 [im, look, for, a, cheap, restaur, in, the, ea... \n", 1504 | "96 [i, need, a, cheap, restaur, in, the, west, pa... \n", 1505 | "98 [im, look, for, a, cheap, restaur, in, the, no... \n", 1506 | "99 [im, look, for, a, cheap, restaur, in, the, no... \n", 1507 | ".. ... 
\n", 1508 | "337 [look, for, a, cheap, restaur, in, the, east, ... \n", 1509 | "340 [cheap, restaur, west, part, of, town] \n", 1510 | "343 [im, look, for, a, cheap, restaur, and, it, sh... \n", 1511 | "346 [im, look, for, a, cheap, restaur, in, the, we... \n", 1512 | "350 [cheap, restaur, south, part, of, town] \n", 1513 | "351 [iam, look, for, a, cheap, restaur, and, it, s... \n", 1514 | "352 [uh, i, want, a, cheap, restaur, and, it, shou... \n", 1515 | "353 [im, look, for, a, cheap, restaur, in, the, so... \n", 1516 | "354 [i, would, like, a, cheap, restaur, in, the, s... \n", 1517 | "356 [can, i, have, a, cheap, restaur, in, the, wes... \n", 1518 | "358 [i, am, look, for, a, cheap, restaur, in, the,... \n", 1519 | "360 [a, want, a, cheap, restaur, in, the, north, p... \n", 1520 | "365 [cheap, restaur, in, th, east, part, of, town] \n", 1521 | "366 [im, look, for, a, cheap, restaur, and, it, sh... \n", 1522 | "369 [cheap, restaur, in, th, east, part, of, town] \n", 1523 | "371 [cheap, restaur, in, the, east, part, of, town] \n", 1524 | "372 [im, look, for, a, cheap, restaur, in, the, ea... \n", 1525 | "374 [look, for, someth, cheap, on, the, east, part... \n", 1526 | "377 [im, look, for, a, cheap, restaur, serv, medit... \n", 1527 | "378 [im, look, for, a, cheap, restaur, in, the, so... \n", 1528 | "384 [im, look, for, a, cheap, restaur, that, serv,... \n", 1529 | "389 [im, look, for, a, cheap, restaur, in, the, ea... \n", 1530 | "393 [im, look, for, a, cheap, restaur, in, the, ea... \n", 1531 | "396 [cheap, restaur, east, part, of, town] \n", 1532 | "398 [look, for, a, cheap, restaur, in, the, east, ... \n", 1533 | "400 [cheap, restaur, on, the, east, part, of, town] \n", 1534 | "406 [cheap, restaur, in, the, east, part, of, town] \n", 1535 | "410 [i, need, a, cheap, restaur, in, the, east, pa... \n", 1536 | "412 [cheap, restaur, east, part, of, town] \n", 1537 | "413 [im, look, for, a, cheap, restaur, in, the, ea... 
\n", 1538 | "\n", 1539 | " frame \\\n", 1540 | "1 (r_cuisin, north, cheap) \n", 1541 | "2 (r_cuisin, south, cheap) \n", 1542 | "3 (indian, r_locat, cheap) \n", 1543 | "7 (r_cuisin, north, cheap) \n", 1544 | "10 (r_cuisin, r_locat, cheap) \n", 1545 | "12 (r_cuisin, west, cheap) \n", 1546 | "14 (r_cuisin, east, cheap) \n", 1547 | "15 (intern, r_locat, cheap) \n", 1548 | "17 (r_cuisin, south, cheap) \n", 1549 | "20 (r_cuisin, north, cheap) \n", 1550 | "33 (r_cuisin, r_locat, cheap) \n", 1551 | "40 (r_cuisin, r_locat, cheap) \n", 1552 | "44 (r_cuisin, south, cheap) \n", 1553 | "46 (r_cuisin, east, cheap) \n", 1554 | "57 (r_cuisin, south, cheap) \n", 1555 | "61 (r_cuisin, west, cheap) \n", 1556 | "62 (r_cuisin, south, cheap) \n", 1557 | "68 (r_cuisin, north, cheap) \n", 1558 | "71 (r_cuisin, north, cheap) \n", 1559 | "72 (r_cuisin, west, cheap) \n", 1560 | "76 (r_cuisin, west, cheap) \n", 1561 | "82 (r_cuisin, south, cheap) \n", 1562 | "83 (r_cuisin, east, cheap) \n", 1563 | "84 (spanish, r_locat, cheap) \n", 1564 | "86 (r_cuisin, north, cheap) \n", 1565 | "89 (r_cuisin, west, cheap) \n", 1566 | "92 (r_cuisin, east, cheap) \n", 1567 | "96 (r_cuisin, west, cheap) \n", 1568 | "98 (r_cuisin, north, cheap) \n", 1569 | "99 (r_cuisin, north, cheap) \n", 1570 | ".. ... 
\n", 1571 | "337 (r_cuisin, east, cheap) \n", 1572 | "340 (r_cuisin, west, cheap) \n", 1573 | "343 (r_cuisin, north, cheap) \n", 1574 | "346 (r_cuisin, west, cheap) \n", 1575 | "350 (r_cuisin, south, cheap) \n", 1576 | "351 (r_cuisin, north, cheap) \n", 1577 | "352 (r_cuisin, north, cheap) \n", 1578 | "353 (r_cuisin, south, cheap) \n", 1579 | "354 (r_cuisin, r_locat, cheap) \n", 1580 | "356 (r_cuisin, west, cheap) \n", 1581 | "358 (r_cuisin, west, cheap) \n", 1582 | "360 (r_cuisin, north, cheap) \n", 1583 | "365 (r_cuisin, east, cheap) \n", 1584 | "366 (r_cuisin, east, cheap) \n", 1585 | "369 (r_cuisin, east, cheap) \n", 1586 | "371 (r_cuisin, east, cheap) \n", 1587 | "372 (r_cuisin, east, cheap) \n", 1588 | "374 (r_cuisin, east, cheap) \n", 1589 | "377 (mediterranean, r_locat, cheap) \n", 1590 | "378 (r_cuisin, south, cheap) \n", 1591 | "384 (vietnames, r_locat, cheap) \n", 1592 | "389 (r_cuisin, east, cheap) \n", 1593 | "393 (r_cuisin, east, cheap) \n", 1594 | "396 (r_cuisin, east, cheap) \n", 1595 | "398 (r_cuisin, east, cheap) \n", 1596 | "400 (r_cuisin, east, cheap) \n", 1597 | "406 (r_cuisin, east, cheap) \n", 1598 | "410 (r_cuisin, east, cheap) \n", 1599 | "412 (r_cuisin, east, cheap) \n", 1600 | "413 (r_cuisin, east, cheap) \n", 1601 | "\n", 1602 | " slots \\\n", 1603 | "1 [(r_price, cheap), (r_locat, north)] \n", 1604 | "2 [(r_price, cheap), (r_locat, south)] \n", 1605 | "3 [(r_price, cheap), (r_cuisin, indian)] \n", 1606 | "7 [(r_price, cheap), (r_locat, north)] \n", 1607 | "10 [(r_price, cheap)] \n", 1608 | "12 [(r_price, cheap), (r_locat, west)] \n", 1609 | "14 [(r_price, cheap), (r_locat, east)] \n", 1610 | "15 [(r_price, cheap), (r_cuisin, intern)] \n", 1611 | "17 [(r_price, cheap), (r_locat, south)] \n", 1612 | "20 [(r_price, cheap), (r_locat, north)] \n", 1613 | "33 [(r_price, cheap), (r_locat, south)] \n", 1614 | "40 [(r_price, cheap), (r_locat, south)] \n", 1615 | "44 [(r_price, cheap), (r_locat, south)] \n", 1616 | "46 [(r_price, cheap), 
(r_locat, east)] \n", 1617 | "57 [(r_price, cheap), (r_locat, south)] \n", 1618 | "61 [(r_price, cheap), (r_locat, west)] \n", 1619 | "62 [(r_price, cheap), (r_locat, south)] \n", 1620 | "68 [(r_price, cheap), (r_locat, north)] \n", 1621 | "71 [(r_price, cheap), (r_locat, north)] \n", 1622 | "72 [(r_price, cheap), (r_locat, west)] \n", 1623 | "76 [(r_price, cheap), (r_locat, west)] \n", 1624 | "82 [(r_price, cheap), (r_locat, south)] \n", 1625 | "83 [(r_price, cheap), (r_locat, east)] \n", 1626 | "84 [(r_price, cheap), (r_cuisin, spanish)] \n", 1627 | "86 [(r_price, cheap), (r_locat, north)] \n", 1628 | "89 [(r_price, cheap), (r_locat, west)] \n", 1629 | "92 [(r_price, cheap), (r_locat, east)] \n", 1630 | "96 [(r_price, cheap), (r_locat, west)] \n", 1631 | "98 [(r_price, cheap), (r_locat, north)] \n", 1632 | "99 [(r_price, cheap), (r_locat, north)] \n", 1633 | ".. ... \n", 1634 | "337 [(r_price, cheap), (r_locat, east)] \n", 1635 | "340 [(r_price, cheap), (r_locat, west)] \n", 1636 | "343 [(r_price, cheap), (r_locat, north)] \n", 1637 | "346 [(r_price, cheap), (r_locat, west)] \n", 1638 | "350 [(r_price, cheap), (r_locat, south)] \n", 1639 | "351 [(r_price, cheap), (r_locat, north)] \n", 1640 | "352 [(r_price, cheap), (r_locat, north)] \n", 1641 | "353 [(r_price, cheap), (r_locat, south)] \n", 1642 | "354 [(r_price, cheap), (r_locat, south)] \n", 1643 | "356 [(r_price, cheap), (r_locat, west)] \n", 1644 | "358 [(r_price, cheap), (r_locat, west)] \n", 1645 | "360 [(r_price, cheap), (r_locat, north)] \n", 1646 | "365 [(r_price, cheap), (r_locat, east)] \n", 1647 | "366 [(r_price, cheap), (r_locat, east)] \n", 1648 | "369 [(r_price, cheap), (r_locat, east)] \n", 1649 | "371 [(r_price, cheap), (r_locat, east)] \n", 1650 | "372 [(r_price, cheap), (r_locat, east)] \n", 1651 | "374 [(r_price, cheap), (r_locat, east)] \n", 1652 | "377 [(r_price, cheap), (r_cuisin, mediterranean)] \n", 1653 | "378 [(r_price, cheap), (r_locat, south)] \n", 1654 | "384 [(r_price, cheap), 
(r_cuisin, vietnames)] \n", 1655 | "389 [(r_price, cheap), (r_locat, east)] \n", 1656 | "393 [(r_price, cheap), (r_locat, east)] \n", 1657 | "396 [(r_price, cheap), (r_locat, east)] \n", 1658 | "398 [(r_price, cheap), (r_locat, east)] \n", 1659 | "400 [(r_price, cheap), (r_locat, east)] \n", 1660 | "406 [(r_price, cheap), (r_locat, east)] \n", 1661 | "410 [(r_price, cheap), (r_locat, east)] \n", 1662 | "412 [(r_price, cheap), (r_locat, east)] \n", 1663 | "413 [(r_price, cheap), (r_locat, east)] \n", 1664 | "\n", 1665 | " predicted \n", 1666 | "1 (r_cuisin, north, cheap) \n", 1667 | "2 (r_cuisin, south, cheap) \n", 1668 | "3 (indian, r_locat, cheap) \n", 1669 | "7 (r_cuisin, north, cheap) \n", 1670 | "10 (r_cuisin, r_locat, cheap) \n", 1671 | "12 (r_cuisin, west, cheap) \n", 1672 | "14 (r_cuisin, east, cheap) \n", 1673 | "15 (intern, r_locat, cheap) \n", 1674 | "17 (r_cuisin, south, cheap) \n", 1675 | "20 (r_cuisin, north, cheap) \n", 1676 | "33 (r_cuisin, south, cheap) \n", 1677 | "40 (r_cuisin, south, cheap) \n", 1678 | "44 (r_cuisin, south, cheap) \n", 1679 | "46 (r_cuisin, east, cheap) \n", 1680 | "57 (r_cuisin, south, cheap) \n", 1681 | "61 (r_cuisin, west, cheap) \n", 1682 | "62 (r_cuisin, south, cheap) \n", 1683 | "68 (r_cuisin, north, cheap) \n", 1684 | "71 (r_cuisin, north, cheap) \n", 1685 | "72 (r_cuisin, west, cheap) \n", 1686 | "76 (r_cuisin, west, cheap) \n", 1687 | "82 (r_cuisin, south, cheap) \n", 1688 | "83 (r_cuisin, east, cheap) \n", 1689 | "84 (spanish, r_locat, cheap) \n", 1690 | "86 (r_cuisin, north, cheap) \n", 1691 | "89 (r_cuisin, west, cheap) \n", 1692 | "92 (r_cuisin, east, cheap) \n", 1693 | "96 (r_cuisin, west, cheap) \n", 1694 | "98 (r_cuisin, north, cheap) \n", 1695 | "99 (r_cuisin, north, cheap) \n", 1696 | ".. ... 
\n", 1697 | "337 (r_cuisin, east, cheap) \n", 1698 | "340 (r_cuisin, west, cheap) \n", 1699 | "343 (r_cuisin, north, cheap) \n", 1700 | "346 (r_cuisin, west, cheap) \n", 1701 | "350 (r_cuisin, south, cheap) \n", 1702 | "351 (r_cuisin, north, cheap) \n", 1703 | "352 (r_cuisin, north, cheap) \n", 1704 | "353 (r_cuisin, south, cheap) \n", 1705 | "354 (r_cuisin, south, cheap) \n", 1706 | "356 (r_cuisin, west, cheap) \n", 1707 | "358 (r_cuisin, west, cheap) \n", 1708 | "360 (r_cuisin, north, cheap) \n", 1709 | "365 (r_cuisin, east, cheap) \n", 1710 | "366 (r_cuisin, east, cheap) \n", 1711 | "369 (r_cuisin, east, cheap) \n", 1712 | "371 (r_cuisin, east, cheap) \n", 1713 | "372 (r_cuisin, east, cheap) \n", 1714 | "374 (r_cuisin, east, cheap) \n", 1715 | "377 (mediterranean, r_locat, cheap) \n", 1716 | "378 (r_cuisin, south, cheap) \n", 1717 | "384 (vietnames, r_locat, cheap) \n", 1718 | "389 (r_cuisin, east, cheap) \n", 1719 | "393 (r_cuisin, east, cheap) \n", 1720 | "396 (r_cuisin, east, cheap) \n", 1721 | "398 (r_cuisin, east, cheap) \n", 1722 | "400 (r_cuisin, east, cheap) \n", 1723 | "406 (r_cuisin, east, cheap) \n", 1724 | "410 (r_cuisin, east, cheap) \n", 1725 | "412 (r_cuisin, east, cheap) \n", 1726 | "413 (r_cuisin, east, cheap) \n", 1727 | "\n", 1728 | "[145 rows x 4 columns]" 1729 | ] 1730 | }, 1731 | "execution_count": 207, 1732 | "metadata": {}, 1733 | "output_type": "execute_result" 1734 | } 1735 | ], 1736 | "source": [ 1737 | "test[test.text.map(lambda s: 'cheap' in s)]" 1738 | ] 1739 | }, 1740 | { 1741 | "cell_type": "code", 1742 | "execution_count": 202, 1743 | "metadata": { 1744 | "collapsed": false 1745 | }, 1746 | "outputs": [ 1747 | { 1748 | "data": { 1749 | "text/plain": [ 1750 | "['id',\n", 1751 | " 'like',\n", 1752 | " 'a',\n", 1753 | " 'cheap',\n", 1754 | " 'restaur',\n", 1755 | " 'in',\n", 1756 | " 'the',\n", 1757 | " 'south',\n", 1758 | " 'part',\n", 1759 | " 'of',\n", 1760 | " 'town']" 1761 | ] 1762 | }, 1763 | "execution_count": 202, 1764 | 
"metadata": {}, 1765 | "output_type": "execute_result" 1766 | } 1767 | ], 1768 | "source": [ 1769 | "test[test.text.map(lambda s: 'south' in s)]['text'][284]" 1770 | ] 1771 | }, 1772 | { 1773 | "cell_type": "code", 1774 | "execution_count": null, 1775 | "metadata": { 1776 | "collapsed": true 1777 | }, 1778 | "outputs": [], 1779 | "source": [] 1780 | } 1781 | ], 1782 | "metadata": { 1783 | "kernelspec": { 1784 | "display_name": "Python 3", 1785 | "language": "python", 1786 | "name": "python3" 1787 | }, 1788 | "language_info": { 1789 | "codemirror_mode": { 1790 | "name": "ipython", 1791 | "version": 3 1792 | }, 1793 | "file_extension": ".py", 1794 | "mimetype": "text/x-python", 1795 | "name": "python", 1796 | "nbconvert_exporter": "python", 1797 | "pygments_lexer": "ipython3", 1798 | "version": "3.6.0" 1799 | } 1800 | }, 1801 | "nbformat": 4, 1802 | "nbformat_minor": 2 1803 | } 1804 | -------------------------------------------------------------------------------- /notebooks/resources/restaurants_props.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/resources/restaurants_props.pkl -------------------------------------------------------------------------------- /notebooks/resources/utts_refs.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/resources/utts_refs.pkl -------------------------------------------------------------------------------- /notebooks/screenshots/dialog-system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/screenshots/dialog-system.png -------------------------------------------------------------------------------- 
/notebooks/screenshots/global-and-local-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/screenshots/global-and-local-list.png -------------------------------------------------------------------------------- /notebooks/screenshots/local-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/screenshots/local-list.png -------------------------------------------------------------------------------- /notebooks/screenshots/mary-john-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/screenshots/mary-john-example.png -------------------------------------------------------------------------------- /notebooks/screenshots/prezzo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/screenshots/prezzo.png -------------------------------------------------------------------------------- /notebooks/screenshots/qa2-multiple-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/screenshots/qa2-multiple-list.png -------------------------------------------------------------------------------- /notebooks/screenshots/simple-relation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/screenshots/simple-relation.png 
-------------------------------------------------------------------------------- /notebooks/screenshots/state-graph-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/screenshots/state-graph-1.png -------------------------------------------------------------------------------- /notebooks/screenshots/state-graph-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/screenshots/state-graph-2.png -------------------------------------------------------------------------------- /notebooks/screenshots/v4-mary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hyperparticle/graph-nlu/2aa7ef3ce67e4dadd5d1b89b9d7bf40d3d53d9fc/notebooks/screenshots/v4-mary.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | jupyter 3 | neo4j-driver 4 | numpy 5 | nltk 6 | pandas 7 | scipy 8 | scikit-learn --------------------------------------------------------------------------------