├── .gitignore ├── LICENSE ├── README.md ├── article.md ├── batch ├── en │ ├── en001.txt │ ├── en002.txt │ └── en003.txt └── img-backup │ ├── en001 │ ├── en001-0001.png │ ├── en001-0002.png │ ├── en001-0003.png │ └── en001-0004.png │ ├── en002 │ ├── en02-001.png │ ├── en02-002.png │ ├── en02-003.png │ └── en02-004.png │ └── en003 │ ├── en03-001.png │ ├── en03-002.png │ ├── en03-003.png │ └── en03-004.png ├── package-lock.json ├── package.json ├── pom.xml ├── screenshot └── img.png ├── src └── main │ ├── java │ └── com │ │ └── bpodgursky │ │ └── nlpviz │ │ ├── AbstractParser.java │ │ ├── EnglishParser.java │ │ ├── SpanishParser.java │ │ ├── api │ │ └── WebServer.java │ │ ├── scripts │ │ └── StanfordCoreNLPTest.java │ │ └── servlet │ │ ├── HomeServlet.java │ │ └── ParseServlet.java │ ├── resources │ └── com │ │ └── bpodgursky │ │ └── nlpviz │ │ ├── jobjar.xml │ │ └── log4j.xml │ └── www │ └── com │ └── bpodgursky │ └── nlpviz │ └── www │ ├── css │ └── digraph.css │ ├── index.html │ └── resources │ ├── animated-overlay.gif │ ├── bootstrap.min.css │ ├── bootstrap.min.js │ ├── d3.v3.min.js │ ├── dagre-d3.js │ ├── docs.css │ ├── font-awesome.min.css │ ├── glyphicons-halflings-white.png │ ├── glyphicons-halflings.png │ ├── images │ ├── ui-bg_diagonals-thick_18_b81900_40x40.png │ ├── ui-bg_diagonals-thick_20_666666_40x40.png │ ├── ui-bg_flat_10_000000_40x100.png │ ├── ui-bg_glass_100_f6f6f6_1x400.png │ ├── ui-bg_glass_100_fdf5ce_1x400.png │ ├── ui-bg_glass_65_ffffff_1x400.png │ ├── ui-bg_gloss-wave_35_f6a828_500x100.png │ ├── ui-bg_highlight-soft_100_eeeeee_1x100.png │ ├── ui-bg_highlight-soft_75_ffe45c_1x100.png │ ├── ui-icons_222222_256x240.png │ ├── ui-icons_228ef1_256x240.png │ ├── ui-icons_ef8c08_256x240.png │ ├── ui-icons_ffd27a_256x240.png │ └── ui-icons_ffffff_256x240.png │ ├── jquery-2.0.0.min.js │ ├── jquery-ui-1.9.2.custom.css │ ├── jquery-ui-1.9.2.custom.css~ │ ├── jquery-ui-1.9.2.custom.min.js │ ├── jquery.form.min.js │ ├── marked.js │ ├── purl.js │ └── 
uri.min.js └── target ├── classes └── com │ └── bpodgursky │ └── nlpviz │ ├── AbstractParser.class │ ├── EnglishParser.class │ ├── SpanishParser.class │ ├── api │ └── WebServer.class │ ├── jobjar.xml │ ├── log4j.xml │ ├── scripts │ └── StanfordCoreNLPTest.class │ ├── servlet │ ├── HomeServlet.class │ └── ParseServlet.class │ └── www │ ├── css │ └── digraph.css │ ├── index.html │ └── resources │ ├── animated-overlay.gif │ ├── bootstrap.min.css │ ├── bootstrap.min.js │ ├── d3.v3.min.js │ ├── dagre-d3.js │ ├── docs.css │ ├── font-awesome.min.css │ ├── glyphicons-halflings-white.png │ ├── glyphicons-halflings.png │ ├── images │ ├── ui-bg_diagonals-thick_18_b81900_40x40.png │ ├── ui-bg_diagonals-thick_20_666666_40x40.png │ ├── ui-bg_flat_10_000000_40x100.png │ ├── ui-bg_glass_100_f6f6f6_1x400.png │ ├── ui-bg_glass_100_fdf5ce_1x400.png │ ├── ui-bg_glass_65_ffffff_1x400.png │ ├── ui-bg_gloss-wave_35_f6a828_500x100.png │ ├── ui-bg_highlight-soft_100_eeeeee_1x100.png │ ├── ui-bg_highlight-soft_75_ffe45c_1x100.png │ ├── ui-icons_222222_256x240.png │ ├── ui-icons_228ef1_256x240.png │ ├── ui-icons_ef8c08_256x240.png │ ├── ui-icons_ffd27a_256x240.png │ └── ui-icons_ffffff_256x240.png │ ├── jquery-2.0.0.min.js │ ├── jquery-ui-1.9.2.custom.css │ ├── jquery-ui-1.9.2.custom.min.js │ ├── jquery.form.min.js │ ├── marked.js │ ├── purl.js │ └── uri.min.js └── maven-status └── maven-compiler-plugin └── compile └── default-compile ├── createdFiles.lst └── inputFiles.lst /.gitignore: -------------------------------------------------------------------------------- 1 | # dependencies 2 | /node_modules 3 | #System Files 4 | .DS_Store 5 | hs_err_pid5384.log 6 | nlpviz.log 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Ben Podgursky 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in 
compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nlpviz 2 | 3 | Nlpviz visualizes the structure of English sentences using Stanford CoreNLP and D3.js. 4 | 5 | ![image](screenshot/img.png)   6 | 7 | This repository is a fork of [bpodgursky/nlpviz](https://github.com/bpodgursky/nlpviz). 8 | 9 | I made this repository for my private use on my MacBook. 10 | 11 | ## Requirement 12 | 13 | [Java SE Development Kit 8](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) 14 | 15 | [Apache Maven](https://maven.apache.org) 16 | 17 | [Node.js](https://nodejs.org/en/) (v8.9.3) 18 | 19 | ## Platform 20 | 21 | macOS High Sierra 10.13 22 | 23 | ## Installation 24 | 25 | ```shell 26 | $ npm install 27 | ``` 28 | 29 | The first time you run this, it will take some time and network bandwidth to install the dependencies - the Stanford NLP core model jar is on its own over 200 MB. 30 | 31 | ## Starting the App 32 | 33 | ``` 34 | $ npm run start 35 | ``` 36 | 37 | At this point, you can open [http://localhost:43315](http://localhost:43315) in your browser. 
38 | -------------------------------------------------------------------------------- /article.md: -------------------------------------------------------------------------------- 1 | # Using CoreNLP, d3.js, and dagre.js to visualize sentence parse trees 2 | 3 | Posted on August 19, 2013 4 | by bpodgursky 5 | 6 | I’ve always been casually interested in the field of Natural Language Processing (NLP), a field of computer science interested in extracting information from natural human language. I have no training or education whatsoever in the field so I’m not in a position to contribute much to the field, but I am definitely interested in seeing where the state of the art is, and in particular how powerful open-source NLP libraries have gotten (Google and Microsoft certainly have more powerful closed-source systems, but that doesn’t really help me.) 7 | 8 | A few years ago I started playing with Apache’s OpenNLP project. I’m a big fan of the Apache foundation and their libraries, but I found myself very frustrated by OpenNLP’s lack of documentation and the hacky-feeling interfaces the library exposed. However recently I took another look at the available NLP libraries and came across Stanford’s CoreNLP project. CoreNLP, as it turns out, is an awesome project, and it took almost zero effort to get their example demo working. 9 | 10 | As a total NLP beginner, the sentence parsing functionality was the most immediately approachable example. Sentence parsing takes a natural-English sentence: 11 | 12 | “I am parsing an example sentence.” 13 | 14 | and breaks it down into component tokens and their relations: 15 | 16 | ` (ROOT1 (S (NP (PRP I)) (VP (VBP am) (VP (VBG parsing) (NP (DT an) (NN example) (NN sentence)))) (. .)))` 17 | 18 | where each token type corresponds to a particular word type–“NP” means “Noun Phrase”, VBG means “Verb, gerund or present participle”, and so forth (I’ve been referencing this as a complete token list.) 
19 | 20 | I’ve also been looking into JavaScript graph visualization libraries recently (I’ve struggled to find a JS library remotely as powerful and pretty as graphviz), and wanted to test out the dagre library, which re-implements a simplified dot algorithm in JavaScript and can render the results to d3 (the current coolest-kid-on-the-block JS graph library). So I put the two together and put together a simple visualization which uses dagre to show CoreNLP’s sentence parse tree. It’s pretty simple, but you can play with it here. 21 | -------------------------------------------------------------------------------- /batch/en/en001.txt: -------------------------------------------------------------------------------- 1 | HTML (Hypertext Markup Language) is not a programming language; it is a markup language used to tell your browser how to structure the web pages you visit. It can be as complicated or as simple as the web developer wishes it to be. HTML consists of a series of elements, which you use to enclose, wrap, or mark up different parts of the content to make it appear or act a certain way. The enclosing tags can make a bit of content into a hyperlink to link to another page on the web, italicize words, and so on. For example, take the following line of content. -------------------------------------------------------------------------------- /batch/en/en002.txt: -------------------------------------------------------------------------------- 1 | Block-level elements form a visible block on a page — they will appear on a new line from whatever content went before it, and any content that goes after it will also appear on a new line. Block-level elements tend to be structural elements on the page that represent, for example, paragraphs, lists, navigation menus, footers, etc. A block-level element wouldn't be nested inside an inline element, but it might be nested inside another block-level element. 
-------------------------------------------------------------------------------- /batch/en/en003.txt: -------------------------------------------------------------------------------- 1 | A very common task in HTML is structuring tabular data, and it has a number of elements and attributes for just this purpose. Coupled with a little CSS for styling, HTML makes it easy to display tables of information on the web such as your school lesson plan, the timetable at your local swimming pool, or statistics about your favorite dinosaurs or football team. This module takes you through all you need to know about structuring tabular data using HTML. -------------------------------------------------------------------------------- /batch/img-backup/en001/en001-0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en001/en001-0001.png -------------------------------------------------------------------------------- /batch/img-backup/en001/en001-0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en001/en001-0002.png -------------------------------------------------------------------------------- /batch/img-backup/en001/en001-0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en001/en001-0003.png -------------------------------------------------------------------------------- /batch/img-backup/en001/en001-0004.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en001/en001-0004.png -------------------------------------------------------------------------------- /batch/img-backup/en002/en02-001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en002/en02-001.png -------------------------------------------------------------------------------- /batch/img-backup/en002/en02-002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en002/en02-002.png -------------------------------------------------------------------------------- /batch/img-backup/en002/en02-003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en002/en02-003.png -------------------------------------------------------------------------------- /batch/img-backup/en002/en02-004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en002/en02-004.png -------------------------------------------------------------------------------- /batch/img-backup/en003/en03-001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en003/en03-001.png -------------------------------------------------------------------------------- /batch/img-backup/en003/en03-002.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en003/en03-002.png -------------------------------------------------------------------------------- /batch/img-backup/en003/en03-003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en003/en03-003.png -------------------------------------------------------------------------------- /batch/img-backup/en003/en03-004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/batch/img-backup/en003/en03-004.png -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nlpviz", 3 | "version": "0.0.1", 4 | "description": "codeNLP with d3.js", 5 | "main": "index.js", 6 | "scripts": { 7 | "preinstall": "mvn compile", 8 | "start": "mvn exec:java", 9 | "test": "echo \"Error: no test specified\" && exit 1" 10 | }, 11 | "keywords": [ 12 | "coreNLP" 13 | ], 14 | "license": "Apache-2.0", 15 | "dependencies": { 16 | "nodemon": "^1.17.5" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.bpodgursky 6 | nlpviz 7 | 1.0-SNAPSHOT 8 | jar 9 | 10 | nlpviz 11 | http://maven.apache.org 12 | 13 | 14 | UTF-8 15 | 16 | 17 | 18 | 19 | 20 | junit 21 | junit 22 | 4.4 23 | test 24 | 25 | 26 | 27 | org.json 28 | json 29 | 20090211 30 | 31 | 32 | 33 | org.slf4j 34 | slf4j-api 35 | 1.7.5 36 | 37 | 38 | 
39 | org.slf4j 40 | slf4j-log4j12 41 | 1.7.5 42 | 43 | 44 | 45 | commons-lang 46 | commons-lang 47 | 2.6 48 | 49 | 50 | 51 | commons-codec 52 | commons-codec 53 | 1.8 54 | 55 | 56 | 57 | commons-io 58 | commons-io 59 | 2.4 60 | 61 | 62 | 63 | com.google.guava 64 | guava 65 | 14.0.1 66 | 67 | 68 | 69 | org.apache.httpcomponents 70 | httpclient 71 | 4.2.5 72 | 73 | 74 | 75 | org.eclipse.jetty.aggregate 76 | jetty-all-server 77 | 8.1.10.v20130312 78 | 79 | 80 | 81 | org.jsoup 82 | jsoup 83 | 1.7.2 84 | 85 | 86 | 87 | 88 | 89 | edu.stanford.nlp 90 | stanford-corenlp 91 | 3.5.2 92 | 93 | 94 | 95 | edu.stanford.nlp 96 | stanford-parser 97 | 3.5.2 98 | 99 | 100 | 101 | edu.stanford.nlp 102 | stanford-corenlp 103 | 3.5.2 104 | models 105 | 106 | 107 | 108 | edu.stanford.nlp 109 | stanford-corenlp 110 | 3.5.2 111 | models-spanish 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | org.apache.maven.plugins 122 | maven-compiler-plugin 123 | 3.1 124 | 125 | 1.8 126 | 1.8 127 | 128 | 129 | 130 | 131 | maven-assembly-plugin 132 | 133 | 134 | 135 | true 136 | 137 | 138 | ${build-commit} 139 | 140 | 141 | 142 | src/main/resources/com/bpodgursky/nlpviz/jobjar.xml 143 | 144 | false 145 | 146 | 147 | 148 | package 149 | 150 | single 151 | 152 | 153 | false 154 | 155 | 156 | 157 | 158 | 159 | 160 | org.codehaus.mojo 161 | exec-maven-plugin 162 | 1.4.0 163 | 164 | 165 | 166 | java 167 | 168 | 169 | 170 | 171 | com.bpodgursky.nlpviz.api.WebServer 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | src/main/resources/ 181 | 182 | **/*.xml 183 | **/*.bin 184 | 185 | 186 | 187 | src/main/www/ 188 | 189 | **/*.jsp 190 | **/*.css 191 | **/*.js 192 | **/*.html 193 | **/*.png 194 | **/*.gif 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /screenshot/img.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/screenshot/img.png -------------------------------------------------------------------------------- /src/main/java/com/bpodgursky/nlpviz/AbstractParser.java: -------------------------------------------------------------------------------- 1 | package com.bpodgursky.nlpviz; 2 | 3 | import java.util.Iterator; 4 | import java.util.List; 5 | import java.util.Properties; 6 | 7 | import com.google.common.collect.Lists; 8 | import edu.stanford.nlp.ling.CoreAnnotations; 9 | import edu.stanford.nlp.ling.CoreLabel; 10 | import edu.stanford.nlp.pipeline.Annotation; 11 | import edu.stanford.nlp.pipeline.StanfordCoreNLP; 12 | import edu.stanford.nlp.trees.Tree; 13 | import edu.stanford.nlp.trees.TreeCoreAnnotations; 14 | import edu.stanford.nlp.util.CoreMap; 15 | import org.json.JSONArray; 16 | import org.json.JSONException; 17 | import org.json.JSONObject; 18 | 19 | public abstract class AbstractParser { 20 | 21 | private final StanfordCoreNLP pipeline; 22 | 23 | public AbstractParser(Properties properties) { 24 | pipeline = new StanfordCoreNLP(properties); 25 | } 26 | 27 | public JSONArray parse(String text) throws JSONException { 28 | Annotation document = new Annotation(text); 29 | pipeline.annotate(document); 30 | 31 | JSONArray array = new JSONArray(); 32 | 33 | List sentences = document.get(CoreAnnotations.SentencesAnnotation.class); 34 | 35 | for (CoreMap sentence : sentences) { 36 | Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); 37 | List coreLabels = sentence.get(CoreAnnotations.TokensAnnotation.class); 38 | 39 | array.put(toJSON(tree, coreLabels.iterator())); 40 | } 41 | 42 | return array; 43 | } 44 | 45 | public static JSONObject toJSON(Tree tree, Iterator labels) throws JSONException { 46 | 47 | List children = Lists.newArrayList(); 48 | for (Tree child : tree.getChildrenAsList()) { 49 | children.add(toJSON(child, labels)); 50 | } 51 | 
52 | JSONObject obj = new JSONObject(); 53 | 54 | if(tree.isLeaf()){ 55 | CoreLabel next = labels.next(); 56 | 57 | String word = next.get(CoreAnnotations.TextAnnotation.class); 58 | String pos = next.get(CoreAnnotations.PartOfSpeechAnnotation.class); 59 | String ne = next.get(CoreAnnotations.NamedEntityTagAnnotation.class); 60 | 61 | System.out.println(pos); 62 | 63 | obj.put("word", word); 64 | obj.put("pos", pos); 65 | obj.put("ne", ne); 66 | obj.put("type", "TK"); 67 | 68 | }else{ 69 | 70 | // System.out.println(tree.label()); 71 | 72 | obj.put("type", tree.label()); 73 | } 74 | 75 | return new JSONObject() 76 | .put("data", obj) 77 | .put("children", new JSONArray(children)); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/com/bpodgursky/nlpviz/EnglishParser.java: -------------------------------------------------------------------------------- 1 | package com.bpodgursky.nlpviz; 2 | 3 | import java.util.Properties; 4 | 5 | public class EnglishParser extends AbstractParser { 6 | 7 | public EnglishParser(){ 8 | super(getProperties()); 9 | } 10 | 11 | private static Properties getProperties(){ 12 | Properties props = new Properties(); 13 | props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); 14 | return props; 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/com/bpodgursky/nlpviz/SpanishParser.java: -------------------------------------------------------------------------------- 1 | package com.bpodgursky.nlpviz; 2 | 3 | import java.util.Properties; 4 | 5 | public class SpanishParser extends AbstractParser { 6 | 7 | public SpanishParser() { 8 | super(getProperties()); 9 | } 10 | 11 | private static Properties getProperties() { 12 | Properties props = new Properties(); 13 | props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse"); 14 | props.put("tokenize.language", "es"); 15 | 
props.put("pos.model", "edu/stanford/nlp/models/pos-tagger/spanish/spanish-distsim.tagger"); 16 | props.put("ner.model", "edu/stanford/nlp/models/ner/spanish.ancora.distsim.s512.crf.ser.gz"); 17 | props.put("ner.applyNumericClassifiers", "false"); 18 | props.put("ner.applyNumericClassifiers", "false"); 19 | props.put("ner.useSUTime", "false"); 20 | props.put("parse.model", "edu/stanford/nlp/models/lexparser/spanishPCFG.ser.gz"); 21 | return props; 22 | } 23 | 24 | } 25 | 26 | -------------------------------------------------------------------------------- /src/main/java/com/bpodgursky/nlpviz/api/WebServer.java: -------------------------------------------------------------------------------- 1 | package com.bpodgursky.nlpviz.api; 2 | 3 | import javax.servlet.DispatcherType; 4 | import java.net.URL; 5 | import java.util.EnumSet; 6 | import java.util.concurrent.Semaphore; 7 | 8 | import com.bpodgursky.nlpviz.servlet.ParseServlet; 9 | import org.apache.log4j.xml.DOMConfigurator; 10 | import org.eclipse.jetty.server.Server; 11 | import org.eclipse.jetty.servlet.ServletHolder; 12 | import org.eclipse.jetty.servlets.GzipFilter; 13 | import org.eclipse.jetty.webapp.WebAppContext; 14 | import org.slf4j.Logger; 15 | import org.slf4j.LoggerFactory; 16 | 17 | public class WebServer implements Runnable { 18 | public static final int DEFAULT_PORT = 43315; 19 | public static final String PARSER = "/parser"; 20 | public static final String HOME = "/home"; 21 | 22 | private final Semaphore shutdownLock = new Semaphore(0); 23 | private static final Logger LOG = LoggerFactory.getLogger(WebServer.class); 24 | 25 | public WebServer(){} 26 | 27 | public final void shutdown() { 28 | shutdownLock.release(); 29 | } 30 | 31 | public void run() { 32 | try { 33 | 34 | Server uiServer = new Server(DEFAULT_PORT); 35 | final URL warUrl = uiServer.getClass().getClassLoader().getResource("com/bpodgursky/nlpviz/www"); 36 | final String warUrlString = warUrl.toExternalForm(); 37 | 38 | WebAppContext 
context = new WebAppContext(warUrlString, "/"); 39 | context.addServlet(new ServletHolder(new ParseServlet()), PARSER); 40 | // context.addServlet(new ServletHolder(new HomeServlet()), HOME); 41 | context.addFilter(GzipFilter.class, "/*", EnumSet.of(DispatcherType.REQUEST)); 42 | 43 | uiServer.setHandler(context); 44 | 45 | LOG.info("Parse Server is listening on port: " + DEFAULT_PORT); 46 | 47 | uiServer.start(); 48 | 49 | shutdownLock.acquire(); 50 | 51 | } catch (Exception e) { 52 | throw new RuntimeException(e); 53 | } 54 | } 55 | 56 | public static void main(String[] args) throws InterruptedException { 57 | DOMConfigurator.configure(WebServer.class.getResource("/com/bpodgursky/nlpviz/log4j.xml")); 58 | 59 | WebServer server = new WebServer(); 60 | Thread thread1 = new Thread(server); 61 | 62 | thread1.start(); 63 | thread1.join(); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/com/bpodgursky/nlpviz/scripts/StanfordCoreNLPTest.java: -------------------------------------------------------------------------------- 1 | package com.bpodgursky.nlpviz.scripts; 2 | 3 | import com.google.common.collect.Lists; 4 | import edu.stanford.nlp.dcoref.CorefChain; 5 | import edu.stanford.nlp.dcoref.CorefCoreAnnotations; 6 | import edu.stanford.nlp.ling.CoreAnnotations; 7 | import edu.stanford.nlp.ling.CoreLabel; 8 | import edu.stanford.nlp.pipeline.Annotation; 9 | import edu.stanford.nlp.pipeline.StanfordCoreNLP; 10 | import edu.stanford.nlp.semgraph.SemanticGraph; 11 | import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; 12 | import edu.stanford.nlp.trees.Tree; 13 | import edu.stanford.nlp.trees.TreeCoreAnnotations; 14 | import edu.stanford.nlp.util.CoreMap; 15 | import org.json.JSONArray; 16 | import org.json.JSONException; 17 | import org.json.JSONObject; 18 | 19 | import java.io.IOException; 20 | import java.util.Iterator; 21 | import java.util.List; 22 | import java.util.Map; 23 | import 
java.util.Properties; 24 | 25 | public class StanfordCoreNLPTest { 26 | 27 | 28 | public static void main(String[] args) throws IOException, ClassNotFoundException, JSONException { 29 | 30 | // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 31 | Properties props = new Properties(); 32 | props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); 33 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props); 34 | 35 | // read some text in the text variable 36 | String text = "Bob is a truck driver. He drives a lot every day.";// Add your text here! 37 | 38 | // create an empty Annotation just with the given text 39 | Annotation document = new Annotation(text); 40 | 41 | // run all Annotators on this text 42 | pipeline.annotate(document); 43 | 44 | // these are all the sentences in this document 45 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 46 | List sentences = document.get(CoreAnnotations.SentencesAnnotation.class); 47 | 48 | for (CoreMap sentence : sentences) { 49 | // traversing the words in the current sentence 50 | // a CoreLabel is a CoreMap with additional token-specific methods 51 | for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { 52 | // this is the text of the token 53 | String word = token.get(CoreAnnotations.TextAnnotation.class); 54 | // this is the POS tag of the token 55 | String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class); 56 | // this is the NER label of the token 57 | String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class); 58 | 59 | System.out.println(); 60 | System.out.println(token.docID()); 61 | System.out.println(word); 62 | System.out.println(pos); 63 | System.out.println(ne); 64 | } 65 | 66 | // this is the parse tree of the current sentence 67 | Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); 68 | List coreLabels = 
sentence.get(CoreAnnotations.TokensAnnotation.class); 69 | 70 | tree.label(); 71 | 72 | System.out.println(tree); 73 | 74 | 75 | JSONObject json = toJSON(tree, coreLabels.iterator()); 76 | System.out.println(json); 77 | 78 | // this is the Stanford dependency graph of the current sentence 79 | SemanticGraph dependencies = sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); 80 | 81 | System.out.println(dependencies); 82 | 83 | } 84 | 85 | // This is the coreference link graph 86 | // Each chain stores a set of mentions that link to each other, 87 | // along with a method for getting the most representative mention 88 | // Both sentence and token offsets start at 1! 89 | Map graph = 90 | document.get(CorefCoreAnnotations.CorefChainAnnotation.class); 91 | 92 | System.out.println(graph); 93 | 94 | } 95 | 96 | public static JSONObject toJSON(Tree tree, Iterator labels) throws JSONException { 97 | 98 | List children = Lists.newArrayList(); 99 | for (Tree child : tree.getChildrenAsList()) { 100 | children.add(toJSON(child, labels)); 101 | } 102 | 103 | JSONObject obj = new JSONObject(); 104 | 105 | if(tree.isLeaf()){ 106 | CoreLabel next = labels.next(); 107 | 108 | String word = next.get(CoreAnnotations.TextAnnotation.class); 109 | String pos = next.get(CoreAnnotations.PartOfSpeechAnnotation.class); 110 | String ne = next.get(CoreAnnotations.NamedEntityTagAnnotation.class); 111 | 112 | obj.put("word", word); 113 | obj.put("pos", pos); 114 | obj.put("ne", ne); 115 | 116 | }else{ 117 | obj.put("type", tree.label()); 118 | } 119 | 120 | return new JSONObject() 121 | .put("data", obj) 122 | .put("children", new JSONArray(children)); 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/main/java/com/bpodgursky/nlpviz/servlet/HomeServlet.java: -------------------------------------------------------------------------------- 1 | package com.bpodgursky.nlpviz.servlet; 2 | 3 | import 
javax.servlet.ServletException; 4 | import javax.servlet.http.HttpServlet; 5 | import javax.servlet.http.HttpServletRequest; 6 | import javax.servlet.http.HttpServletResponse; 7 | import java.io.IOException; 8 | 9 | // not really necessary but here to make old links work 10 | public class HomeServlet extends HttpServlet { 11 | 12 | @Override 13 | protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { 14 | req.getRequestDispatcher("index.html").forward(req, resp); 15 | } 16 | } -------------------------------------------------------------------------------- /src/main/java/com/bpodgursky/nlpviz/servlet/ParseServlet.java: -------------------------------------------------------------------------------- 1 | package com.bpodgursky.nlpviz.servlet; 2 | 3 | import javax.servlet.ServletException; 4 | import javax.servlet.http.HttpServlet; 5 | import javax.servlet.http.HttpServletRequest; 6 | import javax.servlet.http.HttpServletResponse; 7 | import java.io.IOException; 8 | 9 | import com.bpodgursky.nlpviz.AbstractParser; 10 | import com.bpodgursky.nlpviz.EnglishParser; 11 | import com.bpodgursky.nlpviz.SpanishParser; 12 | import org.json.JSONException; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | public class ParseServlet extends HttpServlet { 17 | private static final Logger LOG = LoggerFactory.getLogger(ParseServlet.class); 18 | 19 | private final AbstractParser englishParser; 20 | private final AbstractParser spanishParser; 21 | 22 | public ParseServlet() throws IOException { 23 | englishParser = new EnglishParser(); 24 | spanishParser = new SpanishParser(); 25 | } 26 | 27 | @Override 28 | protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { 29 | 30 | try { 31 | String sentence = req.getParameter("text"); 32 | LOG.info("Processing request: "+sentence); 33 | 34 | String lang = req.getParameter("lang"); 35 | 36 | if(lang == null || 
lang.equals("en")) { 37 | resp.getWriter().append(englishParser.parse(sentence).toString()); 38 | }else{ 39 | resp.getWriter().append(spanishParser.parse(sentence).toString()); 40 | } 41 | 42 | } catch (JSONException e) { 43 | throw new RuntimeException(e); 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/resources/com/bpodgursky/nlpviz/jobjar.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | jobjar 6 | 7 | jar 8 | 9 | false 10 | 11 | 12 | / 13 | true 14 | true 15 | runtime 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/main/resources/com/bpodgursky/nlpviz/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/main/www/com/bpodgursky/nlpviz/www/css/digraph.css: -------------------------------------------------------------------------------- 1 | 2 | text { 3 | font-weight: 300; 4 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; 5 | font-size: 14px; 6 | } 7 | 8 | rect { 9 | fill: #fff; 10 | } 11 | 12 | .node > rect { 13 | stroke-width: 1px; 14 | stroke: #333; 15 | fill: none; 16 | opacity: 0.5; 17 | transform: rotate(-90deg); 18 | } 19 | 20 | .edge rect { 21 | fill: #fff 22 | } 23 | 24 | .edgePath path { 25 | stroke: #333; 26 | stroke-width: 1.5px; 27 | } 28 | 29 | .ne-O > rect { 30 | fill: #00ffd0; 31 | } 32 | 33 | .ne-PERSON > rect { 34 | fill: #b997ff; 35 | } 36 | 37 | .ne-DATE > rect { 38 | fill: #ffae6a; 39 | } 40 | 41 | .ne-ORGANIZATION > rect { 42 | fill: #96c2ff; 43 | } 44 | 45 | .ne-LOCATION > rect { 46 | fill: #7e7e7e; 47 | } 48 | 49 | .ne-ORDINAL > rect { 50 | fill: #92ff7d; 51 
| } 52 | 53 | .ne-NUMBER > rect { 54 | fill: #fdb9ff; 55 | } 56 | 57 | html, body { 58 | margin: 0; 59 | padding: 0; 60 | overflow: hidden 61 | } 62 | 63 | .main-svg { 64 | position: absolute; 65 | top: 50px; /* unit is required: a unitless non-zero length is invalid CSS and the declaration is dropped outside quirks mode */ 66 | left: 0; 67 | height: 1000px; 68 | width: 100%; 69 | border: 1px solid #999; 70 | transform: rotate(-90deg); 71 | } 72 | 73 | .legend-svg { 74 | position: fixed; 75 | height: 60px; 76 | width: 600px; 77 | } 78 | 79 | textarea { 80 | border: 1px solid #999999; 81 | width: 100%; 82 | margin: 5px 0; 83 | padding: 3px; 84 | } 85 | 86 | .legend { 87 | stroke-width: 3px; 88 | stroke: #333; 89 | fill: none; 90 | opacity: 0.5; 91 | } 92 | -------------------------------------------------------------------------------- /src/main/www/com/bpodgursky/nlpviz/www/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/src/main/www/com/bpodgursky/nlpviz/www/index.html -------------------------------------------------------------------------------- /src/main/www/com/bpodgursky/nlpviz/www/resources/animated-overlay.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsguru-git/NLP-Parse-Visualization/01ef26fefa1ee1a7744eb65aef0e3e9863012e15/src/main/www/com/bpodgursky/nlpviz/www/resources/animated-overlay.gif -------------------------------------------------------------------------------- /src/main/www/com/bpodgursky/nlpviz/www/resources/bootstrap.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Bootstrap.js by @fat & @mdo 3 | * plugins: bootstrap-transition.js, bootstrap-modal.js, bootstrap-dropdown.js, bootstrap-scrollspy.js, bootstrap-tab.js, bootstrap-tooltip.js, bootstrap-popover.js, bootstrap-affix.js, bootstrap-alert.js, bootstrap-button.js, bootstrap-collapse.js, bootstrap-carousel.js,
bootstrap-typeahead.js 4 | * Copyright 2012 Twitter, Inc. 5 | * http://www.apache.org/licenses/LICENSE-2.0.txt 6 | */ 7 | !function(a){a(function(){a.support.transition=function(){var a=function(){var a=document.createElement("bootstrap"),b={WebkitTransition:"webkitTransitionEnd",MozTransition:"transitionend",OTransition:"oTransitionEnd otransitionend",transition:"transitionend"},c;for(c in b)if(a.style[c]!==undefined)return b[c]}();return a&&{end:a}}()})}(window.jQuery),!function(a){var b=function(b,c){this.options=c,this.$element=a(b).delegate('[data-dismiss="modal"]',"click.dismiss.modal",a.proxy(this.hide,this)),this.options.remote&&this.$element.find(".modal-body").load(this.options.remote)};b.prototype={constructor:b,toggle:function(){return this[this.isShown?"hide":"show"]()},show:function(){var b=this,c=a.Event("show");this.$element.trigger(c);if(this.isShown||c.isDefaultPrevented())return;this.isShown=!0,this.escape(),this.backdrop(function(){var c=a.support.transition&&b.$element.hasClass("fade");b.$element.parent().length||b.$element.appendTo(document.body),b.$element.show(),c&&b.$element[0].offsetWidth,b.$element.addClass("in").attr("aria-hidden",!1),b.enforceFocus(),c?b.$element.one(a.support.transition.end,function(){b.$element.focus().trigger("shown")}):b.$element.focus().trigger("shown")})},hide:function(b){b&&b.preventDefault();var c=this;b=a.Event("hide"),this.$element.trigger(b);if(!this.isShown||b.isDefaultPrevented())return;this.isShown=!1,this.escape(),a(document).off("focusin.modal"),this.$element.removeClass("in").attr("aria-hidden",!0),a.support.transition&&this.$element.hasClass("fade")?this.hideWithTransition():this.hideModal()},enforceFocus:function(){var b=this;a(document).on("focusin.modal",function(a){b.$element[0]!==a.target&&!b.$element.has(a.target).length&&b.$element.focus()})},escape:function(){var 
a=this;this.isShown&&this.options.keyboard?this.$element.on("keyup.dismiss.modal",function(b){b.which==27&&a.hide()}):this.isShown||this.$element.off("keyup.dismiss.modal")},hideWithTransition:function(){var b=this,c=setTimeout(function(){b.$element.off(a.support.transition.end),b.hideModal()},500);this.$element.one(a.support.transition.end,function(){clearTimeout(c),b.hideModal()})},hideModal:function(){var a=this;this.$element.hide(),this.backdrop(function(){a.removeBackdrop(),a.$element.trigger("hidden")})},removeBackdrop:function(){this.$backdrop&&this.$backdrop.remove(),this.$backdrop=null},backdrop:function(b){var c=this,d=this.$element.hasClass("fade")?"fade":"";if(this.isShown&&this.options.backdrop){var e=a.support.transition&&d;this.$backdrop=a('