├── .classpath
├── .gitignore
├── .project
├── .settings
├── org.eclipse.core.resources.prefs
├── org.eclipse.jdt.core.prefs
└── org.eclipse.m2e.core.prefs
├── README.md
├── pom.xml
└── src
├── main
└── java
│ └── drew
│ └── corenlp
│ ├── SimpleExample.java
│ └── TruecaseExample.java
└── test
└── resources
└── sample-content.txt
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | .idea
3 | *.iml
4 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | corenlp-example
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.jdt.core.javanature
21 | org.eclipse.m2e.core.maven2Nature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/main/resources=UTF-8
4 | encoding//src/test/java=UTF-8
5 | encoding//src/test/resources=UTF-8
6 | encoding/=UTF-8
7 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
3 | org.eclipse.jdt.core.compiler.compliance=1.6
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.6
6 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | corenlp-examples
2 | ================
3 |
4 | Stanford Core NLP API usage examples
5 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 | 4.0.0
6 | drew
7 | corenlp-example
8 | 1
9 | Simple Stanford CoreNLP/Parser examples
10 | Examples of Stanford Core NLP and Stanford Parser API usage
11 |
12 |
13 | UTF-8
14 |
15 |
16 |
17 |
18 |
19 | org.apache.maven.plugins
20 | maven-compiler-plugin
21 |
22 | 1.6
23 | 1.6
24 |
25 | 3.0
26 |
27 |
28 |
29 |
30 |
31 | edu.stanford.nlp
32 | stanford-corenlp
33 | 4.4.0
34 |
35 |
36 | edu.stanford.nlp
37 | stanford-corenlp
38 | 4.4.0
39 | models
40 |
41 |
42 | edu.stanford.nlp
43 | stanford-parser
44 | 3.9.2
45 |
46 |
47 | com.google.guava
48 | guava
49 | 31.0.1-jre
50 |
51 |
52 | org.slf4j
53 | slf4j-simple
54 | 1.7.12
55 | runtime
56 | true
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/src/main/java/drew/corenlp/SimpleExample.java:
--------------------------------------------------------------------------------
1 | package drew.corenlp;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.nio.charset.Charset;
6 | import java.util.List;
7 | import java.util.Map;
8 | import java.util.Properties;
9 |
10 | import com.google.common.io.Files;
11 |
12 | import edu.stanford.nlp.dcoref.CorefChain;
13 | import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation;
14 | import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation;
15 | import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
16 | import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
17 | import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
18 | import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
19 | import edu.stanford.nlp.ling.CoreLabel;
20 | import edu.stanford.nlp.pipeline.Annotation;
21 | import edu.stanford.nlp.pipeline.StanfordCoreNLP;
22 | import edu.stanford.nlp.semgraph.SemanticGraph;
23 | import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation;
24 | import edu.stanford.nlp.trees.Tree;
25 | import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation;
26 | import edu.stanford.nlp.util.CoreMap;
27 |
28 | /** A simple corenlp example ripped directly from the Stanford CoreNLP website using text from wikinews. */
29 | public class SimpleExample {
30 |
31 | public static void main(String[] args) throws IOException {
32 | // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
33 | Properties props = new Properties();
34 | props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
35 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
36 |
37 | // read some text from the file..
38 | File inputFile = new File("src/test/resources/sample-content.txt");
39 | String text = Files.asCharSource(inputFile, Charset.forName("UTF-8")).read();
40 |
41 | // create an empty Annotation just with the given text
42 | Annotation document = new Annotation(text);
43 |
44 | // run all Annotators on this text
45 | pipeline.annotate(document);
46 |
47 | // these are all the sentences in this document
48 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
49 | List sentences = document.get(SentencesAnnotation.class);
50 |
51 | for(CoreMap sentence: sentences) {
52 | // traversing the words in the current sentence
53 | // a CoreLabel is a CoreMap with additional token-specific methods
54 | for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
55 | // this is the text of the token
56 | String word = token.get(TextAnnotation.class);
57 | // this is the POS tag of the token
58 | String pos = token.get(PartOfSpeechAnnotation.class);
59 | // this is the NER label of the token
60 | String ne = token.get(NamedEntityTagAnnotation.class);
61 |
62 | System.out.println("word: " + word + " pos: " + pos + " ne:" + ne);
63 | }
64 |
65 | // this is the parse tree of the current sentence
66 | Tree tree = sentence.get(TreeAnnotation.class);
67 | System.out.println("parse tree:\n" + tree);
68 |
69 | // this is the Stanford dependency graph of the current sentence
70 | SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
71 | System.out.println("dependency graph:\n" + dependencies);
72 | }
73 |
74 | // This is the coreference link graph
75 | // Each chain stores a set of mentions that link to each other,
76 | // along with a method for getting the most representative mention
77 | // Both sentence and token offsets start at 1!
78 | Map graph =
79 | document.get(CorefChainAnnotation.class);
80 |
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/src/main/java/drew/corenlp/TruecaseExample.java:
--------------------------------------------------------------------------------
1 | package drew.corenlp;
2 |
3 | import com.google.common.io.Files;
4 | import edu.stanford.nlp.dcoref.CorefChain;
5 | import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation;
6 | import edu.stanford.nlp.ling.CoreAnnotations.*;
7 | import edu.stanford.nlp.ling.CoreLabel;
8 | import edu.stanford.nlp.pipeline.Annotation;
9 | import edu.stanford.nlp.pipeline.StanfordCoreNLP;
10 | import edu.stanford.nlp.semgraph.SemanticGraph;
11 | import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation;
12 | import edu.stanford.nlp.trees.Tree;
13 | import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation;
14 | import edu.stanford.nlp.util.CoreMap;
15 |
16 | import java.io.File;
17 | import java.io.IOException;
18 | import java.nio.charset.Charset;
19 | import java.util.ArrayList;
20 | import java.util.List;
21 | import java.util.Map;
22 | import java.util.Properties;
23 |
24 | /** A simple corenlp example ripped directly from the Stanford CoreNLP website using text from wikinews. */
25 | public class TruecaseExample {
26 |
27 | public static void main(String[] args) throws IOException {
28 | // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
29 | Properties props = new Properties();
30 | props.put("annotators", "tokenize, ssplit, pos, lemma, truecase");
31 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
32 |
33 | // read some text from the file..
34 | File inputFile = new File("src/test/resources/sample-content.txt");
35 | String input = Files.toString(inputFile, Charset.forName("UTF-8"));
36 | String lcInput = input.toLowerCase(); // downcase everything.
37 |
38 | // create an empty Annotation with just the downcased text.
39 | Annotation document = new Annotation(lcInput);
40 |
41 | // run all Annotators on this text
42 | pipeline.annotate(document);
43 |
44 | // these are all the sentences in this document
45 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
46 | List sentences = document.get(SentencesAnnotation.class);
47 |
48 | // capture the true cased tokens for evaluation.
49 | List tcTokens = new ArrayList();
50 |
51 | System.out.println("------ begin truecase output -----");
52 | for (CoreMap sentence : sentences) {
53 | // traversing the words in the current sentence
54 | // a CoreLabel is a CoreMap with additional token-specific methods
55 | for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
56 | // this is the text of the token
57 | String text = token.get(TextAnnotation.class);
58 | String trueCase = token.get(TrueCaseAnnotation.class);
59 | String trueCaseText = token.get(TrueCaseTextAnnotation.class);
60 | System.out.printf("input:%s state:%s output:%s\n", text, trueCase, trueCaseText);
61 | tcTokens.add(trueCaseText);
62 | }
63 | }
64 | System.out.println("------ end truecase otuput -----");
65 |
66 |
67 | // create an empty Annotation with just the standard text.
68 | document = new Annotation(input);
69 |
70 | // run all Annotators on this text
71 | pipeline.annotate(document);
72 | sentences = document.get(SentencesAnnotation.class);
73 |
74 | // capture the standard tokens for evaluation - note this assumes that
75 | // the pipeline won't generate additional tokens for the same input.
76 | List stdTokens = new ArrayList();
77 |
78 | for (CoreMap sentence : sentences) {
79 | // traversing the words in the current sentence
80 | // a CoreLabel is a CoreMap with additional token-specific methods
81 | for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
82 | // this is the text of the token
83 | String word = token.get(TextAnnotation.class);
84 | stdTokens.add(word);
85 | }
86 | }
87 |
88 | // compare the output of the tc and the original to see how well we've done
89 | int match = 0;
90 | int sz = tcTokens.size();
91 |
92 | System.out.println("------ begin evaluation output -----");
93 |
94 | for (int i=0; i < sz; i++) {
95 | String tcToken = tcTokens.get(i);
96 | String stdToken = stdTokens.get(i);
97 | if (tcToken.equals(stdToken)) {
98 | match++;
99 | }
100 | else {
101 | System.out.printf("Truecase mismatch: input:'%s' output:'%s' @ %d\n", stdToken, tcToken, i);
102 | }
103 | }
104 |
105 | float errorRate = ((float) sz - match) / sz;
106 | System.out.println("Error Rate: " + errorRate);
107 |
108 | System.out.println("------ end evaluation output -----");
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/src/test/resources/sample-content.txt:
--------------------------------------------------------------------------------
1 | Last night, Adly Mansour, the interim leader of Egypt, announced plans to
2 | reform Egypt's constitution and hold a new round of parliamentary and
3 | presidential elections. The interim president also announced a judicial
4 | investigation into yesterday's shooting of at least 51 supporters of deposed
5 | president Mohamed Morsi.
6 |
7 | Mansour plans to form a panel within fifteen days to review and suggest changes
8 | to the now-suspended constitution. Those amendments would be voted on in a
9 | referendum within four months. Parliamentary elections would then be held,
10 | perhaps in early 2014, followed by presidential elections upon the forming
11 | of a new parliament.
12 |
--------------------------------------------------------------------------------