├── .classpath ├── .gitignore ├── .project ├── .settings ├── org.eclipse.core.resources.prefs ├── org.eclipse.jdt.core.prefs └── org.eclipse.m2e.core.prefs ├── README.md ├── pom.xml └── src ├── main └── java │ └── drew │ └── corenlp │ ├── SimpleExample.java │ └── TruecaseExample.java └── test └── resources └── sample-content.txt /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .idea 3 | *.iml 4 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | corenlp-example 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/main/resources=UTF-8 4 | encoding//src/test/java=UTF-8 5 | encoding//src/test/resources=UTF-8 6 | encoding/=UTF-8 7 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 3 | 
org.eclipse.jdt.core.compiler.compliance=1.6 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.6 6 | -------------------------------------------------------------------------------- /.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | corenlp-examples 2 | ================ 3 | 4 | Stanford Core NLP API usage examples 5 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 4.0.0 6 | drew 7 | corenlp-example 8 | 1 9 | Simple Stanford CoreNLP/Parser examples 10 | Examples of Stanford Core NLP and Stanford Parser API usage 11 | 12 | 13 | UTF-8 14 | 15 | 16 | 17 | 18 | 19 | org.apache.maven.plugins 20 | maven-compiler-plugin 21 | 22 | 1.6 23 | 1.6 24 | 25 | 3.0 26 | 27 | 28 | 29 | 30 | 31 | edu.stanford.nlp 32 | stanford-corenlp 33 | 4.4.0 34 | 35 | 36 | edu.stanford.nlp 37 | stanford-corenlp 38 | 4.4.0 39 | models 40 | 41 | 42 | edu.stanford.nlp 43 | stanford-parser 44 | 3.9.2 45 | 46 | 47 | com.google.guava 48 | guava 49 | 31.0.1-jre 50 | 51 | 52 | org.slf4j 53 | slf4j-simple 54 | 1.7.12 55 | runtime 56 | true 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /src/main/java/drew/corenlp/SimpleExample.java: -------------------------------------------------------------------------------- 1 | package drew.corenlp; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.nio.charset.Charset; 6 | import java.util.List; 7 | import java.util.Map; 8 | import 
java.util.Properties; 9 | 10 | import com.google.common.io.Files; 11 | 12 | import edu.stanford.nlp.dcoref.CorefChain; 13 | import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; 14 | import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; 15 | import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; 16 | import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; 17 | import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; 18 | import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; 19 | import edu.stanford.nlp.ling.CoreLabel; 20 | import edu.stanford.nlp.pipeline.Annotation; 21 | import edu.stanford.nlp.pipeline.StanfordCoreNLP; 22 | import edu.stanford.nlp.semgraph.SemanticGraph; 23 | import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation; 24 | import edu.stanford.nlp.trees.Tree; 25 | import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation; 26 | import edu.stanford.nlp.util.CoreMap; 27 | 28 | /** A simple CoreNLP example adapted from the Stanford CoreNLP website: annotates wikinews text read from src/test/resources/sample-content.txt and prints each token's POS/NER tags plus every sentence's parse tree and dependency graph. */ 29 | public class SimpleExample { 30 | 31 | public static void main(String[] args) throws IOException { 32 | // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 33 | Properties props = new Properties(); 34 | props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); 35 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props); 36 | 37 | // read some text from the file..
38 | File inputFile = new File("src/test/resources/sample-content.txt"); 39 | String text = Files.asCharSource(inputFile, Charset.forName("UTF-8")).read(); 40 | 41 | // create an empty Annotation just with the given text 42 | Annotation document = new Annotation(text); 43 | 44 | // run all Annotators on this text 45 | pipeline.annotate(document); 46 | 47 | // these are all the sentences in this document 48 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 49 | List<CoreMap> sentences = document.get(SentencesAnnotation.class); 50 | 51 | for(CoreMap sentence: sentences) { 52 | // traversing the words in the current sentence 53 | // a CoreLabel is a CoreMap with additional token-specific methods 54 | for (CoreLabel token: sentence.get(TokensAnnotation.class)) { 55 | // this is the text of the token 56 | String word = token.get(TextAnnotation.class); 57 | // this is the POS tag of the token 58 | String pos = token.get(PartOfSpeechAnnotation.class); 59 | // this is the NER label of the token 60 | String ne = token.get(NamedEntityTagAnnotation.class); 61 | 62 | System.out.println("word: " + word + " pos: " + pos + " ne:" + ne); 63 | } 64 | 65 | // this is the parse tree of the current sentence 66 | Tree tree = sentence.get(TreeAnnotation.class); 67 | System.out.println("parse tree:\n" + tree); 68 | 69 | // this is the Stanford dependency graph of the current sentence 70 | SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); 71 | System.out.println("dependency graph:\n" + dependencies); 72 | } 73 | 74 | // This is the coreference link graph 75 | // Each chain stores a set of mentions that link to each other, 76 | // along with a method for getting the most representative mention 77 | // Both sentence and token offsets start at 1!
78 | Map graph = 79 | document.get(CorefChainAnnotation.class); 80 | 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java/drew/corenlp/TruecaseExample.java: -------------------------------------------------------------------------------- 1 | package drew.corenlp; 2 | 3 | import com.google.common.io.Files; 4 | import edu.stanford.nlp.dcoref.CorefChain; 5 | import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; 6 | import edu.stanford.nlp.ling.CoreAnnotations.*; 7 | import edu.stanford.nlp.ling.CoreLabel; 8 | import edu.stanford.nlp.pipeline.Annotation; 9 | import edu.stanford.nlp.pipeline.StanfordCoreNLP; 10 | import edu.stanford.nlp.semgraph.SemanticGraph; 11 | import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation; 12 | import edu.stanford.nlp.trees.Tree; 13 | import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation; 14 | import edu.stanford.nlp.util.CoreMap; 15 | 16 | import java.io.File; 17 | import java.io.IOException; 18 | import java.nio.charset.Charset; 19 | import java.util.ArrayList; 20 | import java.util.List; 21 | import java.util.Map; 22 | import java.util.Properties; 23 | 24 | /** A simple corenlp example ripped directly from the Stanford CoreNLP website using text from wikinews. */ 25 | public class TruecaseExample { 26 | 27 | public static void main(String[] args) throws IOException { 28 | // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 29 | Properties props = new Properties(); 30 | props.put("annotators", "tokenize, ssplit, pos, lemma, truecase"); 31 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props); 32 | 33 | // read some text from the file.. 
34 | File inputFile = new File("src/test/resources/sample-content.txt"); 35 | String input = Files.toString(inputFile, Charset.forName("UTF-8")); 36 | String lcInput = input.toLowerCase(); // downcase everything. 37 | 38 | // create an empty Annotation with just the downcased text. 39 | Annotation document = new Annotation(lcInput); 40 | 41 | // run all Annotators on this text 42 | pipeline.annotate(document); 43 | 44 | // these are all the sentences in this document 45 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 46 | List sentences = document.get(SentencesAnnotation.class); 47 | 48 | // capture the true cased tokens for evaluation. 49 | List tcTokens = new ArrayList(); 50 | 51 | System.out.println("------ begin truecase output -----"); 52 | for (CoreMap sentence : sentences) { 53 | // traversing the words in the current sentence 54 | // a CoreLabel is a CoreMap with additional token-specific methods 55 | for (CoreLabel token : sentence.get(TokensAnnotation.class)) { 56 | // this is the text of the token 57 | String text = token.get(TextAnnotation.class); 58 | String trueCase = token.get(TrueCaseAnnotation.class); 59 | String trueCaseText = token.get(TrueCaseTextAnnotation.class); 60 | System.out.printf("input:%s state:%s output:%s\n", text, trueCase, trueCaseText); 61 | tcTokens.add(trueCaseText); 62 | } 63 | } 64 | System.out.println("------ end truecase otuput -----"); 65 | 66 | 67 | // create an empty Annotation with just the standard text. 68 | document = new Annotation(input); 69 | 70 | // run all Annotators on this text 71 | pipeline.annotate(document); 72 | sentences = document.get(SentencesAnnotation.class); 73 | 74 | // capture the standard tokens for evaluation - note this assumes that 75 | // the pipeline won't generate additional tokens for the same input. 
76 | List stdTokens = new ArrayList(); 77 | 78 | for (CoreMap sentence : sentences) { 79 | // traversing the words in the current sentence 80 | // a CoreLabel is a CoreMap with additional token-specific methods 81 | for (CoreLabel token : sentence.get(TokensAnnotation.class)) { 82 | // this is the text of the token 83 | String word = token.get(TextAnnotation.class); 84 | stdTokens.add(word); 85 | } 86 | } 87 | 88 | // compare the output of the tc and the original to see how well we've done 89 | int match = 0; 90 | int sz = tcTokens.size(); 91 | 92 | System.out.println("------ begin evaluation output -----"); 93 | 94 | for (int i=0; i < sz; i++) { 95 | String tcToken = tcTokens.get(i); 96 | String stdToken = stdTokens.get(i); 97 | if (tcToken.equals(stdToken)) { 98 | match++; 99 | } 100 | else { 101 | System.out.printf("Truecase mismatch: input:'%s' output:'%s' @ %d\n", stdToken, tcToken, i); 102 | } 103 | } 104 | 105 | float errorRate = ((float) sz - match) / sz; 106 | System.out.println("Error Rate: " + errorRate); 107 | 108 | System.out.println("------ end evaluation output -----"); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/test/resources/sample-content.txt: -------------------------------------------------------------------------------- 1 | Last night, Adly Mansour, the interim leader of Egypt, announced plans to 2 | reform Egypt's constitution and hold a new round of parliamentary and 3 | presidential elections. The interim president also announced a judicial 4 | investigation into yesterday's shooting of at least 51 supporters of deposed 5 | president Mohamed Morsi. 6 | 7 | Mansour plans to form a panel within fifteen days to review and suggest changes 8 | to the now-suspended constitution. Those amendments would be voted on in a 9 | referendum within four months. 
Parliamentary elections would then be held, 10 | perhaps in early 2014, followed by presidential elections upon the forming 11 | of a new parliament."" 12 | --------------------------------------------------------------------------------