embeddings) {
37 | this.embeddings = embeddings;
38 | }
39 |
40 | public void updateCost(double replace, double insert, double delete) {
41 | weights.put("replace", replace);
42 | weights.put("insert", insert);
43 | weights.put("delete", delete);
44 | }
45 |
46 | public double matchPaths(String path1, String path2) {
47 | MatcherPath matcherPath1 = new MatcherPath(path1);
48 | MatcherPath matcherPath2 = new MatcherPath(path2);
49 |
50 | return matchPaths(matcherPath1, matcherPath2);
51 | }
52 |
53 | public double matchPaths(MatcherPath matcherPath1, MatcherPath matcherPath2) {
54 | int len1 = matcherPath1.nodes.size();
55 | int len2 = matcherPath2.nodes.size();
56 | if (len1 == 1 && len2 == 1) {
57 | return matcherPath1.nodes.get(0).label.equals(matcherPath2.nodes.get(0).label)
58 | && matcherPath1.arg1Type.equals(matcherPath2.arg1Type)
59 | && matcherPath1.arg2Type.equals(matcherPath2.arg2Type) ?
60 | 0 : 1;
61 | }
62 |
63 | double[][] dp = new double[len1 + 1][len2 + 1];
64 |
65 | for (int i = 0; i <= len1; i++) {
66 | dp[i][0] = i;
67 | }
68 |
69 | for (int j = 0; j <= len2; j++) {
70 | dp[0][j] = j;
71 | }
72 |
73 | //iterate though, and check last char
74 | for (int i = 0; i < len1; i++) {
75 | MatcherNode c1 = matcherPath1.nodes.get(i);
76 | for (int j = 0; j < len2; j++) {
77 | MatcherNode c2 = matcherPath2.nodes.get(j);
78 |
79 | //if last two chars equal
80 | if (c1.equals(c2)) {
81 | //update dp value for +1 length
82 | dp[i + 1][j + 1] = dp[i][j];
83 | } else {
84 | double labelWeight = labelWeights.containsKey(c2.label) ?
85 | labelWeights.get(c2.label) : 1;
86 | double insertLabelWeight = labelWeights.containsKey(c1.label) ?
87 | labelWeights.get(c1.label) : 1;
88 | double replacePenalty = c1.label.equals(c2.label) ?
89 | 1 : LABEL_MISMATCH_PENALTY;
90 | double replaceCost = 1 - WordEmbedding.similarity(c1.token, c2.token);
91 | // if (c1.token.equals("distribute") || c2.token.equals("distribute")) {
92 | // System.err.println("[LOG] " + c1.token + " " + c2.token + " " + replaceCost);
93 | // }
94 | double replace = dp[i][j] + weights.get("replace") * replacePenalty
95 | * replaceCost
96 | * labelWeight;
97 | double insert = dp[i][j + 1] + weights.get("insert") * insertLabelWeight;
98 | double delete = dp[i + 1][j] + weights.get("delete") * labelWeight;
99 |
100 | double min = replace > insert ? insert : replace;
101 | min = delete > min ? min : delete;
102 | dp[i + 1][j + 1] = min;
103 | }
104 | }
105 | }
106 |
107 | return matcherPath1.arg1Type.equals(matcherPath2.arg1Type) &&
108 | matcherPath1.arg2Type.equals(matcherPath2.arg2Type) ?
109 | dp[len1][len2] : Math.max(matcherPath1.length(), matcherPath2.length());
110 | }
111 |
112 | }
113 |
114 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/models/RelationFinder.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.models;
2 |
3 | import edu.nyu.jet.ice.uicomps.Ice;
4 | import edu.nyu.jet.ice.utils.ProgressMonitorI;
5 | import edu.nyu.jet.ice.utils.SwingProgressMonitor;
6 |
7 | import javax.swing.*;
8 | import java.io.IOException;
9 |
10 | /**
11 | * A Thread for running DepPaths: counts all dependency paths in corpus.
12 | */
13 | public class RelationFinder extends Thread {
14 |
15 | String[] args;
16 | String types;
17 | JTextArea area;
18 | int numberOfDocs;
19 | ProgressMonitorI relationProgressMonitor = null;
20 |
21 | public RelationFinder(String docListFileName, String directory, String filter,
22 | String instances, String types, JTextArea area, int numberOfDocs,
23 | ProgressMonitorI relationProgressMonitor) {
24 | args = new String[4];
25 | args[0] = "parseprops";
26 | args[1] = docListFileName;
27 | args[2] = directory;
28 | args[3] = filter;
29 | this.types = types;
30 | this.area = area;
31 | this.numberOfDocs = numberOfDocs;
32 | this.relationProgressMonitor = relationProgressMonitor;
33 | }
34 |
35 | public void run() {
36 | try {
37 | // force monitor to display during long initialization
38 | try {
39 | Thread.sleep(1000);
40 | } catch (InterruptedException ignore) {
41 | }
42 | if (null != relationProgressMonitor) {
43 | relationProgressMonitor.setProgress(2);
44 | }
45 | DepPathMap depPathMap = DepPathMap.getInstance();
46 | depPathMap.unpersist();
47 | DepPaths.progressMonitor = relationProgressMonitor;
48 | System.out.println("$$$ types = " + types);
49 | DepPaths.main(args);
50 | String sortedTypes = types + ".sorted";
51 | Corpus.sort(types, sortedTypes);
52 | System.out.println("$$$ types = " + types);
53 | depPathMap.loadPaths(true);
54 | if(area != null) {
55 | Corpus.displayTerms(types, 40, area, Corpus.relationFilter);
56 | }
57 | } catch (IOException e) {
58 | System.out.println("IOException in DepPaths " + e);
59 | e.printStackTrace(System.err);
60 | }
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | Principal Classes of ICE
3 |
4 | Corpus analysis
5 |
6 | The corpus analysis performed by ICE can be divided into two types:
7 | analysis which is domain-independent and need only be done once,
8 | and analysis which is domain-dependent and may be done repeatedly
9 | as part of a bootstrapping process.
10 |
11 | The domain-independent processing includes
12 |
13 | - part-of-speech tagging
14 | - dependency parsing
15 | - coreference analysis
16 | - name tagging with respect to a set of generic name models
17 | (for people, places, and organizations)
18 | - numeric and time expressions
19 |
20 | All of this analysis is performed as part of preprocessing
21 | by the IcePreprocessor class and stored in the
22 | cache directory, which has one subdirectory for
23 | each corpus being preprocessed by ICE.
24 |
25 | Note that this preprocessing could be made more accurate
26 | by making use of domain-specific information, but we do
27 | not do so at this time.
28 |
29 | The domain-specific processing involves finding in
30 | the corpus all dependency paths which connect two
31 | entities (words which are members of an entity set). A
32 | relation is defined as a set of dependency paths, so
33 | this process collects the candidate paths to be used
34 | in relation bootstrapping. As the entity sets grow during IE
35 | customization, this set of candidate paths also grows and so needs
36 | to be recomputed. This analysis is performed by the
37 | RelationFinder class, which invokes DepPaths.
38 | To speed processing, DepPaths makes use of the
39 | information saved in the cache by preprocessing.
40 |
41 | One additional step of corpus analysis involves the computation
42 | of term context vectors, which record the dependency contexts of
43 | each term in the corpus. This information, which is used to guide
44 | the creation of entity sets, is computed by class EntitySetIndexer.
45 |
46 |
Dependency Paths
47 |
48 | Representation
49 |
50 | ICE relations (class IceRelation) are specified in terms of
51 | the types of their arguments (entity sets) and a set of lexicalized
52 | dependency paths (LDPs). An LDP specifies a particular sequence of
53 | words and dependency relations. For communicating with the user we
54 | want to accept and generate English phrases. Methods in class
55 | DepPath perform the generation of phrases; the
56 | correspondence between the internal representation, the phrase,
57 | and a complete sentence with an example of this path is
58 | captured in instances of class IcePath.
59 |
60 | We are currently experimenting in Jet with set-generalized
61 | LDPs, where the words are constrained to be members of a set rather
62 | than taking on single values.
63 |
64 |
Matching
65 |
66 | Exact match of two LDPs can be done by simple string match.
67 | To determine whether a document has an instance of an LDP, we
68 | can generate all the LDPs from a document and see if any one
69 | matches.
70 |
71 | For better recall we may want to allow approximate (soft) matching.
72 | Class PathMatcher provides edit-distance-based matching
73 | between two LDPs.
74 |
75 |
Exporting
76 |
77 | After some entity sets and relations have been defined using Ice,
78 | class JetEngineBuilder is used to write these out in
79 | a format which is accepted by Jet. It is represented in Jet
80 | using classes AnchoredPath and AnchoredPathSet.
81 |
82 | Bootstrapping
83 |
84 | The bootstrapping of relations is managed by class Bootstrap.
85 | The basic process starts with a seed provided by the user and ranks
86 | the candidate paths with respect to this seed using an elaborate
87 | combination of scores.
88 |
89 | To reduce the manual input required when conducting repeated evaluations
90 | for the same relation, class RelationOracle captures the user's
91 | classifications on the initial run and generates automatic responses
92 | to the same queries on subsequent runs.
93 | */
94 | package edu.nyu.jet.ice;
95 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/relation/PathRelationExtractor.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;
2 |
3 | import edu.nyu.jet.ice.models.MatcherPath;
4 | import edu.nyu.jet.ice.models.PathMatcher;
5 | import edu.nyu.jet.ice.models.WordEmbedding;
6 | import opennlp.model.Event;
7 |
8 | import java.io.BufferedReader;
9 | import java.io.FileReader;
10 | import java.io.IOException;
11 | import java.util.ArrayList;
12 | import java.util.List;
13 |
14 | /**
15 | * Tag relations in a document by using positive and negative dependency path rules created with ICE
16 | *
17 | * @author yhe
18 | */
19 | public class PathRelationExtractor {
20 |
21 | public static double minThreshold = 0.5;
22 |
23 | public void setNegDiscount(double nDiscount) {
24 | negDiscount = nDiscount;
25 | }
26 |
27 | public static double negDiscount = 0.8;
28 |
29 | private PathMatcher pathMatcher = new PathMatcher();
30 |
31 | private static List ruleTable = new ArrayList();
32 |
33 | private static List negTable = new ArrayList();
34 |
35 | public void updateCost(double replace, double insert, double delete) {
36 | pathMatcher.updateCost(replace, insert, delete);
37 | }
38 |
39 | public static void loadModelForSoftMatch (String rulesFile) throws IOException {
40 | BufferedReader br = new BufferedReader(new FileReader(rulesFile));
41 | String line = null;
42 | while ((line = br.readLine()) != null) {
43 | System.out.println("Loading rule " + line);
44 | String[] parts = line.split(" = ");
45 | MatcherPath path = new MatcherPath(parts[0]);
46 | if (parts[0].contains("EMPTY")) {
47 | continue;
48 | }
49 | if (!path.isEmpty()) {
50 | path.setRelationType(parts[1]);
51 | }
52 | ruleTable.add(path);
53 | }
54 | }
55 |
56 | public void loadNeg(String negRulesFile) throws IOException {
57 | BufferedReader br = new BufferedReader(new FileReader(negRulesFile));
58 | String line = null;
59 | while ((line = br.readLine()) != null) {
60 | String[] parts = line.split(" = ");
61 | MatcherPath path = new MatcherPath(parts[0]);
62 | if (parts[0].contains("EMPTY")) {
63 | continue;
64 | }
65 | if (!path.isEmpty()) {
66 | path.setRelationType(parts[1]);
67 | }
68 | negTable.add(path);
69 | }
70 | }
71 |
72 | public void loadEmbeddings(String embeddingFile) throws IOException {
73 | WordEmbedding.loadWordEmbedding(embeddingFile);
74 | }
75 |
76 | /**
77 | * Predict the relation type of an Event. The context[] array of the Event
78 | * should have the format [dependency path, arg1 type, arg2 type]
79 | * @param e An OpenNLP context[]:label pair
80 | * @return
81 | */
82 | public String predict(Event e) {
83 | String[] context = e.getContext();
84 | String depPath = context[0];
85 | String arg1Type = context[1];
86 | String arg2Type = context[2];
87 | String fullDepPath = arg1Type + "--" + depPath + "--" + arg2Type;
88 | MatcherPath matcherPath = new MatcherPath(fullDepPath);
89 | double minScore = 1;
90 | double minNegScore = 1;
91 | MatcherPath minRule = null;
92 | for (MatcherPath rule : ruleTable) {
93 | double score = pathMatcher.matchPaths(matcherPath, rule)/
94 | rule.length();
95 | if (arg1Type.equals("PERSON") && arg2Type.equals("DRUGS")) {
96 | // System.err.println("\tScore:"+ score);
97 | // System.err.println("\tRule:" + rule);
98 | // System.err.println("\tCurrent:" + matcherPath);
99 | //System.err.println("Gold:" + e.getOutcome()
100 | // + "\tPredicted:" + rule.getRelationType());
101 | }
102 | if (score < minScore) {
103 | minScore = score;
104 | minRule = rule;
105 | }
106 | }
107 | MatcherPath minNegRule = null;
108 | if (minScore < minThreshold) {
109 |
110 | for (MatcherPath rule : negTable) {
111 | if (!rule.getRelationType().equals(minRule.getRelationType())) {
112 | continue;
113 | }
114 | double score = pathMatcher.matchPaths(matcherPath, rule) / rule.length();
115 | if (score < minNegScore) {
116 | minNegScore = score;
117 | minNegRule = rule;
118 | }
119 | }
120 | }
121 | else {
122 | return null;
123 | }
124 |
125 |
126 | if (minScore < minThreshold && minScore < minNegScore* negDiscount) {
127 | System.err.println("Score:"+ minScore);
128 | System.err.println("Rule:" + minRule);
129 | System.err.println("Current:" + matcherPath);
130 | System.err.println("Gold:" + e.getOutcome()
131 | + "\tPredicted:" + minRule.getRelationType());
132 |
133 | return minRule.getRelationType();
134 | }
135 | if (minScore > minNegScore* negDiscount) {
136 | System.err.println("[REJ] Score:"+ minScore);
137 | System.err.println("[REJ] Neg Score:"+ minNegScore* negDiscount);
138 | System.err.println("[REJ] Rule:" + minRule);
139 | System.err.println("[REJ] Neg Rule:" + minNegRule);
140 | System.err.println("[REJ] Current:" + matcherPath);
141 | System.err.println("[REJ] Gold:" + e.getOutcome()
142 | + "\tPredicted:" + minRule.getRelationType());
143 | }
144 | return null;
145 | }
146 | }
147 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/terminology/Term.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.terminology;
2 |
3 | /**
4 | * Summarizes information related to a term/entity, also supports sorting
5 | *
6 | * @author yhe
7 | * @version 1.0
8 | */
9 | public class Term implements Comparable{
10 | private String text;
11 | private int positiveDocFreq;
12 | private int positiveFreq;
13 | private int negativeDocFreq;
14 | private int negativeFreq;
15 | private double score;
16 | private int[] rawFreq;
17 |
18 | public Term(String text, int positiveDocFreq, int positiveFreq, int negativeDocFreq, int negativeFreq) {
19 | this.text = text;
20 | this.positiveDocFreq = positiveDocFreq;
21 | this.positiveFreq = positiveFreq;
22 | this.negativeDocFreq = negativeDocFreq;
23 | this.negativeFreq = negativeFreq;
24 | }
25 |
26 | public String getText() {
27 | return text;
28 | }
29 |
30 | public void setText(String text) {
31 | this.text = text;
32 | }
33 |
34 | public int getPositiveDocFreq() {
35 | return positiveDocFreq;
36 | }
37 |
38 |
39 | public int getPositiveFreq() {
40 | return positiveFreq;
41 | }
42 |
43 |
44 | public int getNegativeDocFreq() {
45 | return negativeDocFreq;
46 | }
47 |
48 |
49 | public int getNegativeFreq() {
50 | return negativeFreq;
51 | }
52 |
53 | public double getScore() {
54 | return score;
55 | }
56 |
57 | public void setScore(double score) {
58 | this.score = score;
59 | }
60 |
61 | public int[] getRawFreq() {
62 | return rawFreq;
63 | }
64 |
65 | public void setRawFreq(int[] rawFreq) {
66 | this.rawFreq = rawFreq;
67 | }
68 |
69 | public int compareTo(Term term) {
70 | if (this.score - term.score < 0) return -1;
71 | if (this.score - term.score > 0) return 1;
72 | return 0;
73 | }
74 |
75 | @Override
76 | public String toString() {
77 | return String.format("%.2f\t%s", score, text);
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/terminology/TermRanker.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.terminology;
2 |
3 | import edu.nyu.jet.ice.uicomps.Ice;
4 | import edu.nyu.jet.ice.utils.IceUtils;
5 |
6 | import java.io.FileWriter;
7 | import java.io.IOException;
8 | import java.io.PrintWriter;
9 | import java.util.*;
10 |
11 | /**
12 | * Ranker for extracted terms.
13 | *
14 | * @author yhe
15 | * @version 1.0
16 | */
17 | public class TermRanker {
18 | private List terms;
19 |
20 | /**
21 | * Rank multi-word terms using the following score:
22 | *
23 | * Score = POSITIVE_FREQ * log(POSITIVE_FREQ) ^ alpha / NEGATIVE_FREQ
24 | *
25 | * where POSITIVE_FREQ is the document frequency in the foreground corpus
26 | * and NEGATIVE_FREQ is the document frequency in the background corpus.
27 | * alpha can be set in iceprops with the Ice.TermRanker.alpha property
28 | *
29 | * @param foregroundCountFile Name of the word count file for the foreground corpus
30 | * @param backgroundCountFile Name of the word count file for the background corpus
31 | * @throws IOException
32 | */
33 | public TermRanker(String foregroundCountFile, String backgroundCountFile)
34 | throws IOException {
35 | String[] positiveWords = IceUtils.readLines(foregroundCountFile);
36 | String[] negativeWords = IceUtils.readLines(backgroundCountFile);
37 | //Map foregroundWordFreq = new HashMap();
38 | Map foregroundDocFreq = new HashMap();
39 | //Map backgroundWordFreq = new HashMap();
40 | Map backgroundDocFreq = new HashMap();
41 | int i = 0;
42 | for (String w : positiveWords) {
43 | if (i < 3) {
44 | i++;
45 | continue;
46 | }
47 | String[] parts = w.split("\\t");
48 | if (parts[0].equals("Contact/nn") ||
49 | parts[0].equals("today/nn") ||
50 | parts[0].equals("yesterday/nn")) {
51 | continue;
52 | }
53 | foregroundDocFreq.put(parts[0], parts.length - 1);
54 | }
55 | i = 0;
56 | for (String w : negativeWords) {
57 | if (i < 3) {
58 | i++;
59 | continue;
60 | }
61 | String[] parts = w.split("\\t");
62 | backgroundDocFreq.put(parts[0], parts.length - 1);
63 | i++;
64 | }
65 | terms = new ArrayList();
66 | double pow = 1.0;
67 | try {
68 | pow = Double.valueOf(Ice.iceProperties.getProperty("Ice.TermRanker.alpha"));
69 | System.err.println("Trying to use alpha: " + pow);
70 | } catch (Exception e) {
71 | //e.printStackTrace();
72 | }
73 | for (String w : foregroundDocFreq.keySet()) {
74 | // int negativeWordCount = backgroundWordFreq.containsKey(w) ?
75 | // backgroundWordFreq.get(w) + 1 : 1;
76 | int negativeDocCount = backgroundDocFreq.containsKey(w) ?
77 | backgroundDocFreq.get(w) + 1 : 1;
78 | Term term = new Term(w,
79 | foregroundDocFreq.get(w),
80 | 0,
81 | negativeDocCount,
82 | 0
83 | );
84 | term.setScore((double) term.getPositiveDocFreq() *
85 | Math.pow(Math.log(term.getPositiveDocFreq()), pow)
86 | /
87 | term.getNegativeDocFreq());
88 | terms.add(term);
89 | }
90 | Collections.sort(terms);
91 | Collections.reverse(terms);
92 | }
93 |
94 | /**
95 | * Write a ranked list of terms (top-ranked term first) to file
96 | * outputFileName
.
97 | */
98 |
99 | public void writeRankedList(String outputFileName) throws IOException {
100 | PrintWriter pw = new PrintWriter(new FileWriter(outputFileName));
101 | for (Term term : terms) {
102 | pw.println(term);
103 | }
104 | pw.close();
105 | }
106 |
107 | /**
108 | * Rank terms using term count files foregroundCountFile
and
109 | * backgroundCountFile
, writing result to outputFile
110 | * and returning a ranked list.
111 | */
112 |
113 | public static List rankTerms(String foregroundCountFile,
114 | String backgroundCountFile,
115 | String outputFile) throws IOException {
116 | TermRanker ranker = new TermRanker(foregroundCountFile, backgroundCountFile);
117 | ranker.writeRankedList(outputFile);
118 | return ranker.terms;
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/uicomps/Ice.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.uicomps;// -*- tab-width: 4 -*-
2 | //Title: JET-ICE
3 | //Version: 1.72
4 | //Copyright: Copyright (c) 2014
5 | //Author: Ralph Grishman
6 | //Description: A Java-based Information Extraction Tool -- Customization Environment
7 |
8 | import java.awt.*;
9 | import java.util.*;
10 | import javax.swing.*;
11 | import javax.swing.border.*;
12 | import java.awt.event.*;
13 |
14 | import edu.nyu.jet.ice.models.Corpus;
15 | import edu.nyu.jet.ice.models.IceEntitySet;
16 | import edu.nyu.jet.ice.models.IceRelation;
17 | import edu.nyu.jet.ice.models.DepPathMap;
18 | import edu.nyu.jet.ice.events.IceEvent;
19 | import edu.nyu.jet.ice.events.DepTreeMap;
20 | import edu.nyu.jet.concepts.ConceptHierarchy;
21 |
22 | import edu.nyu.jet.Logger;
23 | import edu.nyu.jet.LoggerFactory;
24 |
25 | /**
26 | * Top-level objects for ICE
27 | */
28 |
29 | public class Ice {
30 |
31 | static final Logger logger = LoggerFactory.getLogger(Ice.class);
32 |
33 | public static SortedMap corpora = new TreeMap ();
34 | public static SortedMap entitySets = new TreeMap();
35 | public static SortedMap relations = new TreeMap();
36 | public static SortedMap events = new TreeMap();
37 | public static Corpus selectedCorpus = null;
38 | public static String selectedCorpusName = null;
39 |
40 | public static Properties iceProperties = new Properties();
41 |
42 | public static JFrame mainFrame;
43 |
44 | public static void selectCorpus (String corpus) {
45 | selectedCorpusName = corpus;
46 | selectedCorpus = corpora.get(selectedCorpusName);
47 | DepPathMap depPathMap = DepPathMap.getInstance();
48 | depPathMap.loadPaths(false);
49 | DepTreeMap depTreeMap = DepTreeMap.getInstance();
50 | depTreeMap.loadTrees(false);
51 | }
52 |
53 | public static ConceptHierarchy ontology = null;;
54 |
55 | public static void addEntitySet (IceEntitySet entitySet) {
56 | entitySets.put(entitySet.getType(), entitySet);
57 | }
58 |
59 | public static IceEntitySet getEntitySet (String type) {
60 | return entitySets.get(type);
61 | }
62 |
63 | public static void removeEntitySet (String type) {
64 | entitySets.remove(type);
65 | }
66 |
67 | public static void addRelation (IceRelation relation) {
68 | relations.put(relation.getName(), relation);
69 | }
70 |
71 | public static IceRelation getRelation (String type) {
72 | return relations.get(type);
73 | }
74 |
75 | public static void removeRelation (String type) {
76 | if (relations.get(type) == null)
77 | logger.warn("Relation to be deleted does not exist.");
78 | else relations.remove(type);
79 | }
80 |
81 | public static void addEvent (IceEvent event) {
82 | events.put(event.getName(), event);
83 | }
84 |
85 | public static IceEvent getEvent (String type) {
86 | return events.get(type);
87 | }
88 |
89 | public static void removeEvent (String type) {
90 | events.remove(type);
91 | }
92 | }
93 |
94 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/uicomps/IceCellRenderer.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.uicomps;
2 |
3 | import edu.nyu.jet.ice.events.IceTree;
4 | import edu.nyu.jet.ice.models.IcePath;
5 |
6 | import javax.swing.*;
7 | import java.awt.*;
8 |
9 | /**
10 | * This cell renderer is intended to support displays of Lists
11 | * where the list elements are IcePaths or IceTrees. What is
12 | * displayed is the "repr" (English phrase) in either case and
13 | * the choice made by the user regarding that phrase.
14 | */
15 |
16 | public class IceCellRenderer extends JLabel implements ListCellRenderer {
17 |
18 | boolean showYesNo;
19 |
20 | public IceCellRenderer (boolean showYesNo) {
21 | setOpaque(true);
22 | this.showYesNo = showYesNo;
23 | }
24 |
25 | public Component getListCellRendererComponent(JList list,
26 | Object value,
27 | int index,
28 | boolean isSelected,
29 | boolean cellHasFocus) {
30 |
31 | if (value == null) {
32 | System.out.println ("CellRenderer got null");
33 | } else if (value instanceof IceTree) {
34 | IceTree t = (IceTree) value;
35 | String repr = t.getRepr();
36 | if (repr == null) System.out.println ("CellRenderer getting trees with null repr");
37 | IceTree.IceTreeChoice choice = t.getChoice();
38 | if (showYesNo && choice == IceTree.IceTreeChoice.YES)
39 | repr += " / YES";
40 | else if (showYesNo && choice == IceTree.IceTreeChoice.NO)
41 | repr += " / NO";
42 | setText(repr);
43 | } else if (value instanceof IcePath) {
44 | IcePath t = (IcePath) value;
45 | String repr = t.getRepr();
46 | if (repr == null) System.out.println ("CellRenderer getting paths with null repr");
47 | IcePath.IcePathChoice choice = t.getChoice();
48 | if (showYesNo && choice == IcePath.IcePathChoice.YES)
49 | repr += " / YES";
50 | else if (showYesNo && choice == IcePath.IcePathChoice.NO)
51 | repr += " / NO";
52 | setText(repr);
53 | } else System.out.println ("Cell renderer got " + value);
54 |
55 | Color background;
56 | Color foreground;
57 |
58 | // check if this cell is selected
59 | if (isSelected) {
60 | background = Color.BLUE;
61 | foreground = Color.WHITE;
62 | } else {
63 | background = Color.WHITE;
64 | foreground = Color.BLACK;
65 | };
66 |
67 | setBackground(background);
68 | setForeground(foreground);
69 |
70 | // this.setToolTipText("hi" + t.getExample());
71 | return this;
72 | }
73 | }
74 |
75 |
76 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/uicomps/ListFilter.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.uicomps;
2 |
3 | import javax.swing.Box;
4 |
/**
 * Base class for GUI-controlled filters that decide whether a list item
 * is displayed or skipped.
 */

public abstract class ListFilter {

    /**
     * Builds the Swing {@code Box} holding the controls for this filter.
     *
     * @return the box with the filter's controls
     */
    public abstract Box makeBox();

    /**
     * Applies the filter to one item.
     *
     * @param item the list item to test
     * @return the filter's verdict for the item
     */
    public abstract boolean filter(String item);

}
16 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/uicomps/RelationBuilderThread.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.uicomps;
2 |
3 | import edu.nyu.jet.ice.relation.Bootstrap;
4 | import edu.nyu.jet.ice.views.swing.SwingRelationsPanel;
5 |
6 | /**
7 | * When the 'expand' button om the relation frame is pushed, the SwingRelationPanel
8 | * creates a RelationBuilderThread to perform the computations required to
9 | * generate a list of candidate relation patterns (which are then reviewed
10 | * by the user).
11 | *
12 | * Created by yhe on 10/14/14.
13 | */
14 | public class RelationBuilderThread extends Thread {
15 |
16 | String[] args;
17 | // RelationBuilder builder;
18 | Bootstrap bootstrap;
19 | String arg1;
20 | String arg2;
21 | RelationBuilderFrame frame;
22 | SwingRelationsPanel swingRelationsPanel;
23 |
24 | public RelationBuilderThread(
25 | String seed,
26 | String relationInstanceFileName,
27 | String pathListFileName,
28 | // RelationBuilder builder,
29 | Bootstrap bootstrap,
30 | RelationBuilderFrame frame,
31 | SwingRelationsPanel swingRelationsPanel) {
32 | args = new String[3];
33 | args[0] = seed;
34 | String[] parts = seed.trim().toLowerCase().split(" ");
35 | if (parts.length > 1) {
36 | arg1 = parts[0].toUpperCase();
37 | arg2 = parts[parts.length - 1].toUpperCase();
38 | }
39 | args[1] = relationInstanceFileName;
40 | args[2] = pathListFileName;
41 | // this.builder = builder;
42 | this.bootstrap = bootstrap;
43 | this.frame = frame;
44 | this.swingRelationsPanel = swingRelationsPanel;
45 | }
46 |
47 | public void run() {
48 | try {
49 | bootstrap.initialize(args[0], args[1]);
50 | frame.updateList();
51 | frame.setLocationRelativeTo(null);
52 | frame.setVisible(true);
53 | frame.listPane.revalidate();
54 | frame.listPane.repaint();
55 | frame.rankedList.revalidate();
56 | frame.rankedList.repaint();
57 | } catch (Exception e) {
58 | System.err.println("Exception in Jet.RelationAL.Bootstrap: ");
59 | e.printStackTrace();
60 | }
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/uicomps/RelationFilter.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.uicomps;// -*- tab-width: 4 -*-
2 | //Title: JET-ICE
3 | //Version: 1.72
4 | //Copyright: Copyright (c) 2014
5 | //Author: Ralph Grishman
6 | //Description: A Java-based Information Extraction Tool -- Customization Environment
7 |
8 | import edu.nyu.jet.ice.models.Corpus;
9 |
10 | import javax.swing.*;
11 | import java.awt.event.*;
12 |
13 | /**
14 | * a filter for selecting only sentential patterns (those with
15 | * an nsubj and dobj) or passing all patterns
16 | */
17 |
18 | public class RelationFilter extends ListFilter {
19 |
20 | public boolean onlySententialPatterns;
21 | public JCheckBox sententialPatternCheckBox;
22 | JTextArea area = null;
23 |
24 |
25 | /**
26 | * return true if 'term' is a selected part of speech
27 | */
28 | public boolean filter (String term) {
29 | if (onlySententialPatterns)
30 | return term.matches(".*nsubj-1:.*:dobj.*");
31 | else
32 | return true;
33 | }
34 |
35 | public void setArea(JTextArea area) {
36 | this.area = area;
37 | }
38 |
39 | /**
40 | * draw a Box with the check box for selecting sentential patterns
41 | */
42 |
43 | public Box makeBox () {
44 | Box box = Box.createHorizontalBox();
45 | sententialPatternCheckBox = new JCheckBox("show only sentential patterns");
46 | box.add(sententialPatternCheckBox);
47 |
48 | // listener -----
49 |
50 | sententialPatternCheckBox.addActionListener(new ActionListener() {
51 | public void actionPerformed(ActionEvent ev) {
52 | onlySententialPatterns = sententialPatternCheckBox.isSelected();
53 | try {
54 | Corpus.displayTerms(Ice.selectedCorpus.relationTypesFileName,
55 | 40,
56 | area,
57 | Corpus.relationFilter);
58 | }
59 | catch (Exception e) {
60 | e.printStackTrace();
61 | }
62 | }
63 | });
64 |
65 | return box;
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/uicomps/TermFilter.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.uicomps;// -*- tab-width: 4 -*-
2 | //Title: JET-ICE
3 | //Version: 1.72
4 | //Copyright: Copyright (c) 2014
5 | //Author: Ralph Grishman
6 | //Description: A Java-based Information Extraction Tool -- Customization Environment
7 |
8 | import javax.swing.*;
9 | import java.awt.event.*;
10 |
11 | /**
12 | * part-of-speech filter for high-frequency words;
13 | * distinguishes nouns, names, verbs, and other
14 | */
15 |
16 | public class TermFilter extends ListFilter {
17 |
18 | boolean showNouns;
19 | boolean showNames;
20 | boolean showVerbs;
21 | boolean showOther;
22 |
23 | /**
24 | * return true if 'term' is a selected part of speech
25 | */
26 |
27 | public boolean filter (String term) {
28 | return (showNouns && termIsType(term, "nn")) ||
29 | (showNames && termIsType(term, "nnp")) ||
30 | (showVerbs && termIsType(term, "vb")) ||
31 | (showOther && termIsType(term, "o"));
32 | }
33 |
34 | private boolean termIsType(String term, String type) {
35 | String[] parts = term.split("/");
36 | if (parts.length < 2) return false;
37 | return parts[1].equals(type);
38 | }
39 |
40 | /**
41 | * draw a Box including check boxes for the different parts of speech
42 | */
43 |
44 | public Box makeBox () {
45 | Box box = Box.createHorizontalBox();
46 | box.add(new JLabel("show"));
47 | JCheckBox nounButton = new JCheckBox("nouns");
48 | nounButton.setSelected(showNouns);
49 | box.add(nounButton);
50 | JCheckBox nameButton = new JCheckBox("names");
51 | nameButton.setSelected(showNames);
52 | box.add(nameButton);
53 | JCheckBox verbButton = new JCheckBox("verbs");
54 | verbButton.setSelected(showVerbs);
55 | box.add(verbButton);
56 | JCheckBox otherButton = new JCheckBox("other");
57 | otherButton.setSelected(showOther);
58 | box.add(otherButton);
59 |
60 | // -------- listeners
61 | nounButton.addItemListener (new ItemListener() {
62 | public void itemStateChanged (ItemEvent ev) {
63 | showNouns = ev.getStateChange() == ItemEvent.SELECTED;
64 | }
65 | });
66 | nameButton.addItemListener (new ItemListener() {
67 | public void itemStateChanged (ItemEvent ev) {
68 | showNames = ev.getStateChange() == ItemEvent.SELECTED;
69 | }
70 | });
71 | verbButton.addItemListener (new ItemListener() {
72 | public void itemStateChanged (ItemEvent ev) {
73 | showVerbs = ev.getStateChange() == ItemEvent.SELECTED;
74 | }
75 | });
76 | otherButton.addItemListener (new ItemListener() {
77 | public void itemStateChanged (ItemEvent ev) {
78 | showOther = ev.getStateChange() == ItemEvent.SELECTED;
79 | }
80 | });
81 |
82 | return box;
83 | }
84 |
85 | }
86 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/utils/AnnotationStartComparator.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.utils;
2 |
3 | import edu.nyu.jet.tipster.Annotation;
4 |
5 | import java.util.Comparator;
6 |
7 | /**
8 | * Compares 2 Jet annotations by their start offsets
9 | *
10 | * @author yhe
11 | * @version 1.0
12 | */
13 | public class AnnotationStartComparator implements Comparator {
14 | public int compare(Annotation annotation, Annotation annotation2) {
15 | return annotation.start() - annotation2.start();
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/utils/FileNameSchema.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.utils;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | //import java.nio.file.Files;
6 | //import java.nio.file.Paths;
7 |
/**
 * Naming scheme for the files ICE keeps under its cache directory.
 *
 * Every per-corpus file lives in <code>cache/&lt;corpusName&gt;/</code>;
 * the methods below only build path strings and never touch the file
 * system.  The cache root directory itself is created (best effort) when
 * this class is loaded.
 *
 * @author joelsieh
 */
public class FileNameSchema {
    private static final String CACHE_ROOT = "cache";

    static {
        try {
            File cacheFile = new File(CACHE_ROOT);
            // mkdirs() signals failure through its return value, not an
            // exception, so the result has to be checked explicitly
            if (!cacheFile.exists() && !cacheFile.mkdirs() && !cacheFile.isDirectory()) {
                System.err.println("Unable to create cache directory "
                        + cacheFile.getAbsolutePath());
            }
        } catch (SecurityException e) {
            // keep class loading alive; callers will fail later with a
            // clearer I/O error if the directory really is unusable
            e.printStackTrace();
        }
    }

    /** Builds the path <code>cache/&lt;corpusName&gt;/&lt;fileName&gt;</code>. */
    private static String corpusFile(String corpusName, String fileName) {
        return getCorpusInfoDirectory(corpusName) + File.separator + fileName;
    }

    /** Returns the name of the cache root directory. */
    public static String getCacheRoot() {
        return CACHE_ROOT;
    }

    /** Returns the per-corpus directory, <code>cache/&lt;corpusName&gt;</code>. */
    public static String getCorpusInfoDirectory(String corpusName) {
        return CACHE_ROOT + File.separator + corpusName;
    }

    public static String getPreprocessCacheDir(String corpusName) {
        return corpusFile(corpusName, "preprocess");
    }

    public static String getWordCountFileName(String corpusName) {
        return corpusFile(corpusName, "counts");
    }

    public static String getDocListFileName(String corpusName) {
        return corpusFile(corpusName, "docList");
    }

    public static String getTermsFileName(String corpusName) {
        return corpusFile(corpusName, "terms");
    }

    public static String getRelationsFileName(String corpusName) {
        return corpusFile(corpusName, "Relations");
    }

    public static String getRelationTypesFileName(String corpusName) {
        return corpusFile(corpusName, "RelationTypes");
    }

    public static String getRelationReprFileName(String corpusName) {
        return corpusFile(corpusName, "RelationRepr");
    }

    public static String getEventsFileName(String corpusName) {
        return corpusFile(corpusName, "Events");
    }

    public static String getEventTypesFileName(String corpusName) {
        return corpusFile(corpusName, "EventTypes");
    }

    public static String getEventReprFileName(String corpusName) {
        return corpusFile(corpusName, "EventRepr");
    }

    public static String getDependencyEventFileName(String corpusName) {
        return corpusFile(corpusName, "DepEvents");
    }

    /** File name is <code>EntitySetIndex_</code> followed by <code>inType</code>. */
    public static String getEntitySetIndexFileName(String corpusName, String inType) {
        return corpusFile(corpusName, "EntitySetIndex_" + inType);
    }

    /** File name is <code>bgCorpusName</code> followed by <code>-Pattern-Ratio</code>. */
    public static String getPatternRatioFileName(String corpusName, String bgCorpusName) {
        return corpusFile(corpusName, bgCorpusName + "-Pattern-Ratio");
    }

    /** Same as {@link #getPatternRatioFileName} with <code>.sorted</code> appended. */
    public static String getSortedPatternRatioFileName(String corpusName, String bgCorpusName) {
        return getPatternRatioFileName(corpusName, bgCorpusName) + ".sorted";
    }

    public static String getPreprocessCacheMapFileName(String corpusName) {
        return corpusFile(corpusName, "preprocessCacheMap");
    }

    public static String getDepPathsLogFileName(String corpusName) {
        return corpusFile(corpusName, "DepPathsLog");
    }

    public static String getDepPathsPriorLogFileName(String corpusName) {
        return corpusFile(corpusName, "DepPathsPriorLog");
    }
}
108 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/utils/ProcessFarm.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.utils;
2 |
3 | import org.apache.commons.exec.*;
4 |
5 | import java.util.ArrayList;
6 |
7 | /**
8 | * Spawn and manage processes
9 | *
10 | * @author yhe
11 | * @version 1.0
12 | */
13 | public class ProcessFarm {
14 | ArrayList tasks = new ArrayList();
15 | ArrayList processes = new ArrayList();
16 |
17 | /**
18 | * Reset the tasks to be spawned
19 | */
20 | synchronized public void reset() {
21 | tasks = new ArrayList();
22 | processes = new ArrayList();
23 | }
24 |
25 | /**
26 | * Submit the current list for execution
27 | */
28 | synchronized public void submit() {
29 | try {
30 | for (String line : tasks) {
31 | System.err.println("Submit: " + line);
32 | CommandLine cmdLine = CommandLine.parse(line);
33 |
34 | DefaultExecuteResultHandler resultHandler = new DefaultExecuteResultHandler();
35 |
36 | ExecuteWatchdog watchdog = new ExecuteWatchdog(ExecuteWatchdog.INFINITE_TIMEOUT);
37 | Executor executor = new DefaultExecutor();
38 | executor.setExitValue(0);
39 | executor.setWatchdog(watchdog);
40 | executor.execute(cmdLine, resultHandler);
41 | processes.add(resultHandler);
42 | }
43 | }
44 | catch (Exception e) {
45 | e.printStackTrace();
46 | }
47 | }
48 |
49 | /**
50 | * Hold the host thread until all spawned processes complete execution
51 | *
52 | * @return true if all tasks completed successfully, false otherwise
53 | */
54 | synchronized public boolean waitFor() {
55 | boolean success = true;
56 | int i = 0;
57 | for (DefaultExecuteResultHandler p : processes) {
58 | try {
59 | p.waitFor();
60 | int returnVal = p.getExitValue();
61 | if (returnVal != 0) {
62 | System.err.println(tasks.get(i) + String.format(" (return code %d)", returnVal));
63 | success = false;
64 | }
65 | }
66 | // catch (InterruptedException e) {
67 | catch (Exception e) {
68 | System.err.println(tasks.get(i) + " encountered interrupted exception:");
69 | e.printStackTrace();
70 | success = false;
71 | }
72 | i++;
73 | }
74 | return success;
75 | }
76 |
77 | /**
78 | * Add a shell command to list waiting to be executed. Use submit() to execute
79 | * all commands in the list
80 | *
81 | * @param s A shell command string to be executed
82 | */
83 | synchronized public void addTask(String s) {
84 | tasks.add(s);
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/utils/ProgressMonitorI.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.utils;
2 |
/**
 * Progress-reporting contract that is independent of any GUI toolkit.
 *
 * The five methods have the same names and signatures as the corresponding
 * methods of <code>javax.swing.ProgressMonitor</code>, which is why
 * <code>SwingProgressMonitor</code> can implement this interface simply by
 * extending that Swing class.
 *
 * @author joelsieh
 */
public interface ProgressMonitorI {

    /** Sets the value that represents completed work. */
    void setMaximum(int maximum);

    /** Returns the value that represents completed work. */
    int getMaximum();

    /**
     * Reports the current progress value.
     *
     * @param docCount  presumably the number of documents processed so far
     *                  (suggested by the parameter name only)
     */
    void setProgress(int docCount);

    /** Sets the short status text shown alongside the progress value. */
    void setNote(String s);

    /** Returns true if cancellation of the monitored operation was requested. */
    boolean isCanceled();
}
22 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/utils/Ratio.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.utils;
2 |
3 | import java.util.*;
4 | import java.io.*;
5 |
6 | /**
7 | * given two document profiles, in the form of word frequencies,
8 | * dependency triple frequencies, etc., computes
9 | * f log f / f'
10 | * where f is the frequency in corpus 1, and f' the frequency
11 | * in corpus2
12 | */
13 |
public class Ratio {

    /** Frequencies for corpus 1 (the corpus whose items are scored). */
    static Map<String, Integer> count1 = new TreeMap<String, Integer>();
    /** Frequencies for corpus 2 (the corpus compared against). */
    static Map<String, Integer> count2 = new TreeMap<String, Integer>();

    /**
     * Command-line entry point.
     *
     * @param args  countFile1, countFile2, ratioFile (output)
     * @throws IOException  if a count file cannot be read or the ratio file
     *                      cannot be written
     */
    public static void main (String[] args) throws IOException {
        if (args.length < 3) {
            System.err.println("Ratio requires 3 arguments: countFile1 countFile2 ratioFile");
            System.exit(1);
        }
        String countFile1 = args[0];
        String countFile2 = args[1];
        String ratioFile = args[2];

        readCounts (countFile1, count1);
        readCounts (countFile2, count2);
        computeRatios(new PrintWriter (new FileWriter (ratioFile)));
    }

    /**
     * Replaces the contents of 'counts' with the frequencies read from
     * 'file'.  Each usable line has the form count TAB item; lines that do
     * not have exactly two tab-separated fields are ignored.
     *
     * @param file    name of the count file
     * @param counts  map to fill; it is cleared first
     * @throws IOException            if the file cannot be read
     * @throws NumberFormatException  if a count field is not an integer
     */
    public static void readCounts (String file, Map<String, Integer> counts) throws IOException {
        counts.clear();
        BufferedReader reader = new BufferedReader (new FileReader (file));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] field = line.trim().split("\t");
                if (field.length == 2)
                    counts.put(field[1], Integer.valueOf(field[0]));
            }
        } finally {
            // release the file handle even when a line fails to parse
            reader.close();
        }
    }

    /**
     * Writes one line, score TAB item, for every item of count1, in key
     * order.  With f1 and f2 the item's frequencies in count1 and count2,
     * each incremented by one (an item missing from count2 therefore has
     * f2 = 1), the score is (f1 / f2) * ln(f1).  The writer is always
     * closed before returning.
     *
     * @param writer  destination for the scores
     */
    public static void computeRatios (PrintWriter writer) throws IOException {
        try {
            for (Map.Entry<String, Integer> entry : count1.entrySet()) {
                Integer other = count2.get(entry.getKey());
                // add-one smoothing keeps the divisor and the log argument positive
                int f1 = entry.getValue() + 1;
                int f2 = (other == null) ? 1 : other + 1;
                float ratio = (float) f1 / f2 * (float) Math.log((float) f1);
                writer.printf ("%8.1f\t%s\n", ratio, entry.getKey());
            }
        } finally {
            writer.close();
        }
    }
}
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/utils/SwingProgressMonitor.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.utils;
2 |
3 | import javax.swing.*;
4 | import java.awt.*;
5 |
6 | /**
7 | * Created with IntelliJ IDEA.
8 | * User: joelsieh
9 | * Date: 7/9/14
10 | * Time: 4:49 PM
11 | * To change this template use File | Settings | File Templates.
12 | */
13 | public class SwingProgressMonitor extends ProgressMonitor implements ProgressMonitorI {
14 |
15 | public SwingProgressMonitor(Component parentComponent,
16 | Object message,
17 | String note,
18 | int min,
19 | int max) {
20 | super(parentComponent, message, note, min, max);
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/views/Refreshable.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.views;
2 |
/**
 * Implemented by views that can be asked to bring their display up to date.
 *
 * @author yhe
 */
public interface Refreshable {
    /** Updates whatever the implementing view displays. */
    void refresh();
}
9 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/views/cli/package.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Command-line interface for ICE.
4 |
5 |
6 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/views/swing/SwingEntitiesPanel.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.views.swing;
2 |
3 | import edu.nyu.jet.ice.models.Corpus;
4 | import edu.nyu.jet.ice.models.DepPathMap;
5 | import edu.nyu.jet.ice.models.IcePreprocessor;
6 | import edu.nyu.jet.ice.terminology.TermCounter;
7 | import edu.nyu.jet.ice.terminology.TermRanker;
8 | import edu.nyu.jet.ice.uicomps.Ice;
9 | import edu.nyu.jet.ice.uicomps.ListFilter;
10 | import edu.nyu.jet.ice.uicomps.RelationFilter;
11 | import edu.nyu.jet.ice.uicomps.TermFilter;
12 | import edu.nyu.jet.ice.utils.FileNameSchema;
13 | import edu.nyu.jet.ice.utils.IceUtils;
14 | import edu.nyu.jet.ice.utils.ProgressMonitorI;
15 | import edu.nyu.jet.ice.utils.SwingProgressMonitor;
16 | import edu.nyu.jet.ice.entityset.EntityIndexerBox;
17 | import edu.nyu.jet.ice.views.Refreshable;
18 | import net.miginfocom.swing.MigLayout;
19 |
20 | import javax.swing.*;
21 | import javax.swing.border.TitledBorder;
22 | import java.awt.*;
23 | import java.awt.List;
24 | import java.awt.event.ActionEvent;
25 | import java.awt.event.ActionListener;
26 | import java.io.BufferedReader;
27 | import java.io.File;
28 | import java.io.FileReader;
29 | import java.io.IOException;
30 | import java.util.*;
31 |
32 | /**
33 | * Panel that manages the entity/terminology extraction.
34 | *
35 | * Currently, the entity index functionality comes from EntityIndexerBox.makeSwingBox()
36 | *
37 | * @author yhe
38 | */
39 |
40 | public class SwingEntitiesPanel extends JPanel implements Refreshable {
41 | public final SwingIceStatusPanel statusPanel = new SwingIceStatusPanel();
42 | public final JTextArea textArea = new JTextArea(11, 35);
43 |
44 | /**
45 | * create entities panel and display top-ranked entities in response
46 | * to "Find Entities" button.
47 | */
48 |
49 | public SwingEntitiesPanel() {
50 | super();
51 | this.setLayout(new MigLayout());
52 | this.setOpaque(true);
53 | this.removeAll();
54 | JPanel termBox = new JPanel(new MigLayout());
55 | TitledBorder border = new TitledBorder("Entities");
56 | termBox.setBorder(border);
57 | termBox.setOpaque(true);
58 | termBox.setMinimumSize(new Dimension(480, 270));
59 | JScrollPane scrollPane = new JScrollPane(textArea);
60 | // if (termFileName != null)
61 | // displayTerms(termFileName, 100, textArea, termFilter);
62 | termBox.add(scrollPane, "wrap");
63 | textArea.setEditable(false);
64 |
65 | JButton findEntitiesButton = new JButton("Find Entities");
66 | findEntitiesButton.addActionListener(new ActionListener() {
67 | public void actionPerformed(ActionEvent e) {
68 | findTerms();
69 | Ice.selectedCorpus.termFileName = FileNameSchema.getTermsFileName(Ice.selectedCorpusName);
70 | java.util.List terms =
71 | getTerms(FileNameSchema.getTermsFileName(Ice.selectedCorpusName), 100);
72 | StringBuilder areaTextBuilder = new StringBuilder();
73 | for (String t : terms) {
74 | areaTextBuilder.append(t).append("\n");
75 | }
76 | textArea.setText(areaTextBuilder.toString());
77 | }
78 | });
79 |
80 | termBox.add(findEntitiesButton);
81 |
82 | EntityIndexerBox eib = new EntityIndexerBox();
83 | Box indexBox = eib.makeSwingBox();
84 | this.add(termBox, "cell 0 0");
85 | this.add(statusPanel, "cell 1 0 1 2");
86 | this.add(indexBox, "cell 0 1");
87 | refresh();
88 | }
89 |
90 | /**
91 | * returns a list of (at most limit
) terms from
92 | * file termFile
.
93 | */
94 |
95 | public static java.util.List getTerms(String termFile, int limit) {
96 | java.util.List topTerms = new ArrayList();
97 | try {
98 | BufferedReader reader = new BufferedReader(new FileReader(termFile));
99 | int k = 0;
100 | while (true) {
101 | String term = reader.readLine();
102 | if (term == null) break;
103 | if (term.length() < 4 || !termIsType(term, "nn")) continue;
104 | term = term.substring(0, term.length() - 3);
105 | topTerms.add(term);
106 | k++;
107 | if (k >= limit) break;
108 | }
109 | } catch (IOException e) {
110 | e.printStackTrace();
111 | }
112 | return topTerms;
113 | }
114 |
115 | public void refresh() {
116 | statusPanel.refresh();
117 | }
118 |
119 | /**
120 | * invokes TermRanker
to rank terms by relative frequency,
121 | * writing ranked list to file.
122 | */
123 |
124 | public void findTerms() {
125 | String termFileName = FileNameSchema.getTermsFileName(Ice.selectedCorpusName);
126 | try {
127 | File f = new File(FileNameSchema.getWordCountFileName(Ice.selectedCorpusName));
128 | if (!f.exists() || !f.isFile()) {
129 | if (SwingPathsPanel.preprocessedTextsAvailable(Ice.selectedCorpusName)) {
130 | IcePreprocessor.countWords(false);
131 | } else {
132 | JOptionPane.showMessageDialog(Ice.mainFrame, "Source text not available, cannot rebuild term set");
133 | return;
134 | }
135 | }
136 | TermRanker.rankTerms(FileNameSchema.getWordCountFileName(Ice.selectedCorpusName),
137 | Ice.corpora.get(Ice.selectedCorpus.backgroundCorpus).wordCountFileName,
138 | termFileName);
139 | }
140 | catch (IOException e) {
141 | e.printStackTrace(System.err);
142 | return;
143 | }
144 | }
145 |
146 | private static boolean termIsType(String term, String type) {
147 | String[] parts = term.split("/");
148 | if (parts.length < 2) return false;
149 | return parts[1].equals(type);
150 | }
151 | }
152 |
--------------------------------------------------------------------------------
/src/main/java/edu/nyu/jet/ice/views/swing/package.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Swing-based graphical interface for ICE.
4 |
5 |
6 |
--------------------------------------------------------------------------------
/src/main/python/extract_field.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# Print the first tab-separated field (stripped of surrounding whitespace)
# of every line in the file named by the first command-line argument.
import sys

# 'with' closes the input file once the loop is done
with open(sys.argv[1]) as source:
    for line in source:
        print(line.split('\t')[0].strip())
5 |
--------------------------------------------------------------------------------
/src/main/python/weight_gold.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# For every term listed in gold_file, print "term<TAB>count", where count is
# the sum of all numeric columns found for that term in count_file.
#
# count_file: tab-separated; the first three lines are skipped; column 0 is
#             the term, optionally followed by "/..." (only the part before
#             the first '/' is used as the key); the remaining columns are
#             integers.
# gold_file:  one term per line.
import sys

try:
    count_file = sys.argv[1]
    gold_file = sys.argv[2]
except IndexError:
    # write the usage line to stderr itself; passing sys.stderr as a second
    # print argument only printed its repr on stdout
    sys.stderr.write("weight_gold.py count_file gold_file\n")
    sys.exit(-1)

d = {}
with open(count_file) as counts:
    for line_no, l in enumerate(counts):
        if line_no < 3:
            continue
        # sum() works on Python 2 and 3 alike (reduce is not a builtin on 3)
        count = sum(int(x) for x in l.strip().split('\t')[1:])
        d[l.split('\t')[0].split('/')[0]] = count

# print(d)
with open(gold_file) as gold:
    for l in gold:
        term = l.strip()
        # a gold term absent from count_file raises KeyError, as before
        print("%s\t%d" % (term, d[term]))
--------------------------------------------------------------------------------
/src/models/data/QuantifierPatterns.txt:
--------------------------------------------------------------------------------
1 | // handle gram of drugs
2 |
3 | pattern set quantifiers;
4 |
5 | quantifier := "gram" | "grams" | "kilogram" | "kilograms" | "pound" | "pounds" | "ounce" | "ounces";
6 |
7 | name-with-quantifier := (quantifier):Arg2 "of" [enamex]:Arg1;
8 |
9 | when name-with-quantifier add [qenamex name=Arg1];
--------------------------------------------------------------------------------
/src/props/ice.yml:
--------------------------------------------------------------------------------
1 | --- !java.util.TreeMap {}
2 | --- !java.util.TreeMap {}
3 | --- !java.util.TreeMap {}
--------------------------------------------------------------------------------
/src/props/iceprops:
--------------------------------------------------------------------------------
1 | Ice.TermRanker.alpha = 0
2 | # Ice.DepEmbeddings.fileName = deps.words
3 | Ice.Bootstrapper.diversify = false
4 | Ice.Bootstrapper.debug = false
5 | Ice.IcePreprocessor.parseprops = parseprops
--------------------------------------------------------------------------------
/src/props/onomaprops:
--------------------------------------------------------------------------------
1 | # JET properties file for dependency counter
2 | #
3 | Jet.batch = t
4 | Jet.dataPath = data
5 | EnglishLex.fileName1 = Jet4.dict
6 | EnglishLex.fileName2 = titles.dict
7 | Gazetteer.fileName = loc.dict
8 | NameGender.fileName = gender.dict
9 | Tags.fileName = pos_hmm.txt
10 | NameTags.ME.fileName = ../acedata/AceOntoMeneModel
11 | WordClusters.fileName = brownClusters10-2014.txt
12 | Pattern.fileName1 = MEchunkPatterns.txt
13 | Ace.EDTtype.fileName = ../acedata/EDT type dict 05.txt
14 | Ace.NameSubtypeModel.fileName = ../acedata/ACEnameSubtypeModel 05.txt
15 | Pattern.quantifierFileName = QuantifierPatterns.txt
16 | Timex.refTime = 2015-10-01
17 | Timex.rule = time_rules.yaml
18 | ##### ICE USER CONFIG FILES #####
19 | Onoma.fileName = ../acedata/ice_onoma.dict
20 | Ace.EDTtype.auxFileName = ../acedata/EDTypesFromUser.dict
21 | Ace.RelationModel.fileName = ../acedata/iceRelationModel
22 | #################################
23 | #
24 | # processDocument = sentenceSplit, sentence:processSentence
25 | processSentence = tokenize, lexLookup, tagNamesFromOnoma, tagTimex
26 |
--------------------------------------------------------------------------------
/src/props/parseprops:
--------------------------------------------------------------------------------
1 | # JET properties file for dependency counter
2 | #
3 | Jet.batch = t
4 | Jet.dataPath = data
5 | EnglishLex.fileName1 = Jet4.dict
6 | EnglishLex.fileName2 = titles.dict
7 | Gazetteer.fileName = loc.dict
8 | NameGender.fileName = gender.dict
9 | Tags.fileName = pos_hmm.txt
10 | Chunker.fileName = chunkModel.txt
11 | NameTags.ME.fileName = ../acedata/AceOntoMeneModel
12 | WordClusters.fileName = brownClusters10-2014.txt
13 | Pattern.fileName1 = MEchunkPatterns.txt
14 | Ace.EDTtype.fileName = ../acedata/EDT type dict 05.txt
15 | Ace.NameSubtypeModel.fileName = ../acedata/ACEnameSubtypeModel 05.txt
16 | DepParser.model.fileName = parseModel.gz
17 | DepParser.transformations = yes
18 | Ace.generic.fileName = ../acedata/generic dict 05.txt
19 | ##### ICE USER CONFIG FILES #####
20 | Onoma.fileName = ../acedata/ice_onoma.dict
21 | Ace.EDTtype.auxFileName = ../acedata/EDTypesFromUser.dict
22 | Ace.RelationModel.fileName = ../acedata/iceRelationModel
23 | #################################
24 | #
25 | processDocument = sentenceSplit, sentence:processSentence
26 | processSentence = tokenize, lexLookup, pruneTags, tagPOS, \
27 | tagNames, chunk, \
28 | pat(names), pat(othernames), ng:processNG, pat(fusePossessive), \
29 | pat(vgroups), pat(particles), pat(np), pat(np), pat(conj), \
30 | pat(vp), pat(rnv), pat(s), depParse, resolve
31 | processNG = pat(ng-chunks)
32 |
--------------------------------------------------------------------------------
/src/props/props:
--------------------------------------------------------------------------------
1 | # JET properties file to run ACE with ICE-generated entity classes and relation patterns
2 | Jet.batch = t
3 | Jet.dataPath = data
4 | EnglishLex.fileName1 = Jet4.dict
5 | EnglishLex.fileName2 = titles.dict
6 | Gazetteer.fileName = loc.dict
7 | NameGender.fileName = gender.dict
8 | DepParser.model.fileName = parseModel.gz
9 | DepParser.transformations = t
10 | Time.fileName = time_rules.yaml
11 | Ace.EDTtype.fileName = ../acedata/EDT type dict 05.txt
12 | Ace.generic.fileName = ../acedata/generic dict 05.txt
13 | Ace.NameSubtypeModel.fileName = ../acedata/ACEnameSubtypeModel 05.txt
14 | Ace.Value.fileName = ../acedata/values.dict
15 | Tags.fileName = pos_hmm.txt
16 | Pattern.fileName1 = MEchunkPatterns.txt
17 | Pattern.fileName2 = NPpatterns.txt
18 | Chunker.fileName = chunkModel.txt
19 | NameTags.ME.fileName = ../acedata/AceOntoMeneModel
20 | WordClusters.fileName = brownClusters10-2014.txt
21 | ##### ICE GENERATED FILES #####
22 | Onoma.fileName = ../acedata/ice_onoma.dict
23 | Ace.EDTtype.auxFileName = ../acedata/EDTypesFromUser.dict
24 | Ace.RelationDepPaths.fileName = ../acedata/iceRelationModel
25 | #################################
26 | processDocument = sentenceSplit, sentence:processSentence
27 | processSentence = tokenize, lexLookup, pruneTags, tagNames, tagNamesFromOnoma, chunk, \
28 | pat(names), pat(othernames), ng:processNG, depParse, resolve
29 | processNG = pat(ng-chunks)
30 |
--------------------------------------------------------------------------------
/src/retired/BatchMaeToApf.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;
2 |
3 | import edu.nyu.jet.ice.utils.IceUtils;
4 |
5 | import java.io.IOException;
6 |
7 | /**
8 | * A utility class to convert MAE annotated files to APF files.
9 | *
10 | * This is not used in the current ICE GUI / CLI.
11 | *
12 | * @author yhe
13 | */
14 | public class BatchMaeToApf {
15 |
16 | public static void main(String[] args) throws IOException {
17 | if (args.length != 3) {
18 | System.err.println("Jet.RelationAL.BatchMaeToApf maeFileList txtFileList apfFileList");
19 | System.exit(-1);
20 | }
21 | String[] maeFiles = IceUtils.readLines(args[0]);
22 | String[] txtFiles = IceUtils.readLines(args[1]);
23 | String[] apfFiles = IceUtils.readLines(args[2]);
24 | if (maeFiles.length != apfFiles.length ||
25 | maeFiles.length != txtFiles.length) {
26 | System.err.println("Mae, txt, and apf file list should have the same length.");
27 | }
28 | for (int i = 0; i < maeFiles.length; i++) {
29 | System.err.println("Mae file:" + maeFiles[i]);
30 | MaeToApf.main(new String[]{maeFiles[i], txtFiles[i], apfFiles[i]});
31 | }
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/src/retired/DepPathFeatureExtractor.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;
2 |
3 | import edu.nyu.jet.aceJet.AceDocument;
4 | import edu.nyu.jet.aceJet.AceEntityMention;
5 | import edu.nyu.jet.aceJet.AceRelationMention;
6 | import edu.nyu.jet.aceJet.EventSyntacticPattern;
7 | import edu.nyu.jet.parser.SyntacticRelationSet;
8 | import edu.nyu.jet.tipster.Annotation;
9 | import edu.nyu.jet.tipster.Document;
10 | import opennlp.model.Event;
11 |
12 | import java.util.List;
13 |
14 | /**
15 | * A Feature extractor using dependency path features for supervised/simulated active learning relation extraction.
16 | *
17 | * This class is not used by ICE GUI/CLI.
18 | */
19 | public class DepPathFeatureExtractor implements RelationFeatureExtractor {
20 | public Event extractFeatures(AceEntityMention m1,
21 | AceEntityMention m2,
22 | AceRelationMention r,
23 | Annotation sentence,
24 | SyntacticRelationSet paths,
25 | List mentions,
26 | AceDocument aceDoc,
27 | Document doc) {
28 | String label = r == null ? "NONE" : r.relation.type;
29 | int h1 = m1.getJetHead().start();
30 | int h2 = m2.getJetHead().start();
31 | if (h1 >= h2) {
32 | int tmp = h1;
33 | h1 = h2;
34 | h2 = tmp;
35 | }
36 | String path = EventSyntacticPattern.buildSyntacticPath(h1, h2, paths);
37 | path = path == null ? "EMPTY" : path.replaceAll("\\s+", "_");
38 | String type1 = m1.entity.type;
39 | String type2 = m2.entity.type;
40 | String concatTypes = type1 + ":::" + type2;
41 | String concatAll = type1 + ":::" + path + ":::" + type2;
42 | return new Event(label, new String[]{path});
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/retired/DepPathSameConstitsFeatureExtractor.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;
2 |
3 | import edu.nyu.jet.aceJet.AceDocument;
4 | import edu.nyu.jet.aceJet.AceEntityMention;
5 | import edu.nyu.jet.aceJet.AceRelationMention;
6 | import edu.nyu.jet.aceJet.EventSyntacticPattern;
7 | import edu.nyu.jet.parser.SyntacticRelationSet;
8 | import edu.nyu.jet.tipster.Annotation;
9 | import edu.nyu.jet.tipster.Document;
10 | import opennlp.model.Event;
11 |
12 | import java.util.ArrayList;
13 | import java.util.List;
14 |
15 | /**
16 | * A Feature extractor using dependency path and a flag for whether the two arguments belong to a same syntactic
17 | * constituent as features for supervised/simulated active learning relation extraction.
18 | *
19 | * This class is not used by ICE GUI/CLI.
20 | *
21 | * @author yhe
22 | */
23 | public class DepPathSameConstitsFeatureExtractor implements RelationFeatureExtractor {
24 | public Event extractFeatures(AceEntityMention m1,
25 | AceEntityMention m2,
26 | AceRelationMention r,
27 | Annotation sentence,
28 | SyntacticRelationSet paths,
29 | List mentions,
30 | AceDocument aceDoc,
31 | Document doc) {
32 | String label = r == null ? "NONE" : r.relation.type;
33 | int h1 = m1.getJetHead().start();
34 | int h2 = m2.getJetHead().start();
35 | int h1Start = m1.getJetHead().start();
36 | int h2End = m2.getJetHead().end();
37 | if (h1 >= h2) {
38 | int tmp = h1;
39 | h1 = h2;
40 | h2 = tmp;
41 | h1Start = m2.getJetHead().start();
42 | h2End = m1.getJetHead().end();
43 | }
44 | String path = EventSyntacticPattern.buildSyntacticPath(h1, h2, paths);
45 | path = path == null ? "EMPTY" : path.replaceAll("\\s+", "_");
46 | String type1 = m1.entity.type;
47 | String type2 = m2.entity.type;
48 | String concatTypes = type1 + ":::" + type2;
49 | String concatAll = type1 + ":::" + path + ":::" + type2;
50 | List feats = SameConstitFeatureExtractor.extractSameConstits(h1Start, h2End, doc);
51 | int pathLength = path.split(":").length;
52 | //System.err.println("Path length = " + pathLength);
53 | List updatedFeats = new ArrayList();
54 | for (String feat : feats) {
55 | updatedFeats.add("CONJ_SAME_LENGTH=" + feat + ":::" + pathLength);
56 | }
57 | updatedFeats.add("PATH_LENGTH=" + pathLength);
58 | updatedFeats.add("PATH_WITH_TYPE=" + concatAll);
59 | return new Event(label, updatedFeats.toArray(new String[updatedFeats.size()]));
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/retired/DepPathTypeFeatureExtractor.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;
2 |
3 | import edu.nyu.jet.aceJet.AceDocument;
4 | import edu.nyu.jet.aceJet.AceEntityMention;
5 | import edu.nyu.jet.aceJet.AceRelationMention;
6 | import edu.nyu.jet.aceJet.EventSyntacticPattern;
7 | import edu.nyu.jet.parser.SyntacticRelationSet;
8 | import edu.nyu.jet.tipster.Annotation;
9 | import edu.nyu.jet.tipster.Document;
10 | import opennlp.model.Event;
11 |
12 | import java.util.List;
13 |
14 | /**
15 | * A Feature extractor using dependency path and entity types as features for supervised/simulated active learning
16 | * relation extraction.
17 | *
18 | * This class is not used by ICE GUI/CLI.
19 | *
20 | * @author yhe
21 | */
22 | public class DepPathTypeFeatureExtractor implements RelationFeatureExtractor {
23 | public Event extractFeatures(AceEntityMention m1,
24 | AceEntityMention m2,
25 | AceRelationMention r,
26 | Annotation sentence,
27 | SyntacticRelationSet paths,
28 | List mentions,
29 | AceDocument aceDoc,
30 | Document doc) {
31 | String label = r == null || r.relation.type.toLowerCase().equals("null") ? "NONE" : r.relation.type;
32 | int h1 = m1.getJetHead().start();
33 | int h2 = m2.getJetHead().start();
34 | if (h1 >= h2) {
35 | int tmp = h1;
36 | h1 = h2;
37 | h2 = tmp;
38 | }
39 | String path = EventSyntacticPattern.buildSyntacticPath(h1, h2, paths);
40 | path = path == null ? "EMPTY" : path.replaceAll("\\s+", "_");
41 | String type1 = m1.entity.type;
42 | String type2 = m2.entity.type;
43 | return new Event(label, new String[]{path, type1, type2});
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/retired/EventItem.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;
2 |
3 | import java.util.*;
4 |
5 | /**
6 | * EventItem is a wrapper around the OpenNLP Event class (feature+label for a training instance), to simplify training
7 | * of supervised relation extraction models
8 | *
9 | * Not used in ICE GUI/CLI.
10 | *
11 | * @author yhe
12 | * @version 1.0
13 | */
public class EventItem implements Comparable<EventItem> {
    public static final String NOT_RELATION_LABEL = "NOT_RELATION";
    public static final String UNKNOWN_LABEL = "UNDECIDED";

    /** Regex for the "|||" separator between the fields of a serialized line. */
    private static final String FIELD_SEPARATOR = "\\|\\|\\|";
    /** Regex for the "+++" separator between the two entity types. */
    private static final String TYPE_SEPARATOR = "\\+\\+\\+";
    /** Minimum number of "|||"-separated fields a serialized line must have. */
    private static final int MIN_FIELDS = 5;

    private String sentence;
    private String path;
    private String type1;
    private String type2;
    private boolean sameNP;
    private List<String> wordsInBetween;
    private String outcome;
    private String predictedOutcome;
    private double score;
    // Distance of score from 0.5 (see setScore); smaller means closer to 0.5.
    private double uncertainty;

    /**
     * Creates an item whose outcome is initially {@link #UNKNOWN_LABEL}.
     *
     * @param sentence       text of the sentence the instance comes from
     * @param path           path string; used as the PATH feature
     * @param type1          type of the first argument
     * @param type2          type of the second argument
     * @param sameNP         value of the sameNP feature
     * @param wordsInBetween words emitted as wordInBetween features
     */
    public EventItem(String sentence, String path, String type1, String type2, boolean sameNP,
                     List<String> wordsInBetween) {
        this.sentence = sentence;
        this.path = path;
        this.type1 = type1;
        this.type2 = type2;
        this.sameNP = sameNP;
        this.wordsInBetween = wordsInBetween;
        this.outcome = UNKNOWN_LABEL;
    }

    /**
     * Returns the value computed by the last {@link #setScore(double)} call:
     * the absolute distance of the score from 0.5.
     */
    public double getUncertainty() {
        return uncertainty;
    }

    /**
     * Builds the feature array for this item: one PATH feature, one sameNP
     * feature and one wordInBetween feature per word.  Whitespace runs inside
     * the path and inside each word are replaced by underscores.
     *
     * @return the features, in the order described above
     */
    public String[] context() {
        List<String> contextList = new ArrayList<String>();
        contextList.add("PATH=" + path.trim().replaceAll("\\s+", "_"));
        contextList.add("sameNP=" + sameNP);
        for (String w : wordsInBetween) {
            contextList.add("wordInBetween=" + w.trim().replaceAll("\\s+", "_"));
        }
        return contextList.toArray(new String[contextList.size()]);
    }

    /**
     * Parses one serialized line of the form
     * <code>sentence ||| path ||| type1+++type2 ||| sameNP ||| words [||| wordsInBetween]</code>.
     * Every field is trimmed before use.  The fifth field must be present but
     * is not stored.  The optional sixth field is split on single spaces, and
     * the resulting words are de-duplicated and sorted.
     *
     * @param line the serialized line
     * @return the parsed item, with outcome {@link #UNKNOWN_LABEL}
     * @throws IllegalArgumentException if the line has fewer than five fields
     *                                  or the type field lacks the "+++" separator
     */
    public static EventItem fromLine(String line) {
        String[] parts = line.trim().split(FIELD_SEPARATOR);
        // Diagnostic output retained from the original implementation.
        System.err.println(line);
        System.err.println(parts.length);
        if (parts.length < MIN_FIELDS) {
            throw new IllegalArgumentException(
                    "Expected at least " + MIN_FIELDS + " fields separated by '|||' but found "
                            + parts.length + ": " + line);
        }
        String[] types = parts[2].trim().split(TYPE_SEPARATOR);
        if (types.length < 2) {
            throw new IllegalArgumentException(
                    "Expected 'type1+++type2' in the third field but found '" + parts[2] + "': " + line);
        }
        // Trim before parsing: " true " would otherwise be read as false.
        boolean sameNPBool = Boolean.parseBoolean(parts[3].trim());
        // The words-in-between field is only honoured when the line has exactly six fields.
        String[] wordsInBetweenArr = parts.length == 6 ? parts[5].trim().split(" ") : new String[]{};
        // A TreeSet removes duplicates and yields a deterministic (sorted) feature order.
        Set<String> wordsInBetweenSet = new TreeSet<String>(Arrays.asList(wordsInBetweenArr));
        List<String> wordsInBetween = new ArrayList<String>(wordsInBetweenSet);
        return new EventItem(parts[0].trim(), parts[1].trim(), types[0].trim(), types[1].trim(),
                sameNPBool, wordsInBetween);
    }

    /**
     * Tells whether this item's two types equal the given pair, in either order.
     */
    public boolean sameTypesAs(String type1, String type2) {
        return (this.type1.equals(type1) && this.type2.equals(type2)) ||
                (this.type1.equals(type2) && this.type2.equals(type1));
    }

    /** Tells whether the outcome is still {@link #UNKNOWN_LABEL}. */
    public boolean outcomeUNK() {
        return outcome.equals(UNKNOWN_LABEL);
    }

    public String getSentence() {
        return sentence;
    }

    public void setSentence(String sentence) {
        this.sentence = sentence;
    }

    public String getPath() {
        return path;
    }

    public void setPath(String path) {
        this.path = path;
    }

    public String getType1() {
        return type1;
    }

    public void setType1(String type1) {
        this.type1 = type1;
    }

    public String getType2() {
        return type2;
    }

    public void setType2(String type2) {
        this.type2 = type2;
    }

    public boolean isSameNP() {
        return sameNP;
    }

    public void setSameNP(boolean sameNP) {
        this.sameNP = sameNP;
    }

    public List<String> getWordsInBetween() {
        return wordsInBetween;
    }

    public void setWordsInBetween(List<String> wordsInBetween) {
        this.wordsInBetween = wordsInBetween;
    }

    public String getOutcome() {
        return outcome;
    }

    public void setOutcome(String outcome) {
        this.outcome = outcome;
    }

    /** Returns the sentence with surrounding whitespace removed and inner whitespace runs collapsed. */
    @Override
    public String toString() {
        return sentence.trim().replaceAll("\\s+", " ");
    }

    /**
     * Orders items by ascending {@link #getUncertainty()} value.
     * Double.compare is used so that NaN values are ordered consistently
     * instead of comparing as equal to everything.
     */
    public int compareTo(EventItem eventItem) {
        return Double.compare(this.uncertainty, eventItem.uncertainty);
    }

    public String getPredictedOutcome() {
        return predictedOutcome;
    }

    public void setPredictedOutcome(String predictedOutcome) {
        this.predictedOutcome = predictedOutcome;
    }

    /**
     * Stores the score and recomputes the uncertainty value as
     * <code>|0.5 - score|</code>.
     */
    public void setScore(double score) {
        this.score = score;
        this.uncertainty = Math.abs(0.5 - score);
    }

    public double getScore() {
        return score;
    }
}
174 |
--------------------------------------------------------------------------------
/src/retired/RelationFeatureExtractor.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;
2 |
3 | import edu.nyu.jet.aceJet.AceDocument;
4 | import edu.nyu.jet.aceJet.AceEntityMention;
5 | import edu.nyu.jet.aceJet.AceRelationMention;
6 | import edu.nyu.jet.parser.SyntacticRelationSet;
7 | import edu.nyu.jet.tipster.Annotation;
8 | import edu.nyu.jet.tipster.Document;
9 | import opennlp.model.Event;
10 |
11 | import java.util.List;
12 |
13 | /**
14 | * Interface for a feature extractor for relation classifiers.
15 | *
16 | * Used for supervised/simulated active learning; not used by ICE GUI/CLI.
17 | */
18 | public interface RelationFeatureExtractor {
19 | public Event extractFeatures(AceEntityMention m1,
20 | AceEntityMention m2,
21 | AceRelationMention r,
22 | Annotation sentence,
23 | SyntacticRelationSet paths,
24 | List mentions,
25 | AceDocument aceDoc,
26 | Document doc);
27 | }
28 |
--------------------------------------------------------------------------------
/src/retired/RelationOracle.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;// -*- tab-width: 4 -*-
2 |
3 | import edu.nyu.jet.ice.models.IcePath;
4 | import edu.nyu.jet.ice.models.IcePath.IcePathChoice;
5 |
6 | import java.util.*;
7 | import java.io.*;
8 |
9 | /**
10 | * Provides a mechanism for simulated active learning of relations, avoiding the
11 | * need to label the same dependency paths repeatedly by hand.
12 | *
13 | * If there is a local file relationOracle
, use that file to label
14 | * candidate paths. If there is no entry for a particular candidate, ask the
15 | * user to label it and record that label for future use in file
16 | * newRelationOracle
.
17 | */
18 |
19 | public class RelationOracle {
20 |
21 | static String status = "UNKNOWN";
22 |
23 | // each line has a repr and YES or NO
24 | static Set knownRelations = new HashSet();
25 |
26 | public static boolean exists () {
27 | if (status.equals("YES"))
28 | return true;
29 | else if (status.equals("NO"))
30 | return false;
31 | else try {
32 | if (new File("relationOracle").exists()) {
33 | BufferedReader reader = new BufferedReader (new FileReader ("relationOracle"));
34 | String line;
35 | while ((line = reader.readLine()) != null) {
36 | knownRelations.add(line);
37 | }
38 | status = "YES";
39 | return true;
40 | } else {
41 | status = "NO";
42 | return false;
43 | }
44 | } catch (IOException e) {
45 | System.err.println("IOException in RelationOracle");
46 | return false;
47 | }
48 | }
49 |
50 | /**
51 | * If a relation oracle table has been loaded, use that table to label the
52 | * candidate paths on foundPatterns
. If a candidate path
53 | * is not in the table, ask the user for a label, apply that label
54 | * and record that label for future use.
55 | *
56 | * At the end, write a file newRelationOracle
with
57 | * an updated table.
58 | */
59 |
60 | public static void label (List foundPatterns) {
61 | try {
62 | BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
63 | for (IcePath fp : foundPatterns) {
64 | String repr = fp.getRepr();
65 | if (knownRelations.contains(repr + " YES")) {
66 | fp.setChoice(IcePathChoice.YES);
67 | } else if (knownRelations.contains(repr + " NO")) {
68 | fp.setChoice(IcePathChoice.NO);
69 | } else
70 | while (true) {
71 | System.out.print (repr + "?");
72 | String response = reader.readLine();
73 | if (response.equals("Y")) {
74 | fp.setChoice(IcePathChoice.YES);
75 | knownRelations.add(repr + " YES");
76 | break;
77 | } else if (response.equals("N")) {
78 | fp.setChoice(IcePathChoice.NO);
79 | knownRelations.add(repr + " NO");
80 | break;
81 | } else {
82 | System.out.println("Type Y or N");
83 | }
84 | }
85 | }
86 | PrintWriter writer = new PrintWriter (new FileWriter ("newRelationOracle"));
87 | for (String repr : knownRelations) {
88 | writer.println(repr);
89 | }
90 | writer.close();
91 | } catch (IOException e) {
92 | System.err.println("IOException in RelationOracle");
93 | }
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/src/retired/SameConstitFeatureExtractor.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;
2 |
3 | import edu.nyu.jet.aceJet.AceDocument;
4 | import edu.nyu.jet.aceJet.AceEntityMention;
5 | import edu.nyu.jet.aceJet.AceRelationMention;
6 | import edu.nyu.jet.parser.SyntacticRelationSet;
7 | import edu.nyu.jet.tipster.Annotation;
8 | import edu.nyu.jet.tipster.Document;
9 | import opennlp.model.Event;
10 |
11 | import java.util.ArrayList;
12 | import java.util.List;
13 |
14 | /**
15 | * A Feature extractor that checks if both arguments belong to the same syntactic constituent for supervised/simulated
16 | * active learning relation extraction.
17 | *
18 | * This class is not used by ICE GUI/CLI.
19 | *
20 | * @author yhe
21 | */
22 | public class SameConstitFeatureExtractor implements RelationFeatureExtractor {
23 | public Event extractFeatures(AceEntityMention m1,
24 | AceEntityMention m2,
25 | AceRelationMention r,
26 | Annotation sentence,
27 | SyntacticRelationSet paths,
28 | List mentions,
29 | AceDocument aceDoc,
30 | Document doc) {
31 | String label = r == null ? "NONE" : r.relation.type;
32 | int h1Start = m1.getJetHead().start();
33 | int h2End = m2.getJetHead().end();
34 | if (h1Start >= h2End) {
35 | h1Start = m2.getJetHead().start();
36 | h2End = m1.getJetHead().end();
37 | }
38 | List sameConstits = extractSameConstits(h1Start, h2End, doc);
39 | return new Event(label,
40 | sameConstits.toArray(new String[sameConstits.size()]));
41 | }
42 |
43 | public static List extractSameConstits(int start, int end, Document doc) {
44 | List result = new ArrayList();
45 | List constits = doc.annotationsOfType("constit");
46 | if (constits != null) {
47 | for (Annotation constit : constits) {
48 | if (constit.start() < start && constit.end() > end) {
49 | String cat = ((String)constit.get("cat")).toUpperCase();
50 | if (cat.equals("NP")) {
51 | String feat = "SameConstit=" + cat.toUpperCase();
52 | if (!result.contains(feat)) {
53 | result.add(feat);
54 | }
55 | }
56 | }
57 | }
58 | }
59 | // if (result.size() == 0) {
60 | // result.add("SameConstit=NONE");
61 | // }
62 | return result;
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/retired/TokenFeatureExtractor.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;
2 |
3 | import edu.nyu.jet.aceJet.AceDocument;
4 | import edu.nyu.jet.aceJet.AceEntityMention;
5 | import edu.nyu.jet.aceJet.AceRelationMention;
6 | import edu.nyu.jet.parser.SyntacticRelationSet;
7 | import edu.nyu.jet.tipster.Annotation;
8 | import edu.nyu.jet.tipster.Document;
9 | import opennlp.model.Event;
10 |
11 | import java.util.ArrayList;
12 | import java.util.List;
13 |
14 | /**
15 | * A Feature extractor using token sequence as features for supervised/simulated active learning
16 | * relation extraction.
17 | *
18 | * This class is not used by ICE GUI/CLI.
19 | *
20 | * @author yhe
21 | */
22 | public class TokenFeatureExtractor implements RelationFeatureExtractor {
23 | public Event extractFeatures(AceEntityMention m1,
24 | AceEntityMention m2,
25 | AceRelationMention r,
26 | Annotation sentence,
27 | SyntacticRelationSet paths,
28 | List mentions,
29 | AceDocument aceDoc,
30 | Document doc) {
31 | String label = r == null ? "NONE" : r.relation.type;
32 | int h1Start = m1.getJetHead().end();
33 | int h2End = m2.getJetHead().start();
34 | if (h1Start >= h2End) {
35 | h1Start = m2.getJetHead().end();
36 | h2End = m1.getJetHead().start();
37 | }
38 | List sameTokens = extractSameTokens(h1Start, h2End, doc);
39 | return new Event(label,
40 | sameTokens.toArray(new String[sameTokens.size()]));
41 | }
42 |
43 | public static List extractSameTokens(int start, int end, Document doc) {
44 | List result = new ArrayList();
45 | List tokens = doc.annotationsOfType("token");
46 | if (tokens != null) {
47 | StringBuilder b = new StringBuilder();
48 | for (Annotation token : tokens) {
49 | if (token.start() > start && token.end() < end) {
50 | String word = doc.text(token).toLowerCase().trim().replaceAll("\\s+", "_");
51 | b.append(word + ":");
52 | }
53 | }
54 | if (b.length() > 0) {
55 | String feat = "Tokens=" + b.toString().substring(0, b.length() - 1);
56 | if (!result.contains(feat)) {
57 | result.add(feat);
58 | }
59 | }
60 | }
61 | if (result.size() == 0) {
62 | result.add("Tokens=NONE");
63 | }
64 | return result;
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/src/retired/TokenTypeFeatureExtractor.java:
--------------------------------------------------------------------------------
1 | package edu.nyu.jet.ice.relation;
2 |
3 | import edu.nyu.jet.aceJet.AceDocument;
4 | import edu.nyu.jet.aceJet.AceEntityMention;
5 | import edu.nyu.jet.aceJet.AceRelationMention;
6 | import edu.nyu.jet.aceJet.EventSyntacticPattern;
7 | import edu.nyu.jet.parser.SyntacticRelationSet;
8 | import edu.nyu.jet.tipster.Annotation;
9 | import edu.nyu.jet.tipster.Document;
10 | import opennlp.model.Event;
11 |
12 | import java.util.ArrayList;
13 | import java.util.List;
14 |
15 | /**
16 | * A Feature extractor using token sequence and entity types as features for supervised/simulated active learning
17 | * relation extraction.
18 | *
19 | * This class is not used by ICE GUI/CLI.
20 | *
21 | * @author yhe
22 | */
23 | public class TokenTypeFeatureExtractor implements RelationFeatureExtractor {
24 | public Event extractFeatures(AceEntityMention m1,
25 | AceEntityMention m2,
26 | AceRelationMention r,
27 | Annotation sentence,
28 | SyntacticRelationSet paths,
29 | List mentions,
30 | AceDocument aceDoc,
31 | Document doc) {
32 | String label = r == null ? "NONE" : r.relation.type;
33 | int h1 = m1.getJetHead().start();
34 | int h2 = m2.getJetHead().start();
35 | int h1Start = m1.getJetHead().end();
36 | int h2End = m2.getJetHead().start();
37 | if (h1 >= h2) {
38 | int tmp = h1;
39 | h1 = h2;
40 | h2 = tmp;
41 | h1Start = m2.getJetHead().end();
42 | h2End = m1.getJetHead().start();
43 | }
44 | String path = EventSyntacticPattern.buildSyntacticPath(h1, h2, paths);
45 | path = path == null ? "EMPTY" : path.replaceAll("\\s+", "_");
46 | String type1 = m1.entity.type;
47 | String type2 = m2.entity.type;
48 | String concatTypes = type1 + ":::" + type2;
49 | //String concatAll = type1 + ":::" + path + ":::" + type2;
50 | List feats = TokenFeatureExtractor.extractSameTokens(h1Start, h2End, doc);
51 | //int pathLength = path.split(":").length;
52 | //System.err.println("Path length = " + pathLength);
53 | List updatedFeats = new ArrayList();
54 | for (String feat : feats) {
55 | updatedFeats.add(feat);
56 | updatedFeats.add("CONJ=" + feat + ":::" + concatTypes);
57 | }
58 | if (updatedFeats.size() == 0) {
59 | updatedFeats.add("TOKEN=NONE");
60 | updatedFeats.add("CONJ=TOKEN=NONE:::" + concatTypes);
61 | }
62 | //updatedFeats.add("PATH_LENGTH=" + pathLength);
63 | updatedFeats.add(type1);
64 | updatedFeats.add(type2);
65 | updatedFeats.add(concatTypes);
66 | return new Event(label, updatedFeats.toArray(new String[updatedFeats.size()]));
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/src/scripts/icecli:
--------------------------------------------------------------------------------
#!/bin/bash
# Runs the ICE command-line interface (IceCLI) from $ICE_HOME/ice-all.jar,
# forwarding all command-line arguments unchanged.
# Reads ICE_HOME and JET_HOME from the environment.
# Expansions are quoted so paths and arguments containing spaces or glob
# characters reach the JVM as single words.
java -Xmx4g -Dfile.encoding=UTF-8 -cp "$ICE_HOME/ice-all.jar" -DjetHome="$JET_HOME" -DiceHome="$ICE_HOME" edu.nyu.jet.ice.views.cli.IceCLI "$@"
3 |
--------------------------------------------------------------------------------
/src/scripts/icecli6:
--------------------------------------------------------------------------------
#!/bin/bash
# Runs IceCLI6 from ice-all.jar in the current directory, forwarding all
# command-line arguments unchanged.
ICE_HOME=.
# ICE_LIB_HOME is set but not referenced by this script.
ICE_LIB_HOME=.
# "$@" is quoted so arguments containing spaces or glob characters stay intact.
java -Xmx4g -Dfile.encoding=UTF-8 -cp "$ICE_HOME/ice-all.jar" edu.nyu.jet.ice.views.cli.IceCLI6 "$@"
5 |
--------------------------------------------------------------------------------
/src/scripts/runice.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Starts the ICE main class (edu.nyu.jet.ice.controllers.Nice) from
# $ICE_HOME/ice-all.jar.
# Reads ICE_HOME and JET_HOME from the environment; both are quoted so
# installation paths containing spaces are passed as single words.
java -Xmx4g -Dfile.encoding=UTF-8 -cp "$ICE_HOME/ice-all.jar" -DjetHome="$JET_HOME" -DiceHome="$ICE_HOME" edu.nyu.jet.ice.controllers.Nice
3 |
--------------------------------------------------------------------------------
/src/scripts/runtagger.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Runs AceJet.IceTagger from ice-all.jar in the current directory, passing on
# at most the first three command-line arguments.
ICE_HOME=.
# ICE_LIB_HOME is set but not referenced by this script.
ICE_LIB_HOME=.
# "${@:1:3}" expands to the first three arguments, each as its own word, so
# values containing spaces stay intact and missing arguments are not passed
# as empty strings.
java -Xmx4g -Dfile.encoding=UTF-8 -cp "$ICE_HOME/ice-all.jar" AceJet.IceTagger "${@:1:3}"
5 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus2/doc1.txt:
--------------------------------------------------------------------------------
1 | Here is today's news. Fred Smith lives in Chicago.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus2/doc2.txt:
--------------------------------------------------------------------------------
1 | Here is today's news. Harriet Smith lives in Seattle.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus2/doc3.txt:
--------------------------------------------------------------------------------
1 | Here is today's news. Francoise Smith lives in Paris.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus2/doc4.txt:
--------------------------------------------------------------------------------
1 | Here is today's news. Karl Smith lives in Berlin.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus3/doc1.txt:
--------------------------------------------------------------------------------
1 | Here is today's news. Fred Smith lives in Chicago.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus3/doc2.txt:
--------------------------------------------------------------------------------
1 | Here is today's news. Harriet Smith lives in Seattle.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus3/doc3.txt:
--------------------------------------------------------------------------------
1 | Here is today's news. Francoise Smith lives in Paris.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus3/doc4.txt:
--------------------------------------------------------------------------------
1 | Here is today's news. Karl Smith lives in Berlin.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus4/doc1.txt:
--------------------------------------------------------------------------------
1 | Here is today's news. Fred Smith died yesterday in Chicago, Illinois.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus4/doc2.txt:
--------------------------------------------------------------------------------
1 | Harriet Smith died yesterday in Seattle, Washington.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus4/doc3.txt:
--------------------------------------------------------------------------------
1 | Francoise Smith died yesterday in Paris, France.
2 |
--------------------------------------------------------------------------------
/src/test/resources/tinyCorpus4/doc4.txt:
--------------------------------------------------------------------------------
1 | Karl Smith died yesterday in Berlin, Germany.
2 |
--------------------------------------------------------------------------------
/src/test/scripts/checkCount:
--------------------------------------------------------------------------------
#!/bin/tcsh
# usage: checkCount <directory> <expected-count>
# Prints an error message when the number of lines produced by `ls <directory>`
# differs from <expected-count>; prints nothing when they match.
set var=`ls $1 | wc -l`
# keep only the first word of the captured output (the count)
set var=$var[1]
if ($var != $2) then
echo "error: size of $1 is $var , should be $2"
endif
7 |
--------------------------------------------------------------------------------
/src/test/scripts/checkLength:
--------------------------------------------------------------------------------
#!/bin/tcsh
# usage: checkLength <file> <expected-lines>
# Prints an error message when the line count reported by `wc -l <file>`
# differs from <expected-lines>; prints nothing when they match.
set var=`wc -l $1`
# keep only the first word of the captured output (the count; wc also prints the file name)
set var=$var[1]
if ($var != $2) then
echo "error: length of $1 is $var , should be $2"
endif
7 |
--------------------------------------------------------------------------------
/src/test/scripts/validateCLI:
--------------------------------------------------------------------------------
#!/bin/tcsh
#
# validation script for icecli
#
# Invokes icecli, checkLength and checkCount by bare name, so all three must
# be reachable through the command search path.
# The --inputDir arguments below are absolute, site-specific paths; adjust
# them when running from another checkout.
# Starts from a clean state: ice.yml and the cache directory in the current
# directory are deleted first.
#
pwd
rm ice.yml
\rm -r cache
#
# create a few small corpora (2 and 3 are identical)
#
icecli addCorpus tinyCorpus2 --inputDir /misc/proteus107/grishman/ice/ice/src/test/resources/tinyCorpus2 --filter txt
icecli addCorpus tinyCorpus3 --inputDir /misc/proteus107/grishman/ice/ice/src/test/resources/tinyCorpus3 --filter txt
icecli addCorpus tinyCorpus4 --inputDir /misc/proteus107/grishman/ice/ice/src/test/resources/tinyCorpus4 --filter txt
#
# test mergeCorporaInto
#
icecli mergeCorporaInto mergedCorpus1 --targetDir mergedDocs1 --filter txt --fromCorpora tinyCorpus2,tinyCorpus3
icecli mergeCorporaInto mergedCorpus2 --targetDir mergedDocs2 --filter txt --fromCorpora tinyCorpus2,tinyCorpus4
#
# test addCorpus with multiple processes
#
icecli addCorpus tinyCorpusPar --inputDir /misc/proteus107/grishman/ice/ice/src/test/resources/tinyCorpus2 --filter txt --processes 2
#
# compare the generated files and directories with their expected sizes;
# each check prints a line only on a mismatch
#
checkLength cache/tinyCorpus2/docList 4
checkLength cache/tinyCorpusPar/docList 4
checkCount cache/tinyCorpus2/preprocess 25
checkCount cache/tinyCorpusPar/preprocess 25
checkLength cache/mergedCorpus1/docList 8
checkLength cache/mergedCorpus2/docList 8
checkLength cache/mergedCorpus1/counts 13 # 3 header + 4 gpe + 4 person + 2 nn
checkLength cache/mergedCorpus2/counts 18 # 3 header + 8 gpe + 4 person + 3 nn
checkLength cache/mergedCorpus1/Relations 4 # one 'lives' relation in each doc
checkCount mergedDocs1 8
checkCount mergedDocs2 8
checkCount cache/mergedCorpus1/preprocess 49
36 |
--------------------------------------------------------------------------------