├── .gitignore
├── LICENSE
├── README.md
├── build.gradle
├── run.sh
├── settings.gradle
└── src
    └── main
        └── java
            └── se
                └── liu
                    └── ida
                        └── nlp
                            └── sdp
                                └── toolkit
                                    ├── graph
                                        ├── DFS.java
                                        ├── Edge.java
                                        ├── Graph.java
                                        ├── InspectedGraph.java
                                        └── Node.java
                                    ├── io
                                        ├── Constants.java
                                        ├── GraphReader.java
                                        ├── GraphReader2014.java
                                        ├── GraphReader2015.java
                                        ├── GraphWriter.java
                                        ├── GraphWriter2014.java
                                        ├── GraphWriter2015.java
                                        └── ParagraphReader.java
                                    └── tools
                                        ├── Analyzer.java
                                        ├── BasicAnalyzer.java
                                        ├── IOTest.java
                                        ├── PairedBootstrap.java
                                        ├── Scorer.java
                                        └── Splitter.java


/.gitignore:
--------------------------------------------------------------------------------
1 | /build/
2 | /.gradle/
3 | /.nb-gradle/
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013 Marco Kuhlmann <marco.kuhlmann@liu.se>
 2 | 
 3 | Permission to use, copy, modify, and/or distribute this software for any
 4 | purpose with or without fee is hereby granted, provided that the above
 5 | copyright notice and this permission notice appear in all copies.
 6 | 
 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Semantic Dependency Parsing Toolkit
 2 | 
 3 | This repository contains a Java toolkit for semantic dependency parsing. It has been developed in connection with two shared tasks:
 4 | 
 5 | * [SemEval-2014 Task on Broad-Coverage Semantic Dependency Parsing](http://alt.qcri.org/semeval2014/task8/)
 6 | * [SemEval-2015 Task on Broad-Coverage Semantic Dependency Parsing](http://alt.qcri.org/semeval2015/task18/)
 7 | 
 8 | Detailed information about the tasks can be found at the respective websites.
 9 | 
10 | ## Downloading
11 | 
12 | The primary form of distribution for the project is via Git. See the [Releases](https://github.com/semantic-dependency-parsing/toolkit/releases) page for precompiled jar files.
13 | 
14 | ## Building
15 | 
16 | After checking out the project from the repository, you should be able to build it using [Gradle](http://www.gradle.org/).
17 | 
18 | 	$ cd toolkit
19 | 	$ gradle build
20 | 
21 | This will create a file `build/libs/sdp.jar` with the compiled classes. The jar can then be added to your classpath, whereby you will be able to use the provided classes in your own project. To see what is there, build the documentation:
22 | 
23 | 	$ gradle javadoc
24 | 
25 | The entry page for the documentation is `build/docs/javadoc/index.html`.
26 | 
27 | ## Command-line tools
28 | 
29 | Some of the tools implemented in the project can be called from the command line. The most relevant example is the `Scorer` tool, which is run as follows:
30 | 
31 | 	$ java -cp build/libs/sdp.jar se.liu.ida.nlp.sdp.toolkit.tools.Scorer gold.sdp system.sdp representation=DM
32 | 
33 | This will evaluate the parser output in the file `system.sdp` against the gold-standard analyses in the file `gold.sdp`, assuming that the data is given in the `DM` representation; other possible representations are `PAS` and `PSD`. The evaluation metrics used are defined on the [Evaluation page](http://alt.qcri.org/semeval2015/task18/index.php?id=evaluation).
34 | 
35 | The Git repository contains a convenience shell script called `run.sh` that allows you to use an abbreviated form of the above command, assuming that the jar file is in `build/libs/sdp.jar`:
36 | 
37 | 	$ sh run.sh Scorer gold.sdp system.sdp representation=DM
38 | 
39 | Abbreviations:
40 | 
41 | 	LP: labeled precision
42 | 	LR: labeled recall
43 | 	LF: labeled F1
44 | 	LM: labeled exact match
45 | 	
46 | 	UP: unlabeled precision
47 | 	UR: unlabeled recall
48 | 	UF: unlabeled F1
49 | 	UM: unlabeled exact match
50 | 	
51 | 	PP: precision with respect to complete predications
52 | 	PR: recall with respect to complete predications
53 | 	PF: F1 with respect to complete predications
54 | 	
55 | 	FP: precision with respect to semantic frames
56 | 	FR: recall with respect to semantic frames
57 | 	FF: F1 with respect to semantic frames
58 | 	
59 | 


--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
// Standard Java build: compiles the sources and assembles the jar.
apply plugin: 'java'
// Adds support for publishing the build output as a Maven artifact.
apply plugin: 'maven-publish'

// Compile the sources at the Java 8 language level.
sourceCompatibility = '1.8'

// Repositories that are searched when resolving dependencies.
repositories {
	mavenCentral()
	mavenLocal()
}

// Maven publication of the toolkit under the coordinates
// se.liu.ida.nlp.sdp:toolkit:2.0-SNAPSHOT.
publishing {
	publications {
		mavenJava(MavenPublication) {
			groupId 'se.liu.ida.nlp.sdp'
			artifactId 'toolkit'
			version '2.0-SNAPSHOT'
			// Publish the artifacts produced by the 'java' plugin.
			from components.java
		}
	}
}
21 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# Convenience launcher for the command-line tools of the toolkit.
#
# Usage: sh run.sh TOOL [ARGS...]
#
# TOOL is the simple name of a class in the package
# se.liu.ida.nlp.sdp.toolkit.tools (for example "Scorer"); all remaining
# arguments are passed to that tool unchanged. The jar is expected at
# build/libs/sdp.jar relative to the directory containing this script.

set -e

if [ "$#" -lt 1 ]; then
	echo "usage: $0 TOOL [ARGS...]" >&2
	exit 2
fi

# Absolute path of the directory containing this script. Using '&&' makes
# sure that 'pwd' only runs if the 'cd' succeeded.
root=$(cd "$(dirname "$0")" && pwd)
pkg=se.liu.ida.nlp.sdp.toolkit.tools.

tool=$1
shift

# Quote every expansion so that paths and arguments containing whitespace
# are passed to java as single words.
java -cp "$root/build/libs/sdp.jar" "$pkg$tool" "$@"
9 | 


--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
// Name of the root project. Gradle's default archive naming derives the jar
// file name from it, which is why run.sh and the README refer to
// build/libs/sdp.jar.
rootProject.name = 'sdp'
2 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/graph/DFS.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * See the file "LICENSE" for the full license governing this code.
  3 |  */
  4 | package se.liu.ida.nlp.sdp.toolkit.graph;
  5 | 
  6 | /**
  7 |  * Depth-first graph search.
  8 |  *
  9 |  * @author Marco Kuhlmann
 10 |  */
 11 | public class DFS {
 12 | 
 13 | 	/**
 14 | 	 * Whether to search the graph as an undirected graph.
 15 | 	 */
 16 | 	private final boolean undirected;
 17 | 
 18 | 	/**
 19 | 	 * The graph.
 20 | 	 */
 21 | 	private final Graph graph;
 22 | 
 23 | 	/**
 24 | 	 * The index of the run during which each node was (first) visited.
 25 | 	 */
 26 | 	private final int[] run;
 27 | 
 28 | 	/**
 29 | 	 * The number of runs.
 30 | 	 */
 31 | 	private int nRuns;
 32 | 
 33 | 	/**
 34 | 	 * The preorder timestamps of each node.
 35 | 	 */
 36 | 	private final int[] enter;
 37 | 
 38 | 	/**
 39 | 	 * The postorder timestamps of each node.
 40 | 	 */
 41 | 	private final int[] leave;
 42 | 
 43 | 	/**
 44 | 	 * A depth-first search on the specified graph.
 45 | 	 *
 46 | 	 * @param graph a graph
 47 | 	 * @param undirected if {@code true}, the graph will be searched as an
 48 | 	 * undirected graph
 49 | 	 */
 50 | 	public DFS(Graph graph, boolean undirected) {
 51 | 		this.undirected = undirected;
 52 | 
 53 | 		this.graph = graph;
 54 | 
 55 | 		int nNodes = graph.getNNodes();
 56 | 		this.run = new int[nNodes];
 57 | 		this.enter = new int[nNodes];
 58 | 		this.leave = new int[nNodes];
 59 | 		computeTimestamps();
 60 | 	}
 61 | 
 62 | 	/**
 63 | 	 * A depth-first search on the specified graph.
 64 | 	 *
 65 | 	 * @param graph a graph
 66 | 	 */
 67 | 	public DFS(Graph graph) {
 68 | 		this(graph, false);
 69 | 	}
 70 | 
 71 | 	/**
 72 | 	 * Computes the preorder and postorder timestamps for the inspected graph.
 73 | 	 */
 74 | 	private void computeTimestamps() {
 75 | 		for (Node node : graph.getNodes()) {
 76 | 			enter[node.id] = -1;
 77 | 		}
 78 | 		Timer timer = new Timer();
 79 | 		for (Node node : graph.getNodes()) {
 80 | 			if (enter[node.id] == -1) {
 81 | 				computeTimestamps(node, timer);
 82 | 				nRuns++;
 83 | 			}
 84 | 		}
 85 | 	}
 86 | 
 87 | 	/**
 88 | 	 * Computes the preorder and postorder timestamps for the subgraph starting
 89 | 	 * at the specified node.
 90 | 	 *
 91 | 	 * @param node the entry point for the subgraph
 92 | 	 * @param timer the global timer
 93 | 	 */
 94 | 	private void computeTimestamps(Node node, Timer timer) {
 95 | 		run[node.id] = nRuns;
 96 | 		enter[node.id] = timer.tick();
 97 | 		for (Edge outgoingEdge : node.getOutgoingEdges()) {
 98 | 			// Only visit nodes that have not been visited before.
 99 | 			if (enter[outgoingEdge.target] == -1) {
100 | 				computeTimestamps(graph.getNode(outgoingEdge.target), timer);
101 | 			}
102 | 		}
103 | 		if (undirected) {
104 | 			for (Edge incomingEdge : node.getIncomingEdges()) {
105 | 				if (enter[incomingEdge.source] == -1) {
106 | 					computeTimestamps(graph.getNode(incomingEdge.source), timer);
107 | 				}
108 | 			}
109 | 		}
110 | 		leave[node.id] = timer.tick();
111 | 	}
112 | 
113 | 	/**
114 | 	 * Timer used in depth-first search.
115 | 	 */
116 | 	private static final class Timer {
117 | 
118 | 		/**
119 | 		 * The current time.
120 | 		 */
121 | 		private int time;
122 | 
123 | 		/**
124 | 		 * Returns the current time, then increments it.
125 | 		 *
126 | 		 * @return the current time
127 | 		 */
128 | 		public int tick() {
129 | 			return time++;
130 | 		}
131 | 	}
132 | 
133 | 	/**
134 | 	 * Returns the number of runs of the search. This is the number of times the
135 | 	 * recursive search was called to cover all nodes in the graph.
136 | 	 *
137 | 	 * @return the number of runs of the search
138 | 	 */
139 | 	public int getNRuns() {
140 | 		return nRuns;
141 | 	}
142 | 
143 | 	/**
144 | 	 * Test whether the specified edge is a self-loop.
145 | 	 *
146 | 	 * @param edge an edge in the searched graph
147 | 	 * @return {@code true} if the specified edge is a self-loop
148 | 	 */
149 | 	public boolean isSelfLoop(Edge edge) {
150 | 		return edge.source == edge.target;
151 | 	}
152 | 
153 | 	/**
154 | 	 * Test whether the specified edge is a back edge.
155 | 	 *
156 | 	 * @param edge an edge in the searched graph
157 | 	 * @return {@code true} if the specified edge is a back edge
158 | 	 */
159 | 	public boolean isBackEdge(Edge edge) {
160 | 		return enter[edge.target] < enter[edge.source] && leave[edge.source] < leave[edge.target];
161 | 	}
162 | 
163 | 	/**
164 | 	 * Test whether the searched graph is cyclic.
165 | 	 *
166 | 	 * @return {@code true} if the searched graph is cyclic
167 | 	 */
168 | 	public boolean isCyclic() {
169 | 		for (Edge edge : graph.getEdges()) {
170 | 			if (isSelfLoop(edge) || isBackEdge(edge)) {
171 | 				return true;
172 | 			}
173 | 		}
174 | 		return false;
175 | 	}
176 | }
177 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/graph/Edge.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * See the file "LICENSE" for the full license governing this code.
 3 |  */
 4 | package se.liu.ida.nlp.sdp.toolkit.graph;
 5 | 
 6 | /**
 7 |  * An edge in a semantic dependency graph.
 8 |  *
 9 |  * @author Marco Kuhlmann
10 |  */
11 | public class Edge implements Comparable<Edge> {
12 | 
13 |     /**
14 |      * The unique ID of this edge.
15 |      */
16 |     public final int id;
17 |     /**
18 |      * The ID of the source node of this edge.
19 |      */
20 |     public final int source;
21 |     /**
22 |      * The ID of the target node of this edge.
23 |      */
24 |     public final int target;
25 |     /**
26 |      * The label of this edge.
27 |      */
28 |     public final String label;
29 | 
30 |     /**
31 |      * Construct a new edge.
32 |      *
33 |      * @param id the unique ID of the new edge
34 |      * @param source the ID of the source node of the new edge
35 |      * @param target the ID of the target node of the new edge
36 |      * @param label the label of the new edge
37 |      */
38 |     public Edge(int id, int source, int target, String label) {
39 |         this.id = id;
40 |         this.source = source;
41 |         this.target = target;
42 |         this.label = label;
43 |     }
44 | 
45 |     /**
46 |      * Compares this edge with the specified edge for order. The order used is
47 |      * the lexicographical order on the (target, source) pairs.
48 |      *
49 |      * @param otherEdge the edge to be compared to this edge
50 |      * @return a negative integer, zero, or a positive integer as this edge is
51 |      * less than, equal to, or greater than the specified edge
52 |      */
53 |     @Override
54 |     public int compareTo(Edge otherEdge) {
55 |         if (this.target == otherEdge.target) {
56 |             return this.source - otherEdge.source;
57 |         } else {
58 |             return this.target - otherEdge.target;
59 |         }
60 |     }
61 | }
62 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/graph/Graph.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * See the file "LICENSE" for the full license governing this code.
  3 |  */
  4 | package se.liu.ida.nlp.sdp.toolkit.graph;
  5 | 
  6 | import java.util.ArrayList;
  7 | import java.util.LinkedList;
  8 | import java.util.List;
  9 | 
 10 | /**
 11 |  * A semantic dependency graph.
 12 |  *
 13 |  * @author Marco Kuhlmann
 14 |  */
 15 | public class Graph {
 16 | 
 17 | 	/**
 18 | 	 * The list of nodes of this graph.
 19 | 	 */
 20 | 	private final List<Node> nodes;
 21 | 	/**
 22 | 	 * The list of edges of this graph.
 23 | 	 */
 24 | 	private final List<Edge> edges;
 25 | 	/**
 26 | 	 * The unique ID of this graph.
 27 | 	 */
 28 | 	public final String id;
 29 | 
 30 | 	/**
 31 | 	 * Construct an empty graph.
 32 | 	 *
 33 | 	 * @param id the unique ID of the new graph
 34 | 	 */
 35 | 	public Graph(String id) {
 36 | 		this.id = id;
 37 | 		this.nodes = new ArrayList<Node>();
 38 | 		this.edges = new ArrayList<Edge>();
 39 | 	}
 40 | 
 41 | 	/**
 42 | 	 * Adds a new node to this graph.
 43 | 	 *
 44 | 	 * @param form the word form to be associated with the new node
 45 | 	 * @param lemma the lemma to be associated with the new node
 46 | 	 * @param pos the part-of-speech tag to be associated with the new node
 47 | 	 * @param isTop a flag indicating whether the new node is a TOP node
 48 | 	 * @param isPred a flag indicating whether the new node represents a
 49 | 	 * predicate
 50 | 	 * @param sense the sense or frame to be associated with the new node
 51 | 	 * @return the newly added node
 52 | 	 */
 53 | 	public Node addNode(String form, String lemma, String pos, boolean isTop, boolean isPred, String sense) {
 54 | 		Node node = new Node(nodes.size(), form, lemma, pos, isTop, isPred, sense);
 55 | 		nodes.add(node);
 56 | 		return node;
 57 | 	}
 58 | 
 59 | 	/**
 60 | 	 * Adds a new edge to this graph.
 61 | 	 *
 62 | 	 * @param source the ID of the source node of the new edge
 63 | 	 * @param target the ID of the target node of the new edge
 64 | 	 * @param label the label of the new edge
 65 | 	 * @return the newly added edge
 66 | 	 */
 67 | 	public Edge addEdge(int source, int target, String label) {
 68 | 		assert 0 <= source && source < nodes.size();
 69 | 		assert 0 <= target && target < nodes.size();
 70 | 		Edge edge = new Edge(edges.size(), source, target, label);
 71 | 		edges.add(edge);
 72 | 		nodes.get(source).addOutgoingEdge(edge);
 73 | 		nodes.get(target).addIncomingEdge(edge);
 74 | 		return edge;
 75 | 	}
 76 | 
 77 | 	/**
 78 | 	 * Returns the number of nodes of this graph.
 79 | 	 *
 80 | 	 * @return the number of nodes of this graph
 81 | 	 */
 82 | 	public int getNNodes() {
 83 | 		return nodes.size();
 84 | 	}
 85 | 
 86 | 	/**
 87 | 	 * Returns the nodes of this graph. This returns a list whose elements are
 88 | 	 * sorted in increasing order of their IDs.
 89 | 	 *
 90 | 	 * @return the nodes of this graph
 91 | 	 */
 92 | 	public List<Node> getNodes() {
 93 | 		return nodes;
 94 | 	}
 95 | 
 96 | 	/**
 97 | 	 * Returns the node of this graph with the specified ID.
 98 | 	 *
 99 | 	 * @param node the ID of the node to return
100 | 	 * @return the node with the specified ID
101 | 	 */
102 | 	public Node getNode(int node) {
103 | 		assert 0 <= node && node < nodes.size();
104 | 		return nodes.get(node);
105 | 	}
106 | 
107 | 	/**
108 | 	 * Returns the number of edges of this graph.
109 | 	 *
110 | 	 * @return the number of edges of this graph
111 | 	 */
112 | 	public int getNEdges() {
113 | 		return edges.size();
114 | 	}
115 | 
116 | 	/**
117 | 	 * Returns the edges of this graph. This returns a list whose elements are
118 | 	 * sorted in increasing order of their IDs.
119 | 	 *
120 | 	 * @return the edges of this graph
121 | 	 */
122 | 	public List<Edge> getEdges() {
123 | 		return edges;
124 | 	}
125 | 
126 | 	/**
127 | 	 * Returns the edge of this graph with the specified ID.
128 | 	 *
129 | 	 * @param edge the ID of the edge of return
130 | 	 * @return the edge with the specified ID
131 | 	 */
132 | 	public Edge getEdge(int edge) {
133 | 		assert 0 <= edge && edge < edges.size();
134 | 		return edges.get(edge);
135 | 	}
136 | 
137 | 	/**
138 | 	 * Returns the top nodes of this graph. This returns a list whose elements
139 | 	 * are sorted in increasing order of their IDs.
140 | 	 *
141 | 	 * @return the top nodes of this graph
142 | 	 */
143 | 	public List<Node> getTops() {
144 | 		List<Node> roots = new LinkedList<Node>();
145 | 		for (Node node : nodes) {
146 | 			if (node.isTop) {
147 | 				roots.add(node);
148 | 			}
149 | 		}
150 | 		return roots;
151 | 	}
152 | 
153 | 	/**
154 | 	 * Returns the predicates of this graph. This returns a list whose elements
155 | 	 * are sorted in increasing order of their IDs.
156 | 	 *
157 | 	 * @return the predicates of this graph
158 | 	 */
159 | 	public List<Node> getPreds() {
160 | 		List<Node> preds = new LinkedList<Node>();
161 | 		for (Node node : nodes) {
162 | 			if (node.isPred) {
163 | 				preds.add(node);
164 | 			}
165 | 		}
166 | 		return preds;
167 | 	}
168 | }
169 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/graph/InspectedGraph.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * See the file "LICENSE" for the full license governing this code.
  3 |  */
  4 | package se.liu.ida.nlp.sdp.toolkit.graph;
  5 | 
  6 | /**
  7 |  * Inspect graph-theoretic properties.
  8 |  *
  9 |  * @author Marco Kuhlmann
 10 |  */
 11 | public class InspectedGraph {
 12 | 
 13 | 	/**
 14 | 	 * The analyzed graph.
 15 | 	 */
 16 | 	private final Graph graph;
 17 | 
 18 | 	/**
 19 | 	 * The number of non-wall node of this graph.
 20 | 	 */
 21 | 	private final int nNonWallNodes;
 22 | 
 23 | 	/**
 24 | 	 * DFS of the graph.
 25 | 	 */
 26 | 	private final DFS directedDFS;
 27 | 
 28 | 	/**
 29 | 	 * DFS of the undirected graph.
 30 | 	 */
 31 | 	private final DFS undirectedDFS;
 32 | 
 33 | 	/**
 34 | 	 * Flags indicating whether a node is a singleton.
 35 | 	 */
 36 | 	private final boolean[] isSingleton;
 37 | 
 38 | 	/**
 39 | 	 * The number of singleton nodes in this graph.
 40 | 	 */
 41 | 	private final int nSingletons;
 42 | 
 43 | 	/**
 44 | 	 * Construct a new inspector for the specified graph.
 45 | 	 *
 46 | 	 * @param graph the graph to be inspected
 47 | 	 */
 48 | 	public InspectedGraph(Graph graph) {
 49 | 		this.graph = graph;
 50 | 
 51 | 		int nNodes = graph.getNNodes();
 52 | 		this.nNonWallNodes = nNodes - 1;
 53 | 		this.isSingleton = new boolean[nNodes];
 54 | 		this.nSingletons = computeSingletons();
 55 | 
 56 | 		this.directedDFS = new DFS(graph);
 57 | 		this.undirectedDFS = new DFS(graph, true);
 58 | 	}
 59 | 
 60 | 	/**
 61 | 	 * Returns the number of non-wall nodes of the inspected graph.
 62 | 	 *
 63 | 	 * @return the number of non-wall nodes of the inspected graph
 64 | 	 */
 65 | 	public int getNNonWallNodes() {
 66 | 		return nNonWallNodes;
 67 | 	}
 68 | 
 69 | 	/**
 70 | 	 * Returns the number of weakly connected components of the inspected graph.
 71 | 	 *
 72 | 	 * @return The number of weakly connected components of the inspected graph
 73 | 	 */
 74 | 	public int getNComponents() {
 75 | 		return undirectedDFS.getNRuns();
 76 | 	}
 77 | 
 78 | 	/**
 79 | 	 * Tests whether the inspected graph contains a cycle.
 80 | 	 *
 81 | 	 * @return {@code true} if and only if the inspected graph contains a cycle
 82 | 	 */
 83 | 	public boolean isCyclic() {
 84 | 		return directedDFS.isCyclic();
 85 | 	}
 86 | 
 87 | 	/**
 88 | 	 * Computes flags indicating whether a node is a singleton.
 89 | 	 */
 90 | 	private int computeSingletons() {
 91 | 		int n = 0;
 92 | 		for (Node node : graph.getNodes()) {
 93 | 			if (node.id != 0 && !node.hasIncomingEdges() && !node.hasOutgoingEdges() && !node.isTop) {
 94 | 				isSingleton[node.id] = true;
 95 | 				n++;
 96 | 			}
 97 | 		}
 98 | 		return n;
 99 | 	}
100 | 
101 | 	/**
102 | 	 * Tests whether the specified node is a singleton.
103 | 	 *
104 | 	 * @param id a node id
105 | 	 * @return {@code true} if the specified node is a singleton
106 | 	 */
107 | 	public boolean isSingleton(int id) {
108 | 		return isSingleton[id];
109 | 	}
110 | 
111 | 	/**
112 | 	 * Returns the number of singleton nodes of this graph. A node is a
113 | 	 * singleton if it has no neighbors and is not a top node.
114 | 	 *
115 | 	 * @return the number of singleton nodes of this graph
116 | 	 */
117 | 	public int getNSingletons() {
118 | 		return nSingletons;
119 | 	}
120 | 
121 | 	/**
122 | 	 * Computes the maximal indegree of the nodes in the inspected graph.
123 | 	 *
124 | 	 * @return the maximal indegree of the nodes in the inspected graph
125 | 	 */
126 | 	public int getMaximalIndegree() {
127 | 		int max = 0;
128 | 		for (Node node : graph.getNodes()) {
129 | 			max = Math.max(max, node.getNIncomingEdges());
130 | 		}
131 | 		return max;
132 | 	}
133 | 
134 | 	/**
135 | 	 * Computes the maximal outdegree of the nodes in the inspected graph.
136 | 	 *
137 | 	 * @return the maximal outdegree of the nodes in the inspected graph
138 | 	 */
139 | 	public int getMaximalOutdegree() {
140 | 		int max = 0;
141 | 		for (Node node : graph.getNodes()) {
142 | 			max = Math.max(max, node.getNOutgoingEdges());
143 | 		}
144 | 		return max;
145 | 	}
146 | 
147 | 	/**
148 | 	 * Returns the number of root nodes in the inspected graph. A
149 | 	 * <em>root node</em> is a node without incoming edges. The wall node is not
150 | 	 * considered to be a root node.
151 | 	 *
152 | 	 * @return the number of root nodes in the inspected graph
153 | 	 */
154 | 	public int getNRootNodes() {
155 | 		int nRootNodes = 0;
156 | 		for (Node node : graph.getNodes()) {
157 | 			nRootNodes += node.hasIncomingEdges() ? 0 : 1;
158 | 		}
159 | 		return nRootNodes - 1; // the wall node
160 | 	}
161 | 
162 | 	/**
163 | 	 * Returns the number of leaf nodes in the inspected graph. A
164 | 	 * <em>leaf node</em> is a node without outgoing edges. The wall node is not
165 | 	 * considered to be a leaf node.
166 | 	 *
167 | 	 * @return the number of leaf nodes in the inspected graph
168 | 	 */
169 | 	public int getNLeafNodes() {
170 | 		int nLeafNodes = 0;
171 | 		for (Node node : graph.getNodes()) {
172 | 			nLeafNodes += node.hasOutgoingEdges() ? 0 : 1;
173 | 		}
174 | 		return nLeafNodes - 1; // the wall node
175 | 	}
176 | 
177 | 	/**
178 | 	 * Tests whether the inspected graph is a forest. A forest is an acyclic
179 | 	 * graph in which every node has at most one incoming edge.
180 | 	 *
181 | 	 * @return {@code true} if and only if the inspected graph is a forest
182 | 	 */
183 | 	public boolean isForest() {
184 | 		return !isCyclic() && getMaximalIndegree() <= 1;
185 | 	}
186 | 
187 | 	/**
188 | 	 * Tests whether the inspected graph is a tree. A tree is a forest with
189 | 	 * exactly one root node.
190 | 	 *
191 | 	 * @return {@code true} if and only if the inspected graph is a tree
192 | 	 */
193 | 	public boolean isTree() {
194 | 		return isForest() && getNRootNodes() - getNSingletons() == 1;
195 | 	}
196 | 
197 | 	/**
198 | 	 * Tests whether the inspected graph is noncrossing. A graph is noncrossing
199 | 	 * if there are no overlapping edges.
200 | 	 *
201 | 	 * @return {@code true} if and only if the inspected graph is noncrossing
202 | 	 */
203 | 	public boolean isNoncrossing() {
204 | 		for (Edge edge1 : graph.getEdges()) {
205 | 			int min1 = Math.min(edge1.source, edge1.target);
206 | 			int max1 = Math.max(edge1.source, edge1.target);
207 | 			for (Edge edge2 : graph.getEdges()) {
208 | 				int min2 = Math.min(edge2.source, edge2.target);
209 | 				int max2 = Math.max(edge2.source, edge2.target);
210 | 				if (overlap(min1, max1, min2, max2)) {
211 | 					return false;
212 | 				}
213 | 			}
214 | 		}
215 | 		return true;
216 | 	}
217 | 
218 | 	/**
219 | 	 * Tests whether the specified edges overlap (cross).
220 | 	 *
221 | 	 * @param min1 the position of the left node of the first edge
222 | 	 * @param max1 the position of the right node of the first edge
223 | 	 * @param min2 the position of the left node of the second edge
224 | 	 * @param max2 the position of the right node of the second edge
225 | 	 * @return {@code true} if and only if the specified edges overlap
226 | 	 */
227 | 	private static boolean overlap(int min1, int max1, int min2, int max2) {
228 | 		return min1 < min2 && min2 < max1 && max1 < max2 || min2 < min1 && min1 < max2 && max2 < max1;
229 | 	}
230 | 
231 | 	/**
232 | 	 * Tests whether the inspected graph is projective. A graph is projective if
233 | 	 * it is noncrossing and there are no covered roots. In the context of
234 | 	 * semantic dependency graphs, a <em>root</em> is defined as a non-singleton
235 | 	 * node without incoming edges.
236 | 	 *
237 | 	 * @return {@code true} if and only if the inspected graph is projective
238 | 	 */
239 | 	public boolean isProjective() {
240 | 		if (!isNoncrossing()) {
241 | 			return false;
242 | 		} else {
243 | 			for (Edge edge : graph.getEdges()) {
244 | 				int min = Math.min(edge.source, edge.target);
245 | 				int max = Math.max(edge.source, edge.target);
246 | 				for (int i = min + 1; i < max; i++) {
247 | 					Node node = graph.getNode(i);
248 | 					if (!isSingleton(i) && !node.hasIncomingEdges()) {
249 | 						return false;
250 | 					}
251 | 				}
252 | 			}
253 | 			return true;
254 | 		}
255 | 	}
256 | }
257 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/graph/Node.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * See the file "LICENSE" for the full license governing this code.
  3 |  */
  4 | package se.liu.ida.nlp.sdp.toolkit.graph;
  5 | 
  6 | import java.util.LinkedList;
  7 | import java.util.List;
  8 | 
  9 | /**
 10 |  * A node in a semantic dependency graph.
 11 |  *
 12 |  * @author Marco Kuhlmann
 13 |  */
 14 | public class Node {
 15 | 
 16 | 	/**
 17 | 	 * The unique ID of this node.
 18 | 	 */
 19 | 	public final int id;
 20 | 	/**
 21 | 	 * The list of incoming edges of this node.
 22 | 	 */
 23 | 	public final List<Edge> incomingEdges;
 24 | 	/**
 25 | 	 * The list of outgoing edges of this node.
 26 | 	 */
 27 | 	public final List<Edge> outgoingEdges;
 28 | 	/**
 29 | 	 * The word form associated with this node.
 30 | 	 */
 31 | 	public final String form;
 32 | 	/**
 33 | 	 * The lemma associated with this node.
 34 | 	 */
 35 | 	public final String lemma;
 36 | 	/**
 37 | 	 * The part-of-speech tag associated with this node.
 38 | 	 */
 39 | 	public final String pos;
 40 | 	/**
 41 | 	 * Whether this node is a top node.
 42 | 	 */
 43 | 	public final boolean isTop;
 44 | 	/**
 45 | 	 * Whether this node is a predicate.
 46 | 	 */
 47 | 	public final boolean isPred;
 48 | 	/**
 49 | 	 * The sense or frame of this predicate.
 50 | 	 */
 51 | 	public final String sense;
 52 | 
 53 | 	/**
 54 | 	 * Construct a new node.
 55 | 	 *
 56 | 	 * @param id the unique ID of the new node
 57 | 	 * @param form the word form to be associated with the new node
 58 | 	 * @param lemma the lemma to be associated with the new node
 59 | 	 * @param pos the part-of-speech tag to be associated with the new node
 60 | 	 * @param isTop a flag indicating whether the new node is a top node
 61 | 	 * @param isPred a flag indicating whether the new node is a predicate
 62 | 	 * @param sense the sense or frame to be associated with the new node
 63 | 	 */
 64 | 	public Node(int id, String form, String lemma, String pos, boolean isTop, boolean isPred, String sense) {
 65 | 		this.id = id;
 66 | 		this.incomingEdges = new LinkedList<Edge>();
 67 | 		this.outgoingEdges = new LinkedList<Edge>();
 68 | 		this.form = form;
 69 | 		this.lemma = lemma;
 70 | 		this.pos = pos;
 71 | 		this.isTop = isTop;
 72 | 		this.isPred = isPred;
 73 | 		this.sense = sense;
 74 | 	}
 75 | 
 76 | 	/**
 77 | 	 * Adds the specified edge as an incoming edge of this node.
 78 | 	 *
 79 | 	 * @param edge the edge to be added as an incoming edge
 80 | 	 * @return the newly added edge
 81 | 	 */
 82 | 	public Edge addIncomingEdge(Edge edge) {
 83 | 		incomingEdges.add(edge);
 84 | 		return edge;
 85 | 	}
 86 | 
 87 | 	/**
 88 | 	 * Tests whether this node has any incoming edges.
 89 | 	 *
 90 | 	 * @return {@code true} if this node has incoming edges; {@code false}
 91 | 	 * otherwise
 92 | 	 */
 93 | 	public boolean hasIncomingEdges() {
 94 | 		return !incomingEdges.isEmpty();
 95 | 	}
 96 | 
 97 | 	/**
 98 | 	 * Returns the number of incoming edges of this node.
 99 | 	 *
100 | 	 * @return the number of incoming edges of this node
101 | 	 */
102 | 	public int getNIncomingEdges() {
103 | 		return incomingEdges.size();
104 | 	}
105 | 
106 | 	/**
107 | 	 * Returns the incoming edges of this node.
108 | 	 *
109 | 	 * @return the incoming edges of this node
110 | 	 */
111 | 	public List<Edge> getIncomingEdges() {
112 | 		return incomingEdges;
113 | 	}
114 | 
115 | 	/**
116 | 	 * Adds the specified edge as an outgoing edge of this node.
117 | 	 *
118 | 	 * @param edge the edge to be added as an outgoing edge
119 | 	 * @return the newly added edge
120 | 	 */
121 | 	public Edge addOutgoingEdge(Edge edge) {
122 | 		outgoingEdges.add(edge);
123 | 		return edge;
124 | 	}
125 | 
126 | 	/**
127 | 	 * Tests whether this node has any outgoing edges.
128 | 	 *
129 | 	 * @return {@code true} if this node has any outgoing edges; {@code false}
130 | 	 * otherwise
131 | 	 */
132 | 	public boolean hasOutgoingEdges() {
133 | 		return !outgoingEdges.isEmpty();
134 | 	}
135 | 
136 | 	/**
137 | 	 * Returns the number of outgoing edges of this node.
138 | 	 *
139 | 	 * @return the number of outgoing edges of this node
140 | 	 */
141 | 	public int getNOutgoingEdges() {
142 | 		return outgoingEdges.size();
143 | 	}
144 | 
145 | 	/**
146 | 	 * Returns the outgoing edges of this node.
147 | 	 *
148 | 	 * @return the outgoing edges of this node.
149 | 	 */
150 | 	public List<Edge> getOutgoingEdges() {
151 | 		return outgoingEdges;
152 | 	}
153 | }
154 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/io/Constants.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * See the file "LICENSE" for the full license governing this code.
 3 |  */
 4 | package se.liu.ida.nlp.sdp.toolkit.io;
 5 | 
 6 | /**
 7 |  * Constants related to CoNLL-type data formats.
 8 |  *
 9 |  * @author Marco Kuhlmann
10 |  */
public class Constants {

	// ---------------------------------------------------------------
	// Column layout
	// ---------------------------------------------------------------

	/**
	 * Separator placed between two adjacent data columns (a single tab).
	 */
	public static final String COLUMN_SEPARATOR = "\t";

	/**
	 * Placeholder written in a column whose value is undefined.
	 */
	public static final String UNDEFINED = "_";

	// ---------------------------------------------------------------
	// Annotations of the wall token
	// ---------------------------------------------------------------

	/**
	 * Word form carried by the wall token.
	 */
	public static final String WALL_FORM = "$$_FORM";

	/**
	 * Lemma carried by the wall token.
	 */
	public static final String WALL_LEMMA = "$$_LEMMA";

	/**
	 * Part-of-speech tag carried by the wall token.
	 */
	public static final String WALL_POS = "$$_POS";

	/**
	 * Sense (frame) carried by the wall token.
	 */
	public static final String WALL_SENSE = "$$_SENSE";
}
38 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphReader.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * See the file "LICENSE" for the full license governing this code.
 3 |  */
 4 | package se.liu.ida.nlp.sdp.toolkit.io;
 5 | 
 6 | import java.io.IOException;
 7 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 8 | 
 9 | /**
10 |  * Read semantic dependency graphs from a file.
11 |  *
12 |  * @author Marco Kuhlmann
13 |  */
14 | public interface GraphReader {
15 | 
16 | 	/**
17 | 	 * Reads a single graph.
18 | 	 *
19 | 	 * @return the graph read, or {@code null} if the end of the stream has been
20 | 	 * reached
21 | 	 * @throws IOException if an I/O error occurs
22 | 	 */
23 | 	abstract public Graph readGraph() throws IOException;
24 | 
25 | 	/**
26 | 	 * Closes the stream and releases any system resources associated with it.
27 | 	 *
28 | 	 * @throws IOException if an I/O error occurs
29 | 	 */
30 | 	abstract public void close() throws IOException;
31 | }
32 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphReader2014.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * See the file "LICENSE" for the full license governing this code.
  3 |  */
  4 | package se.liu.ida.nlp.sdp.toolkit.io;
  5 | 
  6 | import java.io.File;
  7 | import java.io.FileNotFoundException;
  8 | import java.io.IOException;
  9 | import java.io.Reader;
 10 | import java.util.ArrayList;
 11 | import java.util.List;
 12 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 13 | import se.liu.ida.nlp.sdp.toolkit.graph.Node;
 14 | 
 15 | /**
 16 |  * Read semantic dependency graphs in the SDP 2014 format. The format is
 17 |  * specified
 18 |  * <a href="http://alt.qcri.org/semeval2014/task8/index.php?id=dependency-formats">here</a>.
 19 |  *
 20 |  * @author Marco Kuhlmann
 21 |  */
 22 | public class GraphReader2014 extends ParagraphReader implements GraphReader {
 23 | 
 24 | 	/**
 25 | 	 * Create a graph reader, using the default input-buffer size.
 26 | 	 *
 27 | 	 * @param reader a Reader object to provide the underlying stream
 28 | 	 */
 29 | 	public GraphReader2014(Reader reader) {
 30 | 		super(reader);
 31 | 	}
 32 | 
 33 | 	/**
 34 | 	 * Create a graph reader that reads from the specified file. The file will
 35 | 	 * be read using the default input-buffer size.
 36 | 	 *
 37 | 	 * @param file the file to read from
 38 | 	 * @throws FileNotFoundException if the specified file does not exist, is a
 39 | 	 * directory rather than a regular file, or for some other reason cannot be
 40 | 	 * opened for reading
 41 | 	 */
 42 | 	public GraphReader2014(File file) throws FileNotFoundException {
 43 | 		super(file);
 44 | 	}
 45 | 
 46 | 	/**
 47 | 	 * Create a graph reader that reads from the specified file. The file will
 48 | 	 * be read using the default input-buffer size.
 49 | 	 *
 50 | 	 * @param fileName the name of the file to read from
 51 | 	 * @throws FileNotFoundException if the specified file does not exist, is a
 52 | 	 * directory rather than a regular file, or for some other reason cannot be
 53 | 	 * opened for reading
 54 | 	 */
 55 | 	public GraphReader2014(String fileName) throws FileNotFoundException {
 56 | 		super(fileName);
 57 | 	}
 58 | 
 59 | 	/**
 60 | 	 * Reads a single graph.
 61 | 	 *
 62 | 	 * @return the graph read, or {@code null} if the end of the stream has been
 63 | 	 * reached
 64 | 	 * @throws IOException if an I/O error occurs
 65 | 	 */
 66 | 	@Override
 67 | 	public Graph readGraph() throws IOException {
 68 | 		List<String> lines = super.readParagraph();
 69 | 		if (lines == null) {
 70 | 			return null;
 71 | 		} else {
 72 | 			// Every graph should contain at least one token.
 73 | 			assert lines.size() >= 2;
 74 | 			// Assert the format of the graph ID.
 75 | 			assert lines.get(0).matches("#2[0-9]{7}$");
 76 | 
 77 | 			Graph graph = new Graph(lines.get(0));
 78 | 
 79 | 			// Add the wall node.
 80 | 			graph.addNode(Constants.WALL_FORM, Constants.WALL_LEMMA, Constants.WALL_POS, false, false, Constants.WALL_SENSE);
 81 | 
 82 | 			// Add the token nodes to the graph and collect a list of predicates.
 83 | 			List<Integer> predicates = new ArrayList<Integer>();
 84 | 			for (String line : lines.subList(1, lines.size())) {
 85 | 				String[] tokens = line.split(Constants.COLUMN_SEPARATOR);
 86 | 
 87 | 				// There should be at least six columns: ID, FORM, LEMMA, POS, TOP, PRED
 88 | 				assert tokens.length >= 6;
 89 | 				// Enforce valid values for the TOP column.
 90 | 				assert tokens[4].equals("+") || tokens[4].equals("-");
 91 | 				// Enforce valid values for the PRED column.
 92 | 				assert tokens[5].equals("+") || tokens[5].equals("-");
 93 | 
 94 | 				String form = tokens[1];
 95 | 				String lemma = tokens[2];
 96 | 				String pos = tokens[3];
 97 | 				boolean isTop = tokens[4].equals("+");
 98 | 				boolean isPred = tokens[5].equals("+");
 99 | 
100 | 				Node node = graph.addNode(form, lemma, pos, isTop, isPred, Constants.UNDEFINED);
101 | 				// Make sure that the node ID equals the value of the ID column.
102 | 				assert node.id == Integer.parseInt(tokens[0]);
103 | 
104 | 				if (node.isPred) {
105 | 					predicates.add(node.id);
106 | 				}
107 | 			}
108 | 
109 | 			// Add the edges to the graph.
110 | 			int id = 1;
111 | 			for (String line : lines.subList(1, lines.size())) {
112 | 				String[] tokens = line.split(Constants.COLUMN_SEPARATOR);
113 | 
114 | 				// There should be exactly 6 + number of predicates many columns.
115 | 				assert tokens.length == 6 + predicates.size();
116 | 
117 | 				for (int i = 6; i < tokens.length; i++) {
118 | 					if (!tokens[i].equals(Constants.UNDEFINED)) {
119 | 						graph.addEdge(predicates.get(i - 6), id, tokens[i]);
120 | 					}
121 | 				}
122 | 				id++;
123 | 			}
124 | 
125 | 			// If a node is labeled as a PRED, it should have outgoing edges.
126 | 			for (Node node : graph.getNodes()) {
127 | 				assert !node.isPred || node.hasOutgoingEdges();
128 | 			}
129 | 
130 | 			return graph;
131 | 		}
132 | 	}
133 | }
134 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphReader2015.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * See the file "LICENSE" for the full license governing this code.
  3 |  */
  4 | package se.liu.ida.nlp.sdp.toolkit.io;
  5 | 
  6 | import java.io.File;
  7 | import java.io.FileNotFoundException;
  8 | import java.io.IOException;
  9 | import java.io.Reader;
 10 | import java.io.UncheckedIOException;
 11 | import java.util.ArrayList;
 12 | import java.util.List;
 13 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 14 | import se.liu.ida.nlp.sdp.toolkit.graph.Node;
 15 | 
 16 | /**
 17 |  * Read semantic dependency graphs in the SDP 2015 format. The format is
 18 |  * specified
 19 |  * <a href="http://alt.qcri.org/semeval2015/task18/index.php?id=data-and-tools">here</a>.
 20 |  *
 21 |  * @author Marco Kuhlmann
 22 |  */
 23 | public class GraphReader2015 extends ParagraphReader implements GraphReader {
 24 | 
 25 | 	/**
 26 | 	 * Create a graph reader, using the default input-buffer size.
 27 | 	 *
 28 | 	 * @param reader a Reader object to provide the underlying stream
 29 | 	 */
 30 | 	public GraphReader2015(Reader reader) {
 31 | 		super(reader);
 32 | 		readFirstLine();
 33 | 	}
 34 | 
 35 | 	/**
 36 | 	 * Create a graph reader that reads from the specified file. The file will
 37 | 	 * be read using the default input-buffer size.
 38 | 	 *
 39 | 	 * @param file the file to read from
 40 | 	 * @throws FileNotFoundException if the specified file does not exist, is a
 41 | 	 * directory rather than a regular file, or for some other reason cannot be
 42 | 	 * opened for reading
 43 | 	 */
 44 | 	public GraphReader2015(File file) throws FileNotFoundException {
 45 | 		super(file);
 46 | 		readFirstLine();
 47 | 	}
 48 | 
 49 | 	/**
 50 | 	 * Create a graph reader that reads from the specified file. The file will
 51 | 	 * be read using the default input-buffer size.
 52 | 	 *
 53 | 	 * @param fileName the name of the file to read from
 54 | 	 * @throws FileNotFoundException if the specified file does not exist, is a
 55 | 	 * directory rather than a regular file, or for some other reason cannot be
 56 | 	 * opened for reading
 57 | 	 */
 58 | 	public GraphReader2015(String fileName) throws FileNotFoundException {
 59 | 		super(fileName);
 60 | 		readFirstLine();
 61 | 	}
 62 | 
 63 | 	/**
 64 | 	 * Reads the format identifier line.
 65 | 	 */
 66 | 	private void readFirstLine() {
 67 | 		try {
 68 | 			String line = super.readLine();
 69 | 			assert line.equals("#SDP 2015");
 70 | 		} catch (IOException e) {
 71 | 			throw new UncheckedIOException(e);
 72 | 		}
 73 | 	}
 74 | 
 75 | 	/**
 76 | 	 * Reads a single graph.
 77 | 	 *
 78 | 	 * @return the graph read, or {@code null} if the end of the stream has been
 79 | 	 * reached
 80 | 	 * @throws IOException if an I/O error occurs
 81 | 	 */
 82 | 	@Override
 83 | 	public Graph readGraph() throws IOException {
 84 | 		List<String> lines = super.readParagraph();
 85 | 		if (lines == null) {
 86 | 			return null;
 87 | 		} else {
 88 | 			// Every graph should contain at least one token.
 89 | 			assert lines.size() >= 2;
 90 | 			// Assert the format of the graph ID.
 91 | 			assert lines.get(0).matches("#2[0-9]{7}$");
 92 | 
 93 | 			Graph graph = new Graph(lines.get(0));
 94 | 
 95 | 			// Add the wall node.
 96 | 			graph.addNode(Constants.WALL_FORM, Constants.WALL_LEMMA, Constants.WALL_POS, false, false, Constants.WALL_SENSE);
 97 | 
 98 | 			// Add the token nodes to the graph and collect a list of predicates.
 99 | 			List<Integer> predicates = new ArrayList<Integer>();
100 | 			for (String line : lines.subList(1, lines.size())) {
101 | 				String[] tokens = line.split(Constants.COLUMN_SEPARATOR);
102 | 
103 | 				// There should be at least seven columns: ID, FORM, LEMMA, POS, TOP, PRED, SENSE
104 | 				assert tokens.length >= 7;
105 | 				// Enforce valid values for the TOP column.
106 | 				assert tokens[4].equals("+") || tokens[4].equals("-");
107 | 				// Enforce valid values for the PRED column.
108 | 				assert tokens[5].equals("+") || tokens[5].equals("-");
109 | 
110 | 				String form = tokens[1];
111 | 				String lemma = tokens[2];
112 | 				String pos = tokens[3];
113 | 				boolean isTop = tokens[4].equals("+");
114 | 				boolean isPred = tokens[5].equals("+");
115 | 				String sense = tokens[6];
116 | 
117 | 				Node node = graph.addNode(form, lemma, pos, isTop, isPred, sense);
118 | 				// Make sure that the node ID equals the value of the ID column.
119 | 				assert node.id == Integer.parseInt(tokens[0]);
120 | 
121 | 				if (node.isPred) {
122 | 					predicates.add(node.id);
123 | 				}
124 | 			}
125 | 
126 | 			// Add the edges to the graph.
127 | 			int id = 1;
128 | 			for (String line : lines.subList(1, lines.size())) {
129 | 				String[] tokens = line.split(Constants.COLUMN_SEPARATOR);
130 | 
131 | 				// There should be exactly 7 + number of predicates many columns.
132 | 				assert tokens.length == 7 + predicates.size();
133 | 
134 | 				for (int i = 7; i < tokens.length; i++) {
135 | 					if (!tokens[i].equals(Constants.UNDEFINED)) {
136 | 						graph.addEdge(predicates.get(i - 7), id, tokens[i]);
137 | 					}
138 | 				}
139 | 				id++;
140 | 			}
141 | 
142 | 			// If a node is labeled as a PRED, it should have outgoing edges.
143 | 			for (Node node : graph.getNodes()) {
144 | 				assert !node.isPred || node.hasOutgoingEdges();
145 | 			}
146 | 
147 | 			return graph;
148 | 		}
149 | 	}
150 | }
151 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphWriter.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * See the file "LICENSE" for the full license governing this code.
 3 |  */
 4 | package se.liu.ida.nlp.sdp.toolkit.io;
 5 | 
 6 | import java.io.IOException;
 7 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 8 | 
 9 | /**
10 |  * Write semantic dependency graphs to a file.
11 |  *
12 |  * @author Marco Kuhlmann
13 |  */
14 | public interface GraphWriter {
15 | 
16 | 	/**
17 | 	 * Writes a single graph.
18 | 	 *
19 | 	 * @param graph the graph to be written
20 | 	 * @throws IOException if an I/O error occurs
21 | 	 */
22 | 	abstract public void writeGraph(Graph graph) throws IOException;
23 | 
24 | 	/**
25 | 	 * Closes the stream and releases any system resources associated with it.
26 | 	 *
27 | 	 * @throws IOException if an I/O error occurs
28 | 	 */
29 | 	abstract public void close() throws IOException;
30 | }
31 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphWriter2014.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * See the file "LICENSE" for the full license governing this code.
  3 |  */
  4 | package se.liu.ida.nlp.sdp.toolkit.io;
  5 | 
  6 | import java.io.BufferedWriter;
  7 | import java.io.File;
  8 | import java.io.FileWriter;
  9 | import java.io.IOException;
 10 | import java.io.PrintWriter;
 11 | import se.liu.ida.nlp.sdp.toolkit.graph.Edge;
 12 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 13 | import se.liu.ida.nlp.sdp.toolkit.graph.Node;
 14 | 
 15 | /**
 16 |  * Write semantic dependency graphs in the SDP 2014 format. The format is
 17 |  * specified
 18 |  * <a href="http://alt.qcri.org/semeval2014/task8/index.php?id=dependency-formats">here</a>.
 19 |  *
 20 |  * @author Marco Kuhlmann
 21 |  */
 22 | public class GraphWriter2014 implements GraphWriter {
 23 | 
 24 | 	/**
 25 | 	 * The low-level writer.
 26 | 	 */
 27 | 	private final PrintWriter writer;
 28 | 
 29 | 	/**
 30 | 	 * Create a graph writer that writes to the specified PrintWriter.
 31 | 	 *
 32 | 	 * @param writer the PrintWriter to be written to
 33 | 	 */
 34 | 	public GraphWriter2014(PrintWriter writer) {
 35 | 		this.writer = writer;
 36 | 	}
 37 | 
 38 | 	/**
 39 | 	 * Create a graph writer that writes to the specified file.
 40 | 	 *
 41 | 	 * @param file the file to write to
 42 | 	 * @throws IOException if the specified file does not exist, is a directory
 43 | 	 * rather than a regular file, or for some other reason cannot be opened for
 44 | 	 * writing
 45 | 	 */
 46 | 	public GraphWriter2014(File file) throws IOException {
 47 | 		this(new PrintWriter(new BufferedWriter(new FileWriter(file))));
 48 | 	}
 49 | 
 50 | 	/**
 51 | 	 * Create a graph writer that writes to the specified file.
 52 | 	 *
 53 | 	 * @param fileName the name of the file to read from
 54 | 	 * @throws IOException if the specified file does not exist, is a directory
 55 | 	 * rather than a regular file, or for some other reason cannot be opened for
 56 | 	 * writing
 57 | 	 */
 58 | 	public GraphWriter2014(String fileName) throws IOException {
 59 | 		this(new File(fileName));
 60 | 	}
 61 | 
 62 | 	/**
 63 | 	 * Writes a single graph.
 64 | 	 *
 65 | 	 * @param graph the graph to be written
 66 | 	 * @throws IOException if an I/O error occurs
 67 | 	 */
 68 | 	@Override
 69 | 	public void writeGraph(Graph graph) throws IOException {
 70 | 		int nNodes = graph.getNNodes();
 71 | 
 72 | 		String[][] labels = new String[nNodes][nNodes];
 73 | 		for (Edge edge : graph.getEdges()) {
 74 | 			labels[edge.source][edge.target] = edge.label;
 75 | 		}
 76 | 
 77 | 		writer.println(graph.id);
 78 | 
 79 | 		for (Node node : graph.getNodes()) {
 80 | 			if (node.id > 0) {
 81 | 				StringBuilder sb = new StringBuilder();
 82 | 				// Field 1: ID
 83 | 				sb.append(Integer.toString(node.id));
 84 | 				sb.append(Constants.COLUMN_SEPARATOR);
 85 | 				// Field 2: FORM
 86 | 				sb.append(node.form);
 87 | 				sb.append(Constants.COLUMN_SEPARATOR);
 88 | 				// Field 3: LEMMA
 89 | 				sb.append(node.lemma);
 90 | 				sb.append(Constants.COLUMN_SEPARATOR);
 91 | 				// Field 4: POS
 92 | 				sb.append(node.pos);
 93 | 				sb.append(Constants.COLUMN_SEPARATOR);
 94 | 				// Field 5: TOP
 95 | 				sb.append(node.isTop ? "+" : "-");
 96 | 				sb.append(Constants.COLUMN_SEPARATOR);
 97 | 				// Field 6: PRED
 98 | 				sb.append(node.isPred ? "+" : "-");
 99 | 
100 | 				for (Node source : graph.getNodes().subList(1, nNodes)) {
101 | 					if (source.isPred) {
102 | 						sb.append(Constants.COLUMN_SEPARATOR);
103 | 						String label = labels[source.id][node.id];
104 | 						if (label == null) {
105 | 							sb.append(Constants.UNDEFINED);
106 | 						} else {
107 | 							sb.append(label);
108 | 						}
109 | 					}
110 | 				}
111 | 
112 | 				writer.println(sb.toString());
113 | 			}
114 | 		}
115 | 
116 | 		writer.println();
117 | 	}
118 | 
119 | 	/**
120 | 	 * Closes the stream and releases any system resources associated with it.
121 | 	 *
122 | 	 * @throws IOException if an I/O error occurs
123 | 	 */
124 | 	@Override
125 | 	public void close() throws IOException {
126 | 		writer.close();
127 | 	}
128 | }
129 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphWriter2015.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * See the file "LICENSE" for the full license governing this code.
  3 |  */
  4 | package se.liu.ida.nlp.sdp.toolkit.io;
  5 | 
  6 | import java.io.BufferedWriter;
  7 | import java.io.File;
  8 | import java.io.FileWriter;
  9 | import java.io.IOException;
 10 | import java.io.PrintWriter;
 11 | import se.liu.ida.nlp.sdp.toolkit.graph.Edge;
 12 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 13 | import se.liu.ida.nlp.sdp.toolkit.graph.Node;
 14 | 
 15 | /**
 16 |  * Write semantic dependency graphs in the SDP 2015 format. The format is
 17 |  * specified
 18 |  * <a href="http://alt.qcri.org/semeval2015/task18/index.php?id=data-and-tools">here</a>.
 19 |  *
 20 |  * @author Marco Kuhlmann
 21 |  */
 22 | public class GraphWriter2015 implements GraphWriter {
 23 | 
 24 | 	/**
 25 | 	 * The low-level writer.
 26 | 	 */
 27 | 	private final PrintWriter writer;
 28 | 
 29 | 	/**
 30 | 	 * Create a graph writer that writes to the specified PrintWriter.
 31 | 	 *
 32 | 	 * @param writer the PrintWriter to be written to
 33 | 	 */
 34 | 	public GraphWriter2015(PrintWriter writer) {
 35 | 		this.writer = writer;
 36 | 		writeFirstLine();
 37 | 	}
 38 | 
 39 | 	/**
 40 | 	 * Create a graph writer that writes to the specified file.
 41 | 	 *
 42 | 	 * @param file the file to write to
 43 | 	 * @throws IOException if the specified file does not exist, is a directory
 44 | 	 * rather than a regular file, or for some other reason cannot be opened for
 45 | 	 * writing
 46 | 	 */
 47 | 	public GraphWriter2015(File file) throws IOException {
 48 | 		this(new PrintWriter(new BufferedWriter(new FileWriter(file))));
 49 | 	}
 50 | 
 51 | 	/**
 52 | 	 * Create a graph writer that writes to the specified file.
 53 | 	 *
 54 | 	 * @param fileName the name of the file to read from
 55 | 	 * @throws IOException if the specified file does not exist, is a directory
 56 | 	 * rather than a regular file, or for some other reason cannot be opened for
 57 | 	 * writing
 58 | 	 */
 59 | 	public GraphWriter2015(String fileName) throws IOException {
 60 | 		this(new File(fileName));
 61 | 	}
 62 | 
 63 | 	/**
 64 | 	 * Writes the format identifier line.
 65 | 	 */
 66 | 	private void writeFirstLine() {
 67 | 		writer.println("#SDP 2015");
 68 | 	}
 69 | 
 70 | 	/**
 71 | 	 * Writes a single graph.
 72 | 	 *
 73 | 	 * @param graph the graph to be written
 74 | 	 * @throws IOException if an I/O error occurs
 75 | 	 */
 76 | 	@Override
 77 | 	public void writeGraph(Graph graph) throws IOException {
 78 | 		int nNodes = graph.getNNodes();
 79 | 
 80 | 		String[][] labels = new String[nNodes][nNodes];
 81 | 		for (Edge edge : graph.getEdges()) {
 82 | 			labels[edge.source][edge.target] = edge.label;
 83 | 		}
 84 | 
 85 | 		writer.println(graph.id);
 86 | 
 87 | 		for (Node node : graph.getNodes()) {
 88 | 			if (node.id > 0) {
 89 | 				StringBuilder sb = new StringBuilder();
 90 | 				// Field 1: ID
 91 | 				sb.append(Integer.toString(node.id));
 92 | 				sb.append(Constants.COLUMN_SEPARATOR);
 93 | 				// Field 2: FORM
 94 | 				sb.append(node.form);
 95 | 				sb.append(Constants.COLUMN_SEPARATOR);
 96 | 				// Field 3: LEMMA
 97 | 				sb.append(node.lemma);
 98 | 				sb.append(Constants.COLUMN_SEPARATOR);
 99 | 				// Field 4: POS
100 | 				sb.append(node.pos);
101 | 				sb.append(Constants.COLUMN_SEPARATOR);
102 | 				// Field 5: TOP
103 | 				sb.append(node.isTop ? "+" : "-");
104 | 				sb.append(Constants.COLUMN_SEPARATOR);
105 | 				// Field 6: PRED
106 | 				sb.append(node.isPred ? "+" : "-");
107 | 				sb.append(Constants.COLUMN_SEPARATOR);
108 | 				// Field 7: SENSE
109 | 				sb.append(node.sense);
110 | 
111 | 				for (Node source : graph.getNodes().subList(1, nNodes)) {
112 | 					if (source.isPred) {
113 | 						sb.append(Constants.COLUMN_SEPARATOR);
114 | 						String label = labels[source.id][node.id];
115 | 						if (label == null) {
116 | 							sb.append(Constants.UNDEFINED);
117 | 						} else {
118 | 							sb.append(label);
119 | 						}
120 | 					}
121 | 				}
122 | 
123 | 				writer.println(sb.toString());
124 | 			}
125 | 		}
126 | 
127 | 		writer.println();
128 | 	}
129 | 
130 | 	/**
131 | 	 * Closes the stream and releases any system resources associated with it.
132 | 	 *
133 | 	 * @throws IOException if an I/O error occurs
134 | 	 */
135 | 	public void close() throws IOException {
136 | 		writer.close();
137 | 	}
138 | }
139 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/io/ParagraphReader.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * See the file "LICENSE" for the full license governing this code.
 3 |  */
 4 | package se.liu.ida.nlp.sdp.toolkit.io;
 5 | 
 6 | import java.io.File;
 7 | import java.io.FileNotFoundException;
 8 | import java.io.FileReader;
 9 | import java.io.IOException;
10 | import java.io.LineNumberReader;
11 | import java.io.Reader;
12 | import java.util.Collections;
13 | import java.util.LinkedList;
14 | import java.util.List;
15 | 
16 | /**
17 |  * Reads paragraphs separated by blank lines from a character-input stream.
18 |  *
19 |  * @author Marco Kuhlmann
20 |  */
public class ParagraphReader extends LineNumberReader {

    /**
     * Wraps the given reader, using the default input-buffer size.
     *
     * @param reader a Reader object to provide the underlying stream
     */
    public ParagraphReader(Reader reader) {
        super(reader);
    }

    /**
     * Wraps the given reader, reading characters into a buffer of the given
     * size.
     *
     * @param reader a Reader object to provide the underlying stream
     * @param sz the size of the buffer
     */
    public ParagraphReader(Reader reader, int sz) {
        super(reader, sz);
    }

    /**
     * Opens the specified file for reading, using the default input-buffer
     * size.
     *
     * @param file the file to read from
     * @throws FileNotFoundException if the specified file does not exist, is a
     * directory rather than a regular file, or for some other reason cannot be
     * opened for reading
     */
    public ParagraphReader(File file) throws FileNotFoundException {
        super(new FileReader(file));
    }

    /**
     * Opens the named file for reading, using the default input-buffer size.
     *
     * @param fileName the name of the file to read from
     * @throws FileNotFoundException if the specified file does not exist, is a
     * directory rather than a regular file, or for some other reason cannot be
     * opened for reading
     */
    public ParagraphReader(String fileName) throws FileNotFoundException {
        super(new FileReader(fileName));
    }

    /**
     * Reads a single paragraph, that is, the lines up to (and not including)
     * the next empty line or the end of the stream. The terminating empty
     * line is consumed. If the very first line read is empty, the paragraph
     * is an empty list.
     *
     * @return the lines of the paragraph read, or {@code null} if the end of
     * the stream has been reached
     * @throws IOException if an I/O error occurs
     */
    public List<String> readParagraph() throws IOException {
        String current = super.readLine();

        // Nothing left to read.
        if (current == null) {
            return null;
        }

        // An empty line right away yields an empty paragraph.
        if (current.isEmpty()) {
            return Collections.<String>emptyList();
        }

        // Gather lines until an empty line or the end of the stream.
        List<String> paragraph = new LinkedList<String>();
        while (current != null && !current.isEmpty()) {
            paragraph.add(current);
            current = super.readLine();
        }
        return paragraph;
    }
}
94 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/Analyzer.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * See the file "LICENSE" for the full license governing this code.
  3 |  */
  4 | package se.liu.ida.nlp.sdp.toolkit.tools;
  5 | 
  6 | import java.io.InputStreamReader;
  7 | import java.text.NumberFormat;
  8 | import java.util.HashSet;
  9 | import java.util.Locale;
 10 | import java.util.Set;
 11 | import se.liu.ida.nlp.sdp.toolkit.graph.Edge;
 12 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 13 | import se.liu.ida.nlp.sdp.toolkit.graph.InspectedGraph;
 14 | import se.liu.ida.nlp.sdp.toolkit.graph.Node;
 15 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader;
 16 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015;
 17 | 
 18 | /**
 19 |  * Print statistics about a collection of graphs.
 20 |  *
 21 |  * @author Marco Kuhlmann
 22 |  */
 23 | public class Analyzer {
 24 | 
 25 |     // The number of graphs read.
 26 |     private int nGraphs;
 27 | 
 28 |     // The number of non-wall nodes seen.
 29 |     private int nNonWallNodes;
 30 | 
 31 |     // The number of edges seen.
 32 |     private int nEdges;
 33 | 
 34 |     // Set of labels seen.
 35 |     private final Set<String> labels = new HashSet<String>();
 36 | 
 37 |     // The number of singleton nodes.
 38 |     private int nSingletons;
 39 | 
 40 |     // Number of cyclic graphs.
 41 |     private int nCyclic;
 42 | 
 43 |     // Number of forests.
 44 |     private int nForests;
 45 | 
 46 |     // Number of trees.
 47 |     private int nTrees;
 48 | 
 49 |     // Number of graphs that are semi-connected.
 50 |     private int nFragmented;
 51 | 
 52 |     // Number of nodes that have more than one incoming edge.
 53 |     private int nReentrantNodes;
 54 | 
 55 |     // Number of topless graphs.
 56 |     private int nToplessGraphs;
 57 | 
 58 |     // Number of top nodes.
 59 |     private int nTopNodes;
 60 | 
 61 |     // Number of special nodes.
 62 |     private int nSpecialNodes;
 63 | 
 64 |     // Number of noncrossing graphs.
 65 |     private int nNoncrossingGraphs;
 66 | 
 67 |     // Number of projective graphs.
 68 |     private int nProjectiveGraphs;
 69 | 
 70 |     // Set of senses seen.
 71 |     private final Set<String> senses = new HashSet<String>();
 72 | 
 73 |     // Number of (non-singleton) nodes with senses.
 74 |     private int nScorablePredicates;
 75 | 
 76 |     /**
 77 |      * Updates the statistics with the specified graph.
 78 |      *
 79 |      * @param graph a semantic dependency graph
 80 |      */
 81 |     public void update(Graph graph) {
 82 |         InspectedGraph inspectedGraph = new InspectedGraph(graph);
 83 | 
 84 |         // number of graphs
 85 |         nGraphs++;
 86 | 
 87 |         // number of non-wall nodes
 88 |         nNonWallNodes += inspectedGraph.getNNonWallNodes();
 89 | 
 90 |         // number of edges
 91 |         nEdges += graph.getNEdges();
 92 | 
 93 |         // distinct labels
 94 |         for (Edge edge : graph.getEdges()) {
 95 |             labels.add(edge.label);
 96 |         }
 97 | 
 98 |         // number of singleton nodes
 99 |         nSingletons += inspectedGraph.getNSingletons();
100 | 
101 |         // number of cyclic graphs
102 |         nCyclic += inspectedGraph.isCyclic() ? 1 : 0;
103 | 
104 |         // number of forests
105 |         nForests += inspectedGraph.isForest() ? 1 : 0;
106 | 
107 |         // number of trees
108 |         nTrees += inspectedGraph.isTree() ? 1 : 0;
109 | 
110 |         // number of graphs that are fragmented
111 |         nFragmented += inspectedGraph.getNComponents() - 1 - inspectedGraph.getNSingletons() == 1 ? 0 : 1;
112 | 
113 |         // number of reentrant nodes
114 |         for (Node node : graph.getNodes()) {
115 |             nReentrantNodes += node.getNIncomingEdges() > 1 ? 1 : 0;
116 |         }
117 | 
118 |         // number of topless graphs
119 |         boolean isTopless = true;
120 |         for (Node node : graph.getNodes()) {
121 |             isTopless = isTopless && !node.isTop;
122 |         }
123 |         nToplessGraphs += isTopless ? 1 : 0;
124 | 
125 |         // number of top nodes
126 |         for (Node node : graph.getNodes()) {
127 |             nTopNodes += node.isTop ? 1 : 0;
128 |         }
129 | 
130 |         // number of special nodes
131 |         for (Node node : graph.getNodes()) {
132 |             nSpecialNodes += node.id != 0 && !inspectedGraph.isSingleton(node.id) && !node.hasIncomingEdges() && !node.isTop ? 1 : 0;
133 |         }
134 | 
135 |         // number of noncrossing graphs
136 |         nNoncrossingGraphs += inspectedGraph.isNoncrossing() ? 1 : 0;
137 | 
138 |         // number of projective graphs
139 |         nProjectiveGraphs += inspectedGraph.isProjective() ? 1 : 0;
140 | 
141 |         // number of senses
142 |         for (Node node : graph.getNodes()) {
143 |             if (node.id != 0 && !inspectedGraph.isSingleton(node.id) && node.isPred && node.pos.startsWith("V") && !node.sense.equals("_")) {
144 |                 senses.add(node.sense);
145 |                 nScorablePredicates += 1;
146 |             }
147 |         }
148 |     }
149 | 
150 |     /**
151 |      * Prints statistics about a set of graphs.
152 |      *
153 |      * @param args command-line arguments (ignored; the graphs are read from standard input)
154 |      * @throws Exception if an I/O exception occurs
155 |      */
156 |     public static void main(String[] args) throws Exception {
157 |         Analyzer analyzer = new Analyzer();
158 |         GraphReader reader = new GraphReader2015(new InputStreamReader(System.in));
159 |         Graph graph;
160 |         while ((graph = reader.readGraph()) != null) {
161 |             analyzer.update(graph);
162 |         }
163 |         reader.close();
164 |         System.err.format("number of labels:\t%d%n", analyzer.labels.size());
165 |         System.err.format("percentage of singletons:\t%s%n", percentage(analyzer.nSingletons, analyzer.nNonWallNodes));
166 |         System.err.format("edge density:\t%s%n", fraction(analyzer.nEdges, analyzer.nNonWallNodes - analyzer.nSingletons, 2));
167 |         System.err.format("percentage of graphs that are trees:\t%s%n", percentage(analyzer.nTrees, analyzer.nGraphs));
168 |         System.err.format("percentage of graphs that are projective:\t%s%n", percentage(analyzer.nProjectiveGraphs, analyzer.nGraphs));
169 |         System.err.format("percentage of graphs that are fragmented:\t%s%n", percentage(analyzer.nFragmented, analyzer.nGraphs));
170 |         System.err.format("percentage of nodes that have reentrancies:\t%s%n", percentage(analyzer.nReentrantNodes, analyzer.nNonWallNodes - analyzer.nSingletons));
171 |         System.err.format("percentage of graphs that are topless:\t%s%n", percentage(analyzer.nToplessGraphs, analyzer.nGraphs));
172 |         System.err.format("number of top nodes per graph:\t%s%n", fraction(analyzer.nTopNodes, analyzer.nGraphs));
173 |         System.err.format("percentage of nodes that are non-top roots:\t%s%n", percentage(analyzer.nSpecialNodes, analyzer.nNonWallNodes - analyzer.nSingletons));
174 |         System.err.format("number of senses:\t%d%n", analyzer.senses.size());
175 |         System.err.format("percentage of predicates with senses:\t%s%n", percentage(analyzer.nScorablePredicates, analyzer.nNonWallNodes - analyzer.nSingletons));
176 |     }
177 | 
178 |     public static String fraction(int a, int b, int digits) {
179 |         return String.format(String.format("%%.%df", digits), (double) a / (double) b);
180 |     }
181 | 
182 |     public static String fraction(int a, int b) {
183 |         return fraction(a, b, 4);
184 |     }
185 | 
186 |     public static String percentage(int enumerator, int denominator) {
187 |         return String.format("%.2f", (double) enumerator / (double) denominator * 100);
188 |     }
189 | }
190 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/BasicAnalyzer.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * See the file "LICENSE" for the full license governing this code.
 3 |  */
 4 | package se.liu.ida.nlp.sdp.toolkit.tools;
 5 | 
 6 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 7 | import se.liu.ida.nlp.sdp.toolkit.graph.InspectedGraph;
 8 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader;
 9 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015;
10 | 
11 | /**
12 |  * Print some basic statistics about a collection of graphs.
13 |  *
14 |  * @author Marco Kuhlmann
15 |  */
16 | public class BasicAnalyzer {
17 | 
18 |     public static void main(String[] args) throws Exception {
19 |         for (String arg : args) {
20 |             GraphReader reader = new GraphReader2015(arg);
21 |             int nGraphs = 0;
22 |             int nTokens = 0;
23 |             int nCyclic = 0;
24 |             int nForests = 0;
25 |             int nTrees = 0;
26 |             int nProjective = 0;
27 |             int maxIndegree = 0;
28 |             int maxOutdegree = 0;
29 |             Graph graph;
30 |             while ((graph = reader.readGraph()) != null) {
31 |                 InspectedGraph analyzer = new InspectedGraph(graph);
32 |                 nCyclic += analyzer.isCyclic() ? 1 : 0;
33 |                 nForests += analyzer.isForest() ? 1 : 0;
34 |                 nTrees += analyzer.isTree() ? 1 : 0;
35 |                 nProjective += analyzer.isProjective() ? 1 : 0;
36 |                 maxIndegree = Math.max(maxIndegree, analyzer.getMaximalIndegree());
37 |                 maxOutdegree = Math.max(maxOutdegree, analyzer.getMaximalOutdegree());
38 |                 nGraphs++;
39 |                 nTokens += graph.getNNodes() - 1;
40 |             }
41 |             reader.close();
42 |             System.out.format("%s: %d graphs, %d tokens%n", arg, nGraphs, nTokens);
43 |             System.out.format("  cyclic = %d%n", nCyclic);
44 |             System.out.format("  forests = %d%n", nForests);
45 |             System.out.format("  trees = %d%n", nTrees);
46 |             System.out.format("  projective = %d%n", nProjective);
47 |             System.out.format("  max indegree = %d%n", maxIndegree);
48 |             System.out.format("  max outdegree = %d%n", maxOutdegree);
49 |         }
50 |     }
51 | }
52 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/IOTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * See the file "LICENSE" for the full license governing this code.
 3 |  */
 4 | package se.liu.ida.nlp.sdp.toolkit.tools;
 5 | 
 6 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 7 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader;
 8 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015;
 9 | import se.liu.ida.nlp.sdp.toolkit.io.GraphWriter;
10 | import se.liu.ida.nlp.sdp.toolkit.io.GraphWriter2015;
11 | 
12 | /**
13 |  * Test the graph I/O.
14 |  *
15 |  * @author Marco Kuhlmann
16 |  */
17 | public class IOTest {
18 | 
19 |     public static void main(String[] args) throws Exception {
20 |         for (String arg : args) {
21 |             GraphReader reader = new GraphReader2015(arg);
22 |             GraphWriter writer = new GraphWriter2015(arg + ".out");
23 |             int nGraphs = 0;
24 |             int nTokens = 0;
25 |             Graph graph;
26 |             while ((graph = reader.readGraph()) != null) {
27 |                 nGraphs++;
28 |                 nTokens += graph.getNNodes() - 1;
29 |                 writer.writeGraph(graph);
30 |             }
31 |             reader.close();
32 |             writer.close();
33 |             System.out.format("%s: %d graphs, %d tokens%n", arg, nGraphs, nTokens);
34 |         }
35 |     }
36 | }
37 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/PairedBootstrap.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * See the file "LICENSE" for the full license governing this code.
  3 |  */
  4 | package se.liu.ida.nlp.sdp.toolkit.tools;
  5 | 
  6 | import java.io.FileNotFoundException;
  7 | import java.io.IOException;
  8 | import java.util.ArrayList;
  9 | import java.util.HashSet;
 10 | import java.util.List;
 11 | import java.util.Random;
 12 | import java.util.Set;
 13 | import se.liu.ida.nlp.sdp.toolkit.graph.Edge;
 14 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 15 | import se.liu.ida.nlp.sdp.toolkit.graph.Node;
 16 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader;
 17 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015;
 18 | 
 19 | /**
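 20 |  * Estimates, by paired bootstrap resampling, whether a system's gain in labeled F1 over a baseline is significant.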
 21 |  * @author Marco Kuhlmann
 22 |  */
 23 | public class PairedBootstrap {
 24 | 
 25 |     private static final String VIRTUAL_LABEL = "-VIRTUAL-";
 26 |     private static final int B = 10000;
 27 |     private static final int SEED = 42;
 28 |     private static final Random R = new Random(SEED);
 29 | 
 30 |     public static void main(String[] args) throws Exception {
 31 | 	List<Graph> graphs0 = readGraphs(args[0]);
 32 | 	List<Graph> graphs1 = readGraphs(args[1]); // baseline
 33 | 	List<Graph> graphs2 = readGraphs(args[2]); // system
 34 | 
 35 | 	assert graphs0.size() == graphs1.size();
 36 | 	assert graphs0.size() == graphs2.size();
 37 | 
 38 | 	int n = graphs0.size();
 39 | 
 40 | 	List<Item> items = new ArrayList<Item>(n);
 41 | 	for (int i = 0; i < n; i++) {
 42 | 	    items.add(new Item(graphs0.get(i), graphs1.get(i), graphs2.get(i)));
 43 | 	}
 44 | 
 45 | 	double delta = getDelta(items);
 46 | 	if (delta == 0) {
 47 | 	    System.err.println("Baseline and system have the same performance!");
 48 | 	    System.exit(0);
 49 | 	}
 50 | 	if (delta < 0) {
 51 | 	    System.err.print("Baseline beats system!");
 52 | 	    System.exit(0);
 53 | 	}
 54 | 
 55 | 	System.err.print("System beats baseline by ");
 56 | 	System.err.format("%f LF.%n", Math.abs(delta));
 57 | 
 58 | 	System.err.println("Q: Could it be that the victory was just a random fluke?");
 59 | 
 60 | 	double p = getP(items);
 61 | 
 62 | 	if (p < 0.05) {
 63 | 	    System.err.print("A: No; the difference is most probably real");
 64 | 	} else {
 65 | 	    System.err.print("A: Yes; this is probable");
 66 | 	}
 67 | 	System.err.format(" (p = %f)%n", p);
 68 | 	System.exit(0);
 69 |     }
 70 | 
 71 |     private static List<Graph> readGraphs(String fileName) throws FileNotFoundException, IOException {
 72 | 	List<Graph> graphs = new ArrayList<Graph>();
 73 | 	GraphReader reader = new GraphReader2015(fileName);
 74 | 	Graph graph;
 75 | 	while ((graph = reader.readGraph()) != null) {
 76 | 	    graphs.add(graph);
 77 | 	}
 78 | 	return graphs;
 79 |     }
 80 | 
 81 |     private static List<Item> getSample(List<Item> base) {
 82 | 	int n = base.size();
 83 | 	List<Item> sample = new ArrayList<Item>(n);
 84 | 	for (int i = 0; i < n; i++) {
 85 | 	    int j = R.nextInt(n);
 86 | 	    sample.add(base.get(j));
 87 | 	}
 88 | 	return sample;
 89 |     }
 90 | 
 91 |     private static double getP(List<Item> base) {
 92 | 	double delta0 = getDelta(base);
 93 | 	int s = 0;
 94 | 	double p = 0.0;
 95 | 	for (int i = 0; i < B; i++) {
 96 | 	    s += (getDelta(getSample(base)) > 2 * delta0) ? 1 : 0;
 97 | 	    p = (double) s / (double) B;
 98 | 	    System.err.format("\rComputing ... (no. of samples = %d, p = %f)", i, p);
 99 | 	}
100 | 	System.err.println();
101 | 	return p;
102 |     }
103 | 
104 |     private static double getDelta(List<Item> sample) {
105 | 	Set<MyEdge> edges0 = new HashSet<MyEdge>();
106 | 	Set<MyEdge> edges1 = new HashSet<MyEdge>();
107 | 	Set<MyEdge> edges2 = new HashSet<MyEdge>();
108 | 
109 | 	int graphId = 0;
110 | 	for (Item item : sample) {
111 | 	    addEdges(edges0, item.graph0, graphId);
112 | 	    addEdges(edges1, item.graph1, graphId);
113 | 	    addEdges(edges2, item.graph2, graphId);
114 | 	    graphId++;
115 | 	}
116 | 
117 | 	int nEdgesIn0 = edges0.size();
118 | 	int nEdgesIn1 = edges1.size();
119 | 	int nEdgesIn2 = edges2.size();
120 | 	int nEdgesCorrect1 = getIntersection(edges0, edges1).size();
121 | 	int nEdgesCorrect2 = getIntersection(edges0, edges2).size();
122 | 
123 | 	double precision1 = (double) nEdgesCorrect1 / (double) nEdgesIn1;
124 | 	double recall1 = (double) nEdgesCorrect1 / (double) nEdgesIn0;
125 | 	double fOne1 = 2.0 * precision1 * recall1 / (precision1 + recall1);
126 | 
127 | 	double precision2 = (double) nEdgesCorrect2 / (double) nEdgesIn2;
128 | 	double recall2 = (double) nEdgesCorrect2 / (double) nEdgesIn0;
129 | 	double fOne2 = 2.0 * precision2 * recall2 / (precision2 + recall2);
130 | 
131 | 	return fOne2 - fOne1;
132 |     }
133 | 
134 |     private static void addEdges(Set<MyEdge> edges, Graph graph, int graphId) {
135 | 	for (Node node : graph.getNodes()) {
136 | 	    if (node.isTop) {
137 | 		edges.add(new MyEdge(graphId, 0, node.id, VIRTUAL_LABEL));
138 | 	    }
139 | 	}
140 | 	for (Edge edge : graph.getEdges()) {
141 | 	    edges.add(new MyEdge(graphId, edge.source, edge.target, edge.label));
142 | 	}
143 |     }
144 | 
145 |     private static Set<MyEdge> getIntersection(Set<MyEdge> edges1, Set<MyEdge> edges2) {
146 | 	Set<MyEdge> intersection = new HashSet<MyEdge>(edges1);
147 | 	intersection.retainAll(edges2);
148 | 	return intersection;
149 |     }
150 | 
151 |     private static class Item {
152 | 
153 | 	public final Graph graph0;
154 | 	public final Graph graph1;
155 | 	public final Graph graph2;
156 | 
157 | 	public Item(Graph graph0, Graph graph1, Graph graph2) {
158 | 	    this.graph0 = graph0;
159 | 	    this.graph1 = graph1;
160 | 	    this.graph2 = graph2;
161 | 	}
162 |     }
163 | 
164 |     private static class MyEdge {
165 | 
166 | 	final int graphId;
167 | 	final int src;
168 | 	final int tgt;
169 | 	final String label;
170 | 
171 | 	public MyEdge(int graphId, int src, int tgt, String label) {
172 | 	    this.graphId = graphId;
173 | 	    this.src = src;
174 | 	    this.tgt = tgt;
175 | 	    this.label = label;
176 | 	}
177 | 
178 | 	@Override
179 | 	public int hashCode() {
180 | 	    int hash = 3;
181 | 	    hash = 53 * hash + this.graphId;
182 | 	    hash = 53 * hash + this.src;
183 | 	    hash = 53 * hash + this.tgt;
184 | 	    hash = 53 * hash + (this.label != null ? this.label.hashCode() : 0);
185 | 	    return hash;
186 | 	}
187 | 
188 | 	@Override
189 | 	public boolean equals(Object obj) {
190 | 	    if (obj == null) {
191 | 		return false;
192 | 	    }
193 | 	    if (getClass() != obj.getClass()) {
194 | 		return false;
195 | 	    }
196 | 	    final MyEdge other = (MyEdge) obj;
197 | 	    if (this.graphId != other.graphId) {
198 | 		return false;
199 | 	    }
200 | 	    if (this.src != other.src) {
201 | 		return false;
202 | 	    }
203 | 	    if (this.tgt != other.tgt) {
204 | 		return false;
205 | 	    }
206 | 	    if ((this.label == null) ? (other.label != null) : !this.label.equals(other.label)) {
207 | 		return false;
208 | 	    }
209 | 	    return true;
210 | 	}
211 |     }
212 | }
213 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/Scorer.java:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * See the file "LICENSE" for the full license governing this code.
   3 |  */
   4 | package se.liu.ida.nlp.sdp.toolkit.tools;
   5 | 
   6 | import java.io.BufferedReader;
   7 | import java.io.File;
   8 | import java.io.FileNotFoundException;
   9 | import java.io.FileReader;
  10 | import java.io.IOException;
  11 | import java.util.ArrayList;
  12 | import java.util.Collections;
  13 | import java.util.HashSet;
  14 | import java.util.LinkedList;
  15 | import java.util.List;
  16 | import java.util.Set;
  17 | import se.liu.ida.nlp.sdp.toolkit.graph.Edge;
  18 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
  19 | import se.liu.ida.nlp.sdp.toolkit.graph.Node;
  20 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader;
  21 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015;
  22 | 
  23 | /**
  24 |  * Score a collection of dependency graphs relative to a gold standard.
  25 |  *
  26 |  * @author Marco Kuhlmann
  27 |  */
  28 | public class Scorer {
  29 | 
  30 | 	/**
  31 | 	 * The label used for unlabeled edges.
  32 | 	 */
  33 | 	private static final String UNLABELED = "-UNLABELED-";
  34 | 
  35 | 	/**
  36 | 	 * The label used for virtual edges.
  37 | 	 */
  38 | 	private static final String VIRTUAL = "-VIRTUAL-";
  39 | 
  40 | 	/**
  41 | 	 * The sense used for core predications.
  42 | 	 */
  43 | 	private static final String NO_SENSE = "-NOSENSE-";
  44 | 
  45 | 	/**
  46 | 	 * A flag indicating whether to include labels when scoring graphs.
  47 | 	 */
  48 | 	private final boolean includeLabels;
  49 | 
  50 | 	/**
  51 | 	 * A flag indicating whether to include top nodes when scoring graphs.
  52 | 	 */
  53 | 	private final boolean includeTopNodes;
  54 | 
  55 | 	/**
  56 | 	 * A flag indicating whether to include punctuation when scoring graphs.
  57 | 	 */
  58 | 	private final boolean includePunctuation;
  59 | 
  60 | 	/**
  61 | 	 * A flag indicating whether to treat edges as undirected when scoring
  62 | 	 * graphs.
  63 | 	 */
  64 | 	private final boolean treatEdgesAsUndirected;
  65 | 
  66 | 	/**
  67 | 	 * Counter to store the number of graphs read.
  68 | 	 */
  69 | 	private int nGraphs;
  70 | 
  71 | 	/**
  72 | 	 * Set containing the edges from the gold standard graphs.
  73 | 	 */
  74 | 	private final Set<ScorerEdge> edgesInGoldStandard;
  75 | 
  76 | 	/**
  77 | 	 * Set containing the edges from the system output graphs.
  78 | 	 */
  79 | 	private final Set<ScorerEdge> edgesInSystemOutput;
  80 | 
  81 | 	/**
  82 | 	 * Counter for the number of exact matches.
  83 | 	 */
  84 | 	private int nExactMatches;
  85 | 
  86 | 	private final Set<SemanticFrame> semanticFramesInGoldStandard;
  87 | 	private final Set<SemanticFrame> semanticFramesInSystemOutput;
  88 | 	private final Set<SemanticFrame> corePredicationsInGoldStandard;
  89 | 	private final Set<SemanticFrame> corePredicationsInSystemOutput;
  90 | 
  91 | 	private final ArgumentFilter labelPredicate;
  92 | 
  93 | 	/**
  94 | 	 * Construct a new scorer.
  95 | 	 *
  96 | 	 * @param includeLabels flag indicating whether the scorer should do labeled
  97 | 	 * scoring
  98 | 	 * @param includeTopNodes flag indicating whether the scorer should include
  99 | 	 * top nodes
 100 | 	 * @param includePunctuation flag indicating whether the scorer should
 101 | 	 * include punctuation
 102 | 	 * @param treatEdgesAsUndirected flag indicating whether the scorer should
 103 | 	 * treat edges as undirected
 104 | 	 */
	public Scorer(boolean includeLabels, boolean includeTopNodes, boolean includePunctuation, boolean treatEdgesAsUndirected, ArgumentFilter labelPredicate) {
		// Scoring options, stored as given.
		this.includeLabels = includeLabels;
		this.includeTopNodes = includeTopNodes;
		this.includePunctuation = includePunctuation;
		this.treatEdgesAsUndirected = treatEdgesAsUndirected;
		this.labelPredicate = labelPredicate;

		// Accumulators; all start out empty.
		this.edgesInGoldStandard = new HashSet<>();
		this.edgesInSystemOutput = new HashSet<>();
		this.semanticFramesInGoldStandard = new HashSet<>();
		this.semanticFramesInSystemOutput = new HashSet<>();
		this.corePredicationsInGoldStandard = new HashSet<>();
		this.corePredicationsInSystemOutput = new HashSet<>();
	}
 118 | 
 119 | 	/**
 120 | 	 * Construct a new scorer.
 121 | 	 */
	public Scorer() {
		this(
				true, // includeLabels
				true, // includeTopNodes
				true, // includePunctuation
				false, // treatEdgesAsUndirected
				new TrueFilter() // labelPredicate
		);
	}
 125 | 
 126 | 	/**
 127 | 	 * Updates this scorer with the specified pair of graphs.
 128 | 	 *
 129 | 	 * @param goldStandard the graph that should be considered as the gold
 130 | 	 * standard
 131 | 	 * @param systemOutput the graph that should be considered as the system
 132 | 	 * output
 133 | 	 */
 134 | 	public void update(Graph goldStandard, Graph systemOutput) {
 135 | 		assert goldStandard.getNNodes() == systemOutput.getNNodes();
 136 | 
 137 | 		Set<ScorerEdge> edgesG = getEdges(goldStandard);
 138 | 		Set<ScorerEdge> edgesS = getEdges(systemOutput);
 139 | 
 140 | 		nGraphs++;
 141 | 		nExactMatches += edgesG.equals(edgesS) ? 1 : 0;
 142 | 
 143 | 		edgesInGoldStandard.addAll(edgesG);
 144 | 		edgesInSystemOutput.addAll(edgesS);
 145 | 
 146 | 		Set<SemanticFrame> semanticFramesG = getSemanticFrames(goldStandard);
 147 | 		Set<SemanticFrame> semanticFramesS = getSemanticFrames(systemOutput);
 148 | 
 149 | 		semanticFramesInGoldStandard.addAll(semanticFramesG);
 150 | 		semanticFramesInSystemOutput.addAll(semanticFramesS);
 151 | 
 152 | 		Set<SemanticFrame> corePredicationsG = getCorePredications(goldStandard);
 153 | 		Set<SemanticFrame> corePredicationsS = getCorePredications(systemOutput);
 154 | 
 155 | 		corePredicationsInGoldStandard.addAll(corePredicationsG);
 156 | 		corePredicationsInSystemOutput.addAll(corePredicationsS);
 157 | 	}
 158 | 
 159 | 	/**
 160 | 	 * Tests whether the specified node represents a punctuation token.
 161 | 	 *
 162 | 	 * @param node a node
 163 | 	 * @return {@code true} if the specified node represents a punctuation token
 164 | 	 */
 165 | 	private boolean isPunctuation(Node node) {
 166 | 		return node.pos.equals(".") || node.pos.equals(",") || node.pos.equals(":") || node.pos.equals("(") || node.pos.equals(")");
 167 | 	}
 168 | 
 169 | 	/**
 170 | 	 * Tests whether an edge between the specified nodes is admissible.
 171 | 	 *
 172 | 	 * @param graph a graph
 173 | 	 * @param src the source node of the presumed edge
 174 | 	 * @param tgt the target node of the presumed edge
 175 | 	 * @return {@code true} if an edge from the specified source node to the
 176 | 	 * specified target node would be admissible
 177 | 	 */
 178 | 	private boolean edgeIsAdmissible(Graph graph, int src, int tgt) {
 179 | 		if (includePunctuation) {
 180 | 			return true;
 181 | 		} else {
 182 | 			return !isPunctuation(graph.getNode(src)) && !isPunctuation(graph.getNode(tgt));
 183 | 		}
 184 | 	}
 185 | 
 186 | 	/**
 187 | 	 * Extracts the (scorer-internal) edges from the specified graph.
 188 | 	 *
 189 | 	 * @param graph the graph from which to extract the edges
 190 | 	 * @return the set of extracted edges
 191 | 	 */
 192 | 	private Set<ScorerEdge> getEdges(Graph graph) {
 193 | 		Set<ScorerEdge> edges = new HashSet<ScorerEdge>();
 194 | 		for (Edge edge : graph.getEdges()) {
 195 | 			if (edgeIsAdmissible(graph, edge.source, edge.target)) {
 196 | 				String label = includeLabels ? edge.label : UNLABELED;
 197 | 				edges.add(makeEdge(nGraphs, edge.source, edge.target, label));
 198 | 			}
 199 | 		}
 200 | 		if (includeTopNodes) {
 201 | 			for (Node node : graph.getNodes()) {
 202 | 				if (node.isTop && edgeIsAdmissible(graph, 0, node.id)) {
 203 | 					edges.add(makeEdge(nGraphs, 0, node.id, VIRTUAL));
 204 | 				}
 205 | 			}
 206 | 		}
 207 | 		return edges;
 208 | 	}
 209 | 
 210 | 	/**
 211 | 	 * Returns the number of edges in the gold standard.
 212 | 	 *
 213 | 	 * @return the number of edges in the gold standard
 214 | 	 */
 215 | 	public int getNEdgesInGoldStandard() {
 216 | 		return edgesInGoldStandard.size();
 217 | 	}
 218 | 
 219 | 	/**
 220 | 	 * Returns the number of edges in the system output.
 221 | 	 *
 222 | 	 * @return the number of edges in the system output
 223 | 	 */
 224 | 	public int getNEdgesInSystemOutput() {
 225 | 		return edgesInSystemOutput.size();
 226 | 	}
 227 | 
 228 | 	/**
 229 | 	 * Returns the precision computed by this scorer.
 230 | 	 *
 231 | 	 * @return the precision computed by this scorer
 232 | 	 */
 233 | 	public double getPrecision() {
 234 | 		return (double) getNEdgesInCommon() / (double) getNEdgesInSystemOutput();
 235 | 	}
 236 | 
 237 | 	/**
 238 | 	 * Returns the recall computed by this scorer.
 239 | 	 *
 240 | 	 * @return the recall computed by this scorer
 241 | 	 */
 242 | 	public double getRecall() {
 243 | 		return (double) getNEdgesInCommon() / (double) getNEdgesInGoldStandard();
 244 | 	}
 245 | 
 246 | 	/**
 247 | 	 * Returns the edges that occur both in the gold standard and in the system
 248 | 	 * output.
 249 | 	 *
 250 | 	 * @return the edges that occur both in the gold standard and in the system
 251 | 	 * output
 252 | 	 */
 253 | 	private Set<ScorerEdge> getEdgesInCommon() {
 254 | 		Set<ScorerEdge> intersection = new HashSet<ScorerEdge>(edgesInGoldStandard);
 255 | 		intersection.retainAll(edgesInSystemOutput);
 256 | 		return intersection;
 257 | 	}
 258 | 
 259 | 	/**
 260 | 	 * Returns the number of edges that occur both in the gold standard and in
 261 | 	 * the system output.
 262 | 	 *
 263 | 	 * @return the number of edges that occur both in the gold standard and in
 264 | 	 * the system output
 265 | 	 */
 266 | 	public int getNEdgesInCommon() {
 267 | 		return getEdgesInCommon().size();
 268 | 	}
 269 | 
 270 | 	/**
 271 | 	 * Returns the F1-score computed by this scorer.
 272 | 	 *
 273 | 	 * @return the F1-score computed by this scorer
 274 | 	 */
 275 | 	public double getF1() {
 276 | 		double p = getPrecision();
 277 | 		double r = getRecall();
 278 | 		return 2.0 * p * r / (p + r);
 279 | 	}
 280 | 
 281 | 	/**
 282 | 	 * Returns the exact match score computed by this scorer.
 283 | 	 *
 284 | 	 * @return the exact match score computed by this scorer
 285 | 	 */
 286 | 	public double getExactMatch() {
 287 | 		return (double) nExactMatches / (double) nGraphs;
 288 | 	}
 289 | 
 290 | 	/**
 291 | 	 * Tests whether the specified node represents a scorable predicate.
 292 | 	 * Currently only predicates corresponding to verbs are scored.
 293 | 	 *
 294 | 	 * @param node a node in a dependency graph
 295 | 	 * @return {@code true} if the specified node represents a scorable
 296 | 	 * predicate
 297 | 	 */
 298 | 	private boolean representsScorablePredicate(Node node) {
 299 | 		return node.isPred && node.pos.startsWith("V");
 300 | 	}
 301 | 
 302 | 	/**
 303 | 	 * Returns the semantic frames contained in the specified graph.
 304 | 	 *
 305 | 	 * @param graph a dependency graph
 306 | 	 * @return the semantic frames contained in the specified graph
 307 | 	 */
 308 | 	private Set<SemanticFrame> getSemanticFrames(Graph graph) {
 309 | 		Set<SemanticFrame> semanticFrames = new HashSet<>();
 310 | 		for (Node node : graph.getNodes()) {
 311 | 			if (representsScorablePredicate(node)) {
 312 | 				Set<ScorerEdge> outgoingEdges = new HashSet<>();
 313 | 				for (Edge edge : node.getOutgoingEdges()) {
 314 | 					if (labelPredicate.applies(edge.label)) {
 315 | 						ScorerEdge scorerEdge = new ScorerEdge(nGraphs, edge.source, edge.target, edge.label);
 316 | 						outgoingEdges.add(scorerEdge);
 317 | 					}
 318 | 				}
 319 | 				SemanticFrame frame = new SemanticFrame(nGraphs, node.id, node.sense, outgoingEdges);
 320 | 				semanticFrames.add(frame);
 321 | 			}
 322 | 		}
 323 | 		return semanticFrames;
 324 | 	}
 325 | 
 326 | 	/**
 327 | 	 * Returns the number of semantic frames in the gold standard.
 328 | 	 *
 329 | 	 * @return the number of semantic frames in the gold standard
 330 | 	 */
 331 | 	public int getNSemanticFramesInGoldStandard() {
 332 | 		return semanticFramesInGoldStandard.size();
 333 | 	}
 334 | 
 335 | 	/**
 336 | 	 * Returns the number of semantic frames in the system output.
 337 | 	 *
 338 | 	 * @return the number of semantic frames in the system output
 339 | 	 */
 340 | 	public int getNSemanticFramesInSystemOutput() {
 341 | 		return semanticFramesInSystemOutput.size();
 342 | 	}
 343 | 
 344 | 	/**
 345 | 	 * Returns the semantic frames precision computed by this scorer.
 346 | 	 *
 347 | 	 * @return the semantic frames precision computed by this scorer
 348 | 	 */
 349 | 	public double getSemanticFramesPrecision() {
 350 | 		return (double) getNSemanticFramesInCommon() / (double) getNSemanticFramesInSystemOutput();
 351 | 	}
 352 | 
 353 | 	/**
 354 | 	 * Returns the semantic frames recall computed by this scorer.
 355 | 	 *
 356 | 	 * @return the semantic frames recall computed by this scorer
 357 | 	 */
 358 | 	public double getSemanticFramesRecall() {
 359 | 		return (double) getNSemanticFramesInCommon() / (double) getNSemanticFramesInGoldStandard();
 360 | 	}
 361 | 
 362 | 	/**
 363 | 	 * Returns the semantic frames that occur both in the gold standard and in
 364 | 	 * the system output.
 365 | 	 *
 366 | 	 * @return the semantic frames that occur both in the gold standard and in
 367 | 	 * the system output
 368 | 	 */
 369 | 	private Set<SemanticFrame> getSemanticFramesInCommon() {
 370 | 		Set<SemanticFrame> intersection = new HashSet<>(semanticFramesInGoldStandard);
 371 | 		intersection.retainAll(semanticFramesInSystemOutput);
 372 | 		return intersection;
 373 | 	}
 374 | 
 375 | 	/**
 376 | 	 * Returns the number of semantic frames that occur both in the gold
 377 | 	 * standard and in the system output.
 378 | 	 *
 379 | 	 * @return the number of semantic frames that occur both in the gold
 380 | 	 * standard and in the system output
 381 | 	 */
 382 | 	public int getNSemanticFramesInCommon() {
 383 | 		return getSemanticFramesInCommon().size();
 384 | 	}
 385 | 
 386 | 	/**
 387 | 	 * Returns the semantic frames F1-score computed by this scorer.
 388 | 	 *
 389 | 	 * @return the semantic frames F1-score computed by this scorer
 390 | 	 */
 391 | 	public double getSemanticFramesF1() {
 392 | 		double p = getSemanticFramesPrecision();
 393 | 		double r = getSemanticFramesRecall();
 394 | 		return 2.0 * p * r / (p + r);
 395 | 	}
 396 | 
 397 | 	/**
 398 | 	 * Returns the core predications contained in the specified graph.
 399 | 	 *
 400 | 	 * @param graph a dependency graph
 401 | 	 * @return the core predications contained in the specified graph
 402 | 	 */
 403 | 	private Set<SemanticFrame> getCorePredications(Graph graph) {
 404 | 		Set<SemanticFrame> semanticFrames = new HashSet<>();
 405 | 		for (Node node : graph.getNodes()) {
 406 | 			if (representsScorablePredicate(node)) {
 407 | 				Set<ScorerEdge> outgoingEdges = new HashSet<>();
 408 | 				for (Edge edge : node.getOutgoingEdges()) {
 409 | 					if (labelPredicate.applies(edge.label)) {
 410 | 						ScorerEdge scorerEdge = new ScorerEdge(nGraphs, edge.source, edge.target, edge.label);
 411 | 						outgoingEdges.add(scorerEdge);
 412 | 					}
 413 | 				}
 414 | 				SemanticFrame frame = new SemanticFrame(nGraphs, node.id, NO_SENSE, outgoingEdges);
 415 | 				semanticFrames.add(frame);
 416 | 			}
 417 | 		}
 418 | 		return semanticFrames;
 419 | 	}
 420 | 
 421 | 	/**
 422 | 	 * Returns the number of core predications in the gold standard.
 423 | 	 *
 424 | 	 * @return the number of core predications in the gold standard
 425 | 	 */
 426 | 	public int getNCorePredicationsInGoldStandard() {
 427 | 		return corePredicationsInGoldStandard.size();
 428 | 	}
 429 | 
 430 | 	/**
 431 | 	 * Returns the number of core predications in the system output.
 432 | 	 *
 433 | 	 * @return the number of core predications in the system output
 434 | 	 */
 435 | 	public int getNCorePredicationsInSystemOutput() {
 436 | 		return corePredicationsInSystemOutput.size();
 437 | 	}
 438 | 
 439 | 	/**
 440 | 	 * Returns the core predications precision computed by this scorer.
 441 | 	 *
 442 | 	 * @return the core predications precision computed by this scorer
 443 | 	 */
 444 | 	public double getCorePredicationsPrecision() {
 445 | 		return (double) getNCorePredicationsInCommon() / (double) getNCorePredicationsInSystemOutput();
 446 | 	}
 447 | 
 448 | 	/**
 449 | 	 * Returns the core predications recall computed by this scorer.
 450 | 	 *
 451 | 	 * @return the core predications recall computed by this scorer
 452 | 	 */
 453 | 	public double getCorePredicationsRecall() {
 454 | 		return (double) getNCorePredicationsInCommon() / (double) getNCorePredicationsInGoldStandard();
 455 | 	}
 456 | 
 457 | 	/**
 458 | 	 * Returns the core predications that occur both in the gold standard and in
 459 | 	 * the system output.
 460 | 	 *
 461 | 	 * @return the core predications that occur both in the gold standard and in
 462 | 	 * the system output
 463 | 	 */
 464 | 	private Set<SemanticFrame> getCorePredicationsInCommon() {
 465 | 		Set<SemanticFrame> intersection = new HashSet<>(corePredicationsInGoldStandard);
 466 | 		intersection.retainAll(corePredicationsInSystemOutput);
 467 | 		return intersection;
 468 | 	}
 469 | 
 470 | 	/**
 471 | 	 * Returns the number of core predications that occur both in the gold
 472 | 	 * standard and in the system output.
 473 | 	 *
 474 | 	 * @return the number of core predications that occur both in the gold
 475 | 	 * standard and in the system output
 476 | 	 */
 477 | 	public int getNCorePredicationsInCommon() {
 478 | 		return getCorePredicationsInCommon().size();
 479 | 	}
 480 | 
 481 | 	/**
 482 | 	 * Returns the core predications F1-score computed by this scorer.
 483 | 	 *
 484 | 	 * @return the core predications F1-score computed by this scorer
 485 | 	 */
 486 | 	public double getCorePredicationsF1() {
 487 | 		double p = getCorePredicationsPrecision();
 488 | 		double r = getCorePredicationsRecall();
 489 | 		return 2.0 * p * r / (p + r);
 490 | 	}
 491 | 
 492 | 	/**
 493 | 	 * Read graphs from the specified files.
 494 | 	 *
 495 | 	 * @param goldStandardFile the file containing the gold standard graphs
 496 | 	 * @param systemOutputFile the file containing the system output graphs
 497 | 	 * @throws Exception if an I/O error occurs
 498 | 	 */
 499 | 	private static List<GraphPair> readGraphs(String goldStandardFile, String systemOutputFile, int max) throws Exception {
 500 | 		List<GraphPair> graphPairs = new LinkedList<GraphPair>();
 501 | 		GraphReader goldStandardReader = new GraphReader2015(goldStandardFile);
 502 | 		GraphReader systemOutputReader = new GraphReader2015(systemOutputFile);
 503 | 		Graph goldStandard;
 504 | 		Graph systemOutput;
 505 | 		int nGraphs = 0;
 506 | 		while ((goldStandard = goldStandardReader.readGraph()) != null && (max < 0 || nGraphs < max)) {
 507 | 			systemOutput = systemOutputReader.readGraph();
 508 | 			graphPairs.add(new GraphPair(goldStandard, systemOutput));
 509 | 			nGraphs++;
 510 | 		}
 511 | 		assert systemOutputReader.readGraph() == null;
 512 | 		goldStandardReader.close();
 513 | 		systemOutputReader.close();
 514 | 		return graphPairs;
 515 | 	}
 516 | 
 517 | 	/**
 518 | 	 * Scores the specified graphs using the specified scorer.
 519 | 	 *
 520 | 	 * @param scorer the scorer to use
 521 | 	 * @param graphPairs a list of reference-candidate pairs
 522 | 	 */
 523 | 	private static void score(Scorer scorer, List<GraphPair> graphPairs) {
 524 | 		for (GraphPair pair : graphPairs) {
 525 | 			scorer.update(pair.goldStandard, pair.systemOutput);
 526 | 		}
 527 | 	}
 528 | 
 529 | 	/**
 530 | 	 * Scores the specified graphs.
 531 | 	 *
 532 | 	 * @param includeTopNodes whether the scoring should include top nodes
 533 | 	 * @param graphPairs a list of reference-candidate pairs
 534 | 	 */
 535 | 	private static void score(boolean includeTopNodes, boolean includePunctuation, boolean treatEdgesAsUndirected, List<GraphPair> graphPairs, ArgumentFilter labelPredicate) {
 536 | 		Scorer scorerL = new Scorer(true, includeTopNodes, includePunctuation, treatEdgesAsUndirected, labelPredicate);
 537 | 		Scorer scorerU = new Scorer(false, includeTopNodes, includePunctuation, treatEdgesAsUndirected, labelPredicate);
 538 | 
 539 | 		score(scorerL, graphPairs);
 540 | 		score(scorerU, graphPairs);
 541 | 
 542 | 		System.err.format("Number of edges in gold standard: %d%n", scorerL.getNEdgesInGoldStandard());
 543 | 		System.err.format("Number of edges in system output: %d%n", scorerL.getNEdgesInSystemOutput());
 544 | 		System.err.format("Number of edges in common, labeled: %d%n", scorerL.getNEdgesInCommon());
 545 | 		System.err.format("Number of edges in common, unlabeled: %d%n", scorerU.getNEdgesInCommon());
 546 | 		System.err.println();
 547 | 
 548 | 		System.err.println("### Labeled scores");
 549 | 		System.err.println();
 550 | 		System.err.format("LP: %f%n", scorerL.getPrecision());
 551 | 		System.err.format("LR: %f%n", scorerL.getRecall());
 552 | 		System.err.format("LF: %f%n", scorerL.getF1());
 553 | 		System.err.format("LM: %f%n", scorerL.getExactMatch());
 554 | 		System.err.println();
 555 | 
 556 | 		System.err.println("### Breakdown by label type");
 557 | 		System.err.println();
 558 | 		System.err.println("Label type,Number of edges in gold standard,Number of edges in system output,Precision,Recall");
 559 | 		List<String> labels = new ArrayList<String>(scorerL.getLabels());
 560 | 		Collections.sort(labels);
 561 | 		for (String label : labels) {
 562 | 			System.err.format("%s,%d,%d,%f,%f%n", label, scorerL.getNEdgesInGoldStandardByLabel(label), scorerL.getNEdgesInSystemOutputByLabel(label), scorerL.getPrecisionPerLabel(label), scorerL.getRecallPerLabel(label));
 563 | 		}
 564 | 		System.err.println();
 565 | 
 566 | 		System.err.println("### Breakdown by edge length");
 567 | 		System.err.println();
 568 | 		List<String> quantizedLengths = new ArrayList<String>();
 569 | 		for (int i = 1; i < 100; i++) {
 570 | 			String quantizedLength = scorerL.getQuantizedLength(i);
 571 | 			if (!quantizedLengths.contains(quantizedLength)) {
 572 | 				quantizedLengths.add(quantizedLength);
 573 | 			}
 574 | 		}
 575 | 		System.err.println("Edge length,Number of edges in gold standard,Number of edges in system output,Precision,Recall");
 576 | 		for (String quantizedLength : quantizedLengths) {
 577 | 			System.err.format("%s,%d,%d,%f,%f%n", quantizedLength, scorerL.getNEdgesInGoldStandardByQuantizedLength(quantizedLength), scorerL.getNEdgesInSystemOutputByQuantizedLength(quantizedLength), scorerL.getPrecisionPerQuantizedLength(quantizedLength), scorerL.getRecallPerQuantizedLength(quantizedLength));
 578 | 		}
 579 | 		System.err.println();
 580 | 
 581 | 		System.err.println("### Unlabeled scores");
 582 | 		System.err.println();
 583 | 		System.err.format("UP: %f%n", scorerU.getPrecision());
 584 | 		System.err.format("UR: %f%n", scorerU.getRecall());
 585 | 		System.err.format("UF: %f%n", scorerU.getF1());
 586 | 		System.err.format("UM: %f%n", scorerU.getExactMatch());
 587 | 		System.err.println();
 588 | 
 589 | 		System.err.println("### Complete predications");
 590 | 		System.err.println();
 591 | 		System.err.format("Number of complete predications in gold standard: %d%n", scorerL.getNCorePredicationsInGoldStandard());
 592 | 		System.err.format("Number of complete predications in system output: %d%n", scorerL.getNCorePredicationsInSystemOutput());
 593 | 		System.err.println();
 594 | 		System.err.format("PP: %f%n", scorerL.getCorePredicationsPrecision());
 595 | 		System.err.format("PR: %f%n", scorerL.getCorePredicationsRecall());
 596 | 		System.err.format("PF: %f%n", scorerL.getCorePredicationsF1());
 597 | 		System.err.println();
 598 | 
 599 | 		System.err.println("### Semantic frames");
 600 | 		System.err.println();
 601 | 		System.err.format("Number of semantic frames in gold standard: %d%n", scorerL.getNSemanticFramesInGoldStandard());
 602 | 		System.err.format("Number of semantic frames in system output: %d%n", scorerL.getNSemanticFramesInSystemOutput());
 603 | 		System.err.println();
 604 | 		System.err.format("FP: %f%n", scorerL.getSemanticFramesPrecision());
 605 | 		System.err.format("FR: %f%n", scorerL.getSemanticFramesRecall());
 606 | 		System.err.format("FF: %f%n", scorerL.getSemanticFramesF1());
 607 | 	}
 608 | 
 609 | 	/**
 610 | 	 * Compute scores for two files.
 611 | 	 *
 612 | 	 * @param args the names of the files containing the gold standard graphs
 613 | 	 * and the system output graphs
 614 | 	 * @throws Exception if an I/O exception occurs
 615 | 	 */
 616 | 	public static void main(String[] args) throws Exception {
 617 | 		boolean includePunctuation = true;
 618 | 		boolean treatEdgesAsUndirected = false;
 619 | 		ArgumentFilter labelPredicate = new TrueFilter();
 620 | 		int graphsToRead = -1;
 621 | 		for (String arg : args) {
 622 | 			if (arg.equals("excludePunctuation")) {
 623 | 				System.err.println("Will exclude punctuation.");
 624 | 				includePunctuation = false;
 625 | 			}
 626 | 			if (arg.equals("treatEdgesAsUndirected")) {
 627 | 				System.err.println("Will treat edges as undirected.");
 628 | 				treatEdgesAsUndirected = true;
 629 | 			}
 630 | 			if (arg.startsWith("corePredicates=")) {
 631 | 				String fileName = arg.substring(15);
 632 | 				System.err.format("Reading core predicates from %s%n", fileName);
 633 | 				labelPredicate = new ListFilter(new File(fileName));
 634 | 			}
 635 | 			if (arg.startsWith("max=")) {
 636 | 				graphsToRead = Integer.parseInt(arg.substring(4));
 637 | 				System.err.format("Will read at most %d graphs.%n", graphsToRead);
 638 | 			}
 639 | 			if (arg.startsWith("representation=")) {
 640 | 				String representation = arg.substring(15).toLowerCase();
 641 | 				if (representation.equals("dm")) {
 642 | 					System.err.println("Representation type: DM");
 643 | 					labelPredicate = new DMArgumentFilter();
 644 | 				}
 645 | 				if (representation.equals("pas")) {
 646 | 					System.err.println("Representation type: PAS");
 647 | 					labelPredicate = new PASArgumentFilter();
 648 | 				}
 649 | 				if (representation.equals("psd")) {
 650 | 					System.err.println("Representation type: PSD");
 651 | 					labelPredicate = new PSDPredicate();
 652 | 				}
 653 | 			}
 654 | 		}
 655 | 
 656 | 		System.err.println("# Evaluation");
 657 | 		System.err.println();
 658 | 
 659 | 		System.err.format("Gold standard file: %s%n", args[0]);
 660 | 		System.err.format("System output file: %s%n", args[1]);
 661 | 		System.err.println();
 662 | 
 663 | 		List<GraphPair> graphPairs = readGraphs(args[0], args[1], graphsToRead);
 664 | 
 665 | 		System.err.println("## Scores including virtual dependencies to top nodes");
 666 | 		System.err.println();
 667 | 		score(true, includePunctuation, treatEdgesAsUndirected, graphPairs, labelPredicate);
 668 | 		System.err.println();
 669 | 
 670 | 		System.err.println("## Scores excluding virtual dependencies to top nodes");
 671 | 		System.err.println();
 672 | 		score(false, includePunctuation, treatEdgesAsUndirected, graphPairs, labelPredicate);
 673 | 	}
 674 | 
 675 | 	private static class GraphPair {
 676 | 
 677 | 		public final Graph goldStandard;
 678 | 		public final Graph systemOutput;
 679 | 
 680 | 		public GraphPair(Graph goldStandard, Graph systemOutput) {
 681 | 			this.goldStandard = goldStandard;
 682 | 			this.systemOutput = systemOutput;
 683 | 		}
 684 | 	}
 685 | 
 686 | 	private interface ArgumentFilter {
 687 | 
 688 | 		abstract public boolean applies(String label);
 689 | 	}
 690 | 
 691 | 	private static class TrueFilter implements ArgumentFilter {
 692 | 
 693 | 		@Override
 694 | 		public boolean applies(String label) {
 695 | 			return true;
 696 | 		}
 697 | 	}
 698 | 
 699 | 	private static class ListFilter implements ArgumentFilter {
 700 | 
 701 | 		private final Set<String> labels;
 702 | 
 703 | 		public ListFilter(File file) {
 704 | 			this.labels = new HashSet<>();
 705 | 			try {
 706 | 				BufferedReader reader = new BufferedReader(new FileReader(file));
 707 | 				String line;
 708 | 				while ((line = reader.readLine()) != null) {
 709 | 					labels.add(line.trim());
 710 | 				}
 711 | 			} catch (FileNotFoundException e) {
 712 | 				System.err.println("File not found.");
 713 | 				System.exit(1);
 714 | 			} catch (IOException e) {
 715 | 				System.err.println("I/O exception.");
 716 | 				System.exit(1);
 717 | 			}
 718 | 		}
 719 | 
 720 | 		@Override
 721 | 		public boolean applies(String label) {
 722 | 			return labels.contains(label);
 723 | 		}
 724 | 	}
 725 | 
 726 | 	private static class DMArgumentFilter implements ArgumentFilter {
 727 | 
 728 | 		@Override
 729 | 		public boolean applies(String label) {
 730 | 			return true;
 731 | 		}
 732 | 	}
 733 | 
 734 | 	private static class PASArgumentFilter implements ArgumentFilter {
 735 | 
 736 | 		private final Set<String> coreArguments;
 737 | 
 738 | 		public PASArgumentFilter() {
 739 | 			this.coreArguments = new HashSet<>();
 740 | 			coreArguments.add("adj_ARG1");
 741 | 			coreArguments.add("adj_ARG2");
 742 | 			coreArguments.add("adj_MOD");
 743 | 			coreArguments.add("coord_ARG1");
 744 | 			coreArguments.add("coord_ARG2");
 745 | 			coreArguments.add("prep_ARG1");
 746 | 			coreArguments.add("prep_ARG2");
 747 | 			coreArguments.add("prep_ARG3");
 748 | 			coreArguments.add("prep_MOD");
 749 | 			coreArguments.add("verb_ARG1");
 750 | 			coreArguments.add("verb_ARG2");
 751 | 			coreArguments.add("verb_ARG3");
 752 | 			coreArguments.add("verb_ARG4");
 753 | 			coreArguments.add("verb_MOD");
 754 | 		}
 755 | 
 756 | 		@Override
 757 | 		public boolean applies(String label) {
 758 | 			return coreArguments.contains(label);
 759 | 		}
 760 | 	}
 761 | 
 762 | 	public static class PSDPredicate implements ArgumentFilter {
 763 | 
 764 | 		@Override
 765 | 		public boolean applies(String label) {
 766 | 			return label.endsWith("-arg");
 767 | 		}
 768 | 	}
 769 | 
 770 | 	private ScorerEdge makeEdge(int graphId, int src, int tgt, String label) {
 771 | 		if (treatEdgesAsUndirected) {
 772 | 			return new UndirectedScorerEdge(graphId, src, tgt, label);
 773 | 		} else {
 774 | 			return new ScorerEdge(graphId, src, tgt, label);
 775 | 		}
 776 | 	}
 777 | 
 778 | 	private static class ScorerEdge {
 779 | 
 780 | 		final int graphId;
 781 | 		final int src;
 782 | 		final int tgt;
 783 | 		final String label;
 784 | 
 785 | 		public ScorerEdge(int graphId, int src, int tgt, String label) {
 786 | 			this.graphId = graphId;
 787 | 			this.src = src;
 788 | 			this.tgt = tgt;
 789 | 			this.label = label;
 790 | 		}
 791 | 
 792 | 		public int getLength() {
 793 | 			return Math.max(src, tgt) - Math.min(src, tgt);
 794 | 		}
 795 | 
 796 | 		@Override
 797 | 		public int hashCode() {
 798 | 			int hash = 3;
 799 | 			hash = 53 * hash + this.graphId;
 800 | 			hash = 53 * hash + this.src;
 801 | 			hash = 53 * hash + this.tgt;
 802 | 			hash = 53 * hash + (this.label != null ? this.label.hashCode() : 0);
 803 | 			return hash;
 804 | 		}
 805 | 
 806 | 		@Override
 807 | 		public boolean equals(Object obj) {
 808 | 			if (obj == null) {
 809 | 				return false;
 810 | 			}
 811 | 			if (getClass() != obj.getClass()) {
 812 | 				return false;
 813 | 			}
 814 | 			final ScorerEdge other = (ScorerEdge) obj;
 815 | 			if (this.graphId != other.graphId) {
 816 | 				return false;
 817 | 			}
 818 | 			if (this.src != other.src) {
 819 | 				return false;
 820 | 			}
 821 | 			if (this.tgt != other.tgt) {
 822 | 				return false;
 823 | 			}
 824 | 			if ((this.label == null) ? (other.label != null) : !this.label.equals(other.label)) {
 825 | 				return false;
 826 | 			}
 827 | 			return true;
 828 | 		}
 829 | 	}
 830 | 
 831 | 	private static class UndirectedScorerEdge extends ScorerEdge {
 832 | 
 833 | 		public UndirectedScorerEdge(int graphId, int src, int tgt, String label) {
 834 | 			super(graphId, src, tgt, label);
 835 | 		}
 836 | 
 837 | 		@Override
 838 | 		public int hashCode() {
 839 | 			int hash = 3;
 840 | 			hash = 53 * hash + this.graphId;
 841 | 			hash = 53 * hash + Math.min(this.src, this.tgt);
 842 | 			hash = 53 * hash + Math.max(this.src, this.tgt);
 843 | 			hash = 53 * hash + (this.label != null ? this.label.hashCode() : 0);
 844 | 			return hash;
 845 | 		}
 846 | 
 847 | 		@Override
 848 | 		public boolean equals(Object obj) {
 849 | 			if (obj == null) {
 850 | 				return false;
 851 | 			}
 852 | 			if (getClass() != obj.getClass()) {
 853 | 				return false;
 854 | 			}
 855 | 			final ScorerEdge other = (ScorerEdge) obj;
 856 | 			if (this.graphId != other.graphId) {
 857 | 				return false;
 858 | 			}
 859 | 			if (Math.min(this.src, this.tgt) != Math.min(other.src, other.tgt)) {
 860 | 				return false;
 861 | 			}
 862 | 			if (Math.max(this.src, this.tgt) != Math.max(other.src, other.tgt)) {
 863 | 				return false;
 864 | 			}
 865 | 			if ((this.label == null) ? (other.label != null) : !this.label.equals(other.label)) {
 866 | 				return false;
 867 | 			}
 868 | 			return true;
 869 | 		}
 870 | 	}
 871 | 
 872 | 	private static class SemanticFrame {
 873 | 
 874 | 		final int graphId;
 875 | 		final int node;
 876 | 		final String sense;
 877 | 		final Set<ScorerEdge> outgoingEdges;
 878 | 
 879 | 		public SemanticFrame(int graphId, int node, String sense, Set<ScorerEdge> outgoingEdges) {
 880 | 			this.graphId = graphId;
 881 | 			this.node = node;
 882 | 			this.sense = sense;
 883 | 			this.outgoingEdges = outgoingEdges;
 884 | 		}
 885 | 
 886 | 		@Override
 887 | 		public int hashCode() {
 888 | 			int hash = 3;
 889 | 			hash = 53 * hash + this.graphId;
 890 | 			hash = 53 * hash + this.node;
 891 | 			hash = 53 * hash + (this.sense != null ? this.sense.hashCode() : 0);
 892 | 			hash = 53 * hash + (this.outgoingEdges != null ? this.outgoingEdges.hashCode() : 0);
 893 | 			return hash;
 894 | 		}
 895 | 
 896 | 		@Override
 897 | 		public boolean equals(Object obj) {
 898 | 			if (obj == null) {
 899 | 				return false;
 900 | 			}
 901 | 			if (getClass() != obj.getClass()) {
 902 | 				return false;
 903 | 			}
 904 | 			final SemanticFrame other = (SemanticFrame) obj;
 905 | 			if (this.graphId != other.graphId) {
 906 | 				return false;
 907 | 			}
 908 | 			if (this.node != other.node) {
 909 | 				return false;
 910 | 			}
 911 | 			if ((this.sense == null) ? (other.sense != null) : !this.sense.equals(other.sense)) {
 912 | 				return false;
 913 | 			}
 914 | 			if ((this.outgoingEdges == null) ? (other.outgoingEdges != null) : !this.outgoingEdges.equals(other.outgoingEdges)) {
 915 | 				return false;
 916 | 			}
 917 | 			return true;
 918 | 		}
 919 | 	}
 920 | 
 921 | 	private Set<String> getLabels() {
 922 | 		Set<String> labels = new HashSet<String>();
 923 | 		for (ScorerEdge edge : edgesInGoldStandard) {
 924 | 			labels.add(edge.label);
 925 | 		}
 926 | 		for (ScorerEdge edge : edgesInSystemOutput) {
 927 | 			labels.add(edge.label);
 928 | 		}
 929 | 		return labels;
 930 | 	}
 931 | 
 932 | 	private int getNEdgesByLabel(String label, Set<ScorerEdge> edges) {
 933 | 		int n = 0;
 934 | 		for (ScorerEdge edge : edges) {
 935 | 			n += edge.label.equals(label) ? 1 : 0;
 936 | 		}
 937 | 		return n;
 938 | 	}
 939 | 
 940 | 	private int getNEdgesInGoldStandardByLabel(String label) {
 941 | 		return getNEdgesByLabel(label, edgesInGoldStandard);
 942 | 	}
 943 | 
 944 | 	private int getNEdgesInSystemOutputByLabel(String label) {
 945 | 		return getNEdgesByLabel(label, edgesInSystemOutput);
 946 | 	}
 947 | 
 948 | 	private double getPrecisionPerLabel(String label) {
 949 | 		int nEdges = 0;
 950 | 		int nCorrect = 0;
 951 | 		for (ScorerEdge edgeS : edgesInSystemOutput) {
 952 | 			if (edgeS.label.equals(label)) {
 953 | 				nEdges++;
 954 | 				if (edgesInGoldStandard.contains(edgeS)) {
 955 | 					nCorrect++;
 956 | 				}
 957 | 			}
 958 | 		}
 959 | 		return (double) nCorrect / (double) nEdges;
 960 | 	}
 961 | 
 962 | 	private double getRecallPerLabel(String label) {
 963 | 		int nEdges = 0;
 964 | 		int nCorrect = 0;
 965 | 		for (ScorerEdge edgeG : edgesInGoldStandard) {
 966 | 			if (edgeG.label.equals(label)) {
 967 | 				nEdges++;
 968 | 				if (edgesInSystemOutput.contains(edgeG)) {
 969 | 					nCorrect++;
 970 | 				}
 971 | 			}
 972 | 		}
 973 | 		return (double) nCorrect / (double) nEdges;
 974 | 	}
 975 | 
 976 | 	private String getQuantizedLength(int length) {
 977 | 		if (length <= 4) {
 978 | 			return Integer.toString(length);
 979 | 		} else if (length < 10) {
 980 | 			return "5-9";
 981 | 		} else {
 982 | 			return "10-";
 983 | 		}
 984 | 	}
 985 | 
 986 | 	private String getQuantizedLength(ScorerEdge edge) {
 987 | 		return getQuantizedLength(edge.getLength());
 988 | 	}
 989 | 
 990 | 	private Set<String> getQuantizedLengths() {
 991 | 		Set<String> lengths = new HashSet<String>();
 992 | 		for (ScorerEdge edge : edgesInGoldStandard) {
 993 | 			lengths.add(getQuantizedLength(edge));
 994 | 		}
 995 | 		for (ScorerEdge edge : edgesInSystemOutput) {
 996 | 			lengths.add(getQuantizedLength(edge));
 997 | 		}
 998 | 		return lengths;
 999 | 	}
1000 | 
1001 | 	private int getNEdgesByQuantizedLength(String quantizedLength, Set<ScorerEdge> edges) {
1002 | 		int n = 0;
1003 | 		for (ScorerEdge edge : edges) {
1004 | 			if (getQuantizedLength(edge).equals(quantizedLength)) {
1005 | 				n++;
1006 | 			}
1007 | 		}
1008 | 		return n;
1009 | 	}
1010 | 
1011 | 	private int getNEdgesInGoldStandardByQuantizedLength(String quantizedLength) {
1012 | 		return getNEdgesByQuantizedLength(quantizedLength, edgesInGoldStandard);
1013 | 	}
1014 | 
1015 | 	private int getNEdgesInSystemOutputByQuantizedLength(String quantizedLength) {
1016 | 		return getNEdgesByQuantizedLength(quantizedLength, edgesInSystemOutput);
1017 | 	}
1018 | 
1019 | 	private double getPrecisionPerQuantizedLength(String quantizedLength) {
1020 | 		int nEdges = 0;
1021 | 		int nCorrect = 0;
1022 | 		for (ScorerEdge edgeS : edgesInSystemOutput) {
1023 | 			if (getQuantizedLength(edgeS).equals(quantizedLength)) {
1024 | 				nEdges++;
1025 | 				if (edgesInGoldStandard.contains(edgeS)) {
1026 | 					nCorrect++;
1027 | 				}
1028 | 			}
1029 | 		}
1030 | 		return (double) nCorrect / (double) nEdges;
1031 | 	}
1032 | 
1033 | 	private double getRecallPerQuantizedLength(String quantizedLength) {
1034 | 		int nEdges = 0;
1035 | 		int nCorrect = 0;
1036 | 		for (ScorerEdge edgeG : edgesInGoldStandard) {
1037 | 			if (getQuantizedLength(edgeG).equals(quantizedLength)) {
1038 | 				nEdges++;
1039 | 				if (edgesInSystemOutput.contains(edgeG)) {
1040 | 					nCorrect++;
1041 | 				}
1042 | 			}
1043 | 		}
1044 | 		return (double) nCorrect / (double) nEdges;
1045 | 	}
1046 | }
1047 | 


--------------------------------------------------------------------------------
/src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/Splitter.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * See the file "LICENSE" for the full license governing this code.
 3 |  */
 4 | package se.liu.ida.nlp.sdp.toolkit.tools;
 5 | 
 6 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
 7 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader;
 8 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015;
 9 | import se.liu.ida.nlp.sdp.toolkit.io.GraphWriter;
10 | import se.liu.ida.nlp.sdp.toolkit.io.GraphWriter2015;
11 | 
12 | /**
13 |  * Splits the SDP training data into training and development.
14 |  *
15 |  * @author Marco Kuhlmann
16 |  */
17 | public class Splitter {
18 | 
19 | 	public static void main(String[] args) throws Exception {
20 | 		GraphReader reader = new GraphReader2015(args[0]);
21 | 		GraphWriter writerTrain = new GraphWriter2015(args[1]);
22 | 		GraphWriter writerDevel = new GraphWriter2015(args[2]);
23 | 		Graph graph;
24 | 		while ((graph = reader.readGraph()) != null) {
25 | 			if (graph.id.substring(2, 4).equals("20")) {
26 | 				writerDevel.writeGraph(graph);
27 | 			} else {
28 | 				writerTrain.writeGraph(graph);
29 | 			}
30 | 		}
31 | 		reader.close();
32 | 		writerTrain.close();
33 | 		writerDevel.close();
34 | 	}
35 | }
36 | 


--------------------------------------------------------------------------------