├── .gitignore ├── LICENSE ├── README.md ├── build.gradle ├── run.sh ├── settings.gradle └── src └── main └── java └── se └── liu └── ida └── nlp └── sdp └── toolkit ├── graph ├── DFS.java ├── Edge.java ├── Graph.java ├── InspectedGraph.java └── Node.java ├── io ├── Constants.java ├── GraphReader.java ├── GraphReader2014.java ├── GraphReader2015.java ├── GraphWriter.java ├── GraphWriter2014.java ├── GraphWriter2015.java └── ParagraphReader.java └── tools ├── Analyzer.java ├── BasicAnalyzer.java ├── IOTest.java ├── PairedBootstrap.java ├── Scorer.java └── Splitter.java /.gitignore: -------------------------------------------------------------------------------- 1 | /build/ 2 | /.gradle/ 3 | /.nb-gradle/ 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Marco Kuhlmann 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Semantic Dependency Parsing Toolkit 2 | 3 | This repository contains a Java toolkit for semantic dependency parsing. 
It has been developed in connection with two shared tasks: 4 | 5 | * [SemEval-2014 Task on Broad-Coverage Semantic Dependency Parsing](http://alt.qcri.org/semeval2014/task8/) 6 | * [SemEval-2015 Task on Broad-Coverage Semantic Dependency Parsing](http://alt.qcri.org/semeval2015/task18/) 7 | 8 | Detailed information about the tasks can be found at the respective websites. 9 | 10 | ## Downloading 11 | 12 | The primary form of distribution for the project is via Git. See the [Releases](https://github.com/semantic-dependency-parsing/toolkit/releases) page for precompiled jar files. 13 | 14 | ## Building 15 | 16 | After checking out the project from the repository, you should be able to build it using [Gradle](http://www.gradle.org/). 17 | 18 | $ cd toolkit 19 | $ gradle build 20 | 21 | This will create a file `build/libs/sdp.jar` with the compiled classes. The jar can then be added to your classpath, whereby you will be able to use the provided classes in your own project. To see what is there, build the documentation: 22 | 23 | $ gradle javadoc 24 | 25 | The entry page for the documentation is `build/docs/javadoc/index.html`. 26 | 27 | ## Command-line tools 28 | 29 | Some of the tools implemented in the project can be called from the command line. The most relevant example is the `Scorer` tool, which is run as follows: 30 | 31 | $ java -cp build/libs/sdp.jar se.liu.ida.nlp.sdp.toolkit.tools.Scorer gold.sdp system.sdp representation=DM 32 | 33 | This will evaluate the parser output in the file `system.sdp` against the gold-standard analyses in the file `gold.sdp`, under the assumption that the data is given in the `DM` representation; other possible representations are `PAS` and `PSD`. The evaluation metrics used are defined on the [Evaluation page](http://alt.qcri.org/semeval2015/task18/index.php?id=evaluation). 
34 | 35 | The Git repository contains a convenience shell script called `run.sh` that allows you to use an abbreviated form of the above command, assuming that the jar file is in `build/libs/sdp.jar`: 36 | 37 | $ sh run.sh Scorer gold.sdp system.sdp representation=DM 38 | 39 | Abbreviations: 40 | 41 | LP: labeled precision 42 | LR: labeled recall 43 | LF: labeled F1 44 | LM: labeled exact match 45 | 46 | UP: unlabeled precision 47 | UR: unlabeled recall 48 | UF: unlabeled F1 49 | UM: unlabeled exact match 50 | 51 | PP: precision with respect to complete predications 52 | PR: recall with respect to complete predications 53 | PF: F1 with respect to complete predications 54 | 55 | FP: precision with respect to semantic frames 56 | FR: recall with respect to semantic frames 57 | FF: F1 with respect to semantic frames 58 | 59 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'java' 2 | apply plugin: 'maven-publish' 3 | 4 | sourceCompatibility = '1.8' 5 | 6 | repositories { 7 | mavenCentral() 8 | mavenLocal() 9 | } 10 | 11 | publishing { 12 | publications { 13 | mavenJava(MavenPublication) { 14 | groupId 'se.liu.ida.nlp.sdp' 15 | artifactId 'toolkit' 16 | version '2.0-SNAPSHOT' 17 | from components.java 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | root=$(cd "$(dirname "$0")"; pwd) 6 | pkg=se.liu.ida.nlp.sdp.toolkit.tools. 
7 | 8 | java -cp $root/build/libs/sdp.jar $pkg$@ 9 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'sdp' 2 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/graph/DFS.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.graph; 5 | 6 | /** 7 | * Depth-first graph search. 8 | * 9 | * @author Marco Kuhlmann 10 | */ 11 | public class DFS { 12 | 13 | /** 14 | * Whether to search the graph as an undirected graph. 15 | */ 16 | private final boolean undirected; 17 | 18 | /** 19 | * The graph. 20 | */ 21 | private final Graph graph; 22 | 23 | /** 24 | * The index of the run during which each node was (first) visited. 25 | */ 26 | private final int[] run; 27 | 28 | /** 29 | * The number of runs. 30 | */ 31 | private int nRuns; 32 | 33 | /** 34 | * The preorder timestamps of each node. 35 | */ 36 | private final int[] enter; 37 | 38 | /** 39 | * The postorder timestamps of each node. 40 | */ 41 | private final int[] leave; 42 | 43 | /** 44 | * A depth-first search on the specified graph. 45 | * 46 | * @param graph a graph 47 | * @param undirected if {@code true}, the graph will be searched as an 48 | * undirected graph 49 | */ 50 | public DFS(Graph graph, boolean undirected) { 51 | this.undirected = undirected; 52 | 53 | this.graph = graph; 54 | 55 | int nNodes = graph.getNNodes(); 56 | this.run = new int[nNodes]; 57 | this.enter = new int[nNodes]; 58 | this.leave = new int[nNodes]; 59 | computeTimestamps(); 60 | } 61 | 62 | /** 63 | * A depth-first search on the specified graph. 
64 | * 65 | * @param graph a graph 66 | */ 67 | public DFS(Graph graph) { 68 | this(graph, false); 69 | } 70 | 71 | /** 72 | * Computes the preorder and postorder timestamps for the inspected graph. 73 | */ 74 | private void computeTimestamps() { 75 | for (Node node : graph.getNodes()) { 76 | enter[node.id] = -1; 77 | } 78 | Timer timer = new Timer(); 79 | for (Node node : graph.getNodes()) { 80 | if (enter[node.id] == -1) { 81 | computeTimestamps(node, timer); 82 | nRuns++; 83 | } 84 | } 85 | } 86 | 87 | /** 88 | * Computes the preorder and postorder timestamps for the subgraph starting 89 | * at the specified node. 90 | * 91 | * @param node the entry point for the subgraph 92 | * @param timer the global timer 93 | */ 94 | private void computeTimestamps(Node node, Timer timer) { 95 | run[node.id] = nRuns; 96 | enter[node.id] = timer.tick(); 97 | for (Edge outgoingEdge : node.getOutgoingEdges()) { 98 | // Only visit nodes that have not been visited before. 99 | if (enter[outgoingEdge.target] == -1) { 100 | computeTimestamps(graph.getNode(outgoingEdge.target), timer); 101 | } 102 | } 103 | if (undirected) { 104 | for (Edge incomingEdge : node.getIncomingEdges()) { 105 | if (enter[incomingEdge.source] == -1) { 106 | computeTimestamps(graph.getNode(incomingEdge.source), timer); 107 | } 108 | } 109 | } 110 | leave[node.id] = timer.tick(); 111 | } 112 | 113 | /** 114 | * Timer used in depth-first search. 115 | */ 116 | private static final class Timer { 117 | 118 | /** 119 | * The current time. 120 | */ 121 | private int time; 122 | 123 | /** 124 | * Returns the current time, then increments it. 125 | * 126 | * @return the current time 127 | */ 128 | public int tick() { 129 | return time++; 130 | } 131 | } 132 | 133 | /** 134 | * Returns the number of runs of the search. This is the number of times the 135 | * recursive search was called to cover all nodes in the graph. 
136 | * 137 | * @return the number of runs of the search 138 | */ 139 | public int getNRuns() { 140 | return nRuns; 141 | } 142 | 143 | /** 144 | * Test whether the specified edge is a self-loop. 145 | * 146 | * @param edge an edge in the searched graph 147 | * @return {@code true} if the specified edge is a self-loop 148 | */ 149 | public boolean isSelfLoop(Edge edge) { 150 | return edge.source == edge.target; 151 | } 152 | 153 | /** 154 | * Test whether the specified edge is a back edge. 155 | * 156 | * @param edge an edge in the searched graph 157 | * @return {@code true} if the specified edge is a back edge 158 | */ 159 | public boolean isBackEdge(Edge edge) { 160 | return enter[edge.target] < enter[edge.source] && leave[edge.source] < leave[edge.target]; 161 | } 162 | 163 | /** 164 | * Test whether the searched graph is cyclic. 165 | * 166 | * @return {@code true} if the searched graph is cyclic 167 | */ 168 | public boolean isCyclic() { 169 | for (Edge edge : graph.getEdges()) { 170 | if (isSelfLoop(edge) || isBackEdge(edge)) { 171 | return true; 172 | } 173 | } 174 | return false; 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/graph/Edge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.graph; 5 | 6 | /** 7 | * An edge in a semantic dependency graph. 8 | * 9 | * @author Marco Kuhlmann 10 | */ 11 | public class Edge implements Comparable { 12 | 13 | /** 14 | * The unique ID of this edge. 15 | */ 16 | public final int id; 17 | /** 18 | * The ID of the source node of this edge. 19 | */ 20 | public final int source; 21 | /** 22 | * The ID of the target node of this edge. 23 | */ 24 | public final int target; 25 | /** 26 | * The label of this edge. 
27 | */ 28 | public final String label; 29 | 30 | /** 31 | * Construct a new edge. 32 | * 33 | * @param id the unique ID of the new edge 34 | * @param source the ID of the source node of the new edge 35 | * @param target the ID of the target node of the new edge 36 | * @param label the label of the new edge 37 | */ 38 | public Edge(int id, int source, int target, String label) { 39 | this.id = id; 40 | this.source = source; 41 | this.target = target; 42 | this.label = label; 43 | } 44 | 45 | /** 46 | * Compares this edge with the specified edge for order. The order used is 47 | * the lexicographical order on the (target, source) pairs. 48 | * 49 | * @param otherEdge the edge to be compared to this edge 50 | * @return a negative integer, zero, or a positive integer as this edge is 51 | * less than, equal to, or greater than the specified edge 52 | */ 53 | @Override 54 | public int compareTo(Edge otherEdge) { 55 | if (this.target == otherEdge.target) { 56 | return this.source - otherEdge.source; 57 | } else { 58 | return this.target - otherEdge.target; 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/graph/Graph.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.graph; 5 | 6 | import java.util.ArrayList; 7 | import java.util.LinkedList; 8 | import java.util.List; 9 | 10 | /** 11 | * A semantic dependency graph. 12 | * 13 | * @author Marco Kuhlmann 14 | */ 15 | public class Graph { 16 | 17 | /** 18 | * The list of nodes of this graph. 19 | */ 20 | private final List nodes; 21 | /** 22 | * The list of edges of this graph. 23 | */ 24 | private final List edges; 25 | /** 26 | * The unique ID of this graph. 27 | */ 28 | public final String id; 29 | 30 | /** 31 | * Construct an empty graph. 
32 | * 33 | * @param id the unique ID of the new graph 34 | */ 35 | public Graph(String id) { 36 | this.id = id; 37 | this.nodes = new ArrayList(); 38 | this.edges = new ArrayList(); 39 | } 40 | 41 | /** 42 | * Adds a new node to this graph. 43 | * 44 | * @param form the word form to be associated with the new node 45 | * @param lemma the lemma to be associated with the new node 46 | * @param pos the part-of-speech tag to be associated with the new node 47 | * @param isTop a flag indicating whether the new node is a TOP node 48 | * @param isPred a flag indicating whether the new node represents a 49 | * predicate 50 | * @param sense the sense or frame to be associated with the new node 51 | * @return the newly added node 52 | */ 53 | public Node addNode(String form, String lemma, String pos, boolean isTop, boolean isPred, String sense) { 54 | Node node = new Node(nodes.size(), form, lemma, pos, isTop, isPred, sense); 55 | nodes.add(node); 56 | return node; 57 | } 58 | 59 | /** 60 | * Adds a new edge to this graph. 61 | * 62 | * @param source the ID of the source node of the new edge 63 | * @param target the ID of the target node of the new edge 64 | * @param label the label of the new edge 65 | * @return the newly added edge 66 | */ 67 | public Edge addEdge(int source, int target, String label) { 68 | assert 0 <= source && source < nodes.size(); 69 | assert 0 <= target && target < nodes.size(); 70 | Edge edge = new Edge(edges.size(), source, target, label); 71 | edges.add(edge); 72 | nodes.get(source).addOutgoingEdge(edge); 73 | nodes.get(target).addIncomingEdge(edge); 74 | return edge; 75 | } 76 | 77 | /** 78 | * Returns the number of nodes of this graph. 79 | * 80 | * @return the number of nodes of this graph 81 | */ 82 | public int getNNodes() { 83 | return nodes.size(); 84 | } 85 | 86 | /** 87 | * Returns the nodes of this graph. This returns a list whose elements are 88 | * sorted in increasing order of their IDs. 
89 | * 90 | * @return the nodes of this graph 91 | */ 92 | public List getNodes() { 93 | return nodes; 94 | } 95 | 96 | /** 97 | * Returns the node of this graph with the specified ID. 98 | * 99 | * @param node the ID of the node to return 100 | * @return the node with the specified ID 101 | */ 102 | public Node getNode(int node) { 103 | assert 0 <= node && node < nodes.size(); 104 | return nodes.get(node); 105 | } 106 | 107 | /** 108 | * Returns the number of edges of this graph. 109 | * 110 | * @return the number of edges of this graph 111 | */ 112 | public int getNEdges() { 113 | return edges.size(); 114 | } 115 | 116 | /** 117 | * Returns the edges of this graph. This returns a list whose elements are 118 | * sorted in increasing order of their IDs. 119 | * 120 | * @return the edges of this graph 121 | */ 122 | public List getEdges() { 123 | return edges; 124 | } 125 | 126 | /** 127 | * Returns the edge of this graph with the specified ID. 128 | * 129 | * @param edge the ID of the edge of return 130 | * @return the edge with the specified ID 131 | */ 132 | public Edge getEdge(int edge) { 133 | assert 0 <= edge && edge < edges.size(); 134 | return edges.get(edge); 135 | } 136 | 137 | /** 138 | * Returns the top nodes of this graph. This returns a list whose elements 139 | * are sorted in increasing order of their IDs. 140 | * 141 | * @return the top nodes of this graph 142 | */ 143 | public List getTops() { 144 | List roots = new LinkedList(); 145 | for (Node node : nodes) { 146 | if (node.isTop) { 147 | roots.add(node); 148 | } 149 | } 150 | return roots; 151 | } 152 | 153 | /** 154 | * Returns the predicates of this graph. This returns a list whose elements 155 | * are sorted in increasing order of their IDs. 
156 | * 157 | * @return the predicates of this graph 158 | */ 159 | public List getPreds() { 160 | List preds = new LinkedList(); 161 | for (Node node : nodes) { 162 | if (node.isPred) { 163 | preds.add(node); 164 | } 165 | } 166 | return preds; 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/graph/InspectedGraph.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.graph; 5 | 6 | /** 7 | * Inspect graph-theoretic properties. 8 | * 9 | * @author Marco Kuhlmann 10 | */ 11 | public class InspectedGraph { 12 | 13 | /** 14 | * The analyzed graph. 15 | */ 16 | private final Graph graph; 17 | 18 | /** 19 | * The number of non-wall node of this graph. 20 | */ 21 | private final int nNonWallNodes; 22 | 23 | /** 24 | * DFS of the graph. 25 | */ 26 | private final DFS directedDFS; 27 | 28 | /** 29 | * DFS of the undirected graph. 30 | */ 31 | private final DFS undirectedDFS; 32 | 33 | /** 34 | * Flags indicating whether a node is a singleton. 35 | */ 36 | private final boolean[] isSingleton; 37 | 38 | /** 39 | * The number of singleton nodes in this graph. 40 | */ 41 | private final int nSingletons; 42 | 43 | /** 44 | * Construct a new inspector for the specified graph. 45 | * 46 | * @param graph the graph to be inspected 47 | */ 48 | public InspectedGraph(Graph graph) { 49 | this.graph = graph; 50 | 51 | int nNodes = graph.getNNodes(); 52 | this.nNonWallNodes = nNodes - 1; 53 | this.isSingleton = new boolean[nNodes]; 54 | this.nSingletons = computeSingletons(); 55 | 56 | this.directedDFS = new DFS(graph); 57 | this.undirectedDFS = new DFS(graph, true); 58 | } 59 | 60 | /** 61 | * Returns the number of non-wall nodes of the inspected graph. 
62 | * 63 | * @return the number of non-wall nodes of the inspected graph 64 | */ 65 | public int getNNonWallNodes() { 66 | return nNonWallNodes; 67 | } 68 | 69 | /** 70 | * Returns the number of weakly connected components of the inspected graph. 71 | * 72 | * @return The number of weakly connected components of the inspected graph 73 | */ 74 | public int getNComponents() { 75 | return undirectedDFS.getNRuns(); 76 | } 77 | 78 | /** 79 | * Tests whether the inspected graph contains a cycle. 80 | * 81 | * @return {@code true} if and only if the inspected graph contains a cycle 82 | */ 83 | public boolean isCyclic() { 84 | return directedDFS.isCyclic(); 85 | } 86 | 87 | /** 88 | * Computes flags indicating whether a node is a singleton. 89 | */ 90 | private int computeSingletons() { 91 | int n = 0; 92 | for (Node node : graph.getNodes()) { 93 | if (node.id != 0 && !node.hasIncomingEdges() && !node.hasOutgoingEdges() && !node.isTop) { 94 | isSingleton[node.id] = true; 95 | n++; 96 | } 97 | } 98 | return n; 99 | } 100 | 101 | /** 102 | * Tests whether the specified node is a singleton. 103 | * 104 | * @param id a node id 105 | * @return {@code true} if the specified node is a singleton 106 | */ 107 | public boolean isSingleton(int id) { 108 | return isSingleton[id]; 109 | } 110 | 111 | /** 112 | * Returns the number of singleton nodes of this graph. A node is a 113 | * singleton if it has no neighbors and is not a top node. 114 | * 115 | * @return the number of singleton nodes of this graph 116 | */ 117 | public int getNSingletons() { 118 | return nSingletons; 119 | } 120 | 121 | /** 122 | * Computes the maximal indegree of the nodes in the inspected graph. 
123 | * 124 | * @return the maximal indegree of the nodes in the inspected graph 125 | */ 126 | public int getMaximalIndegree() { 127 | int max = 0; 128 | for (Node node : graph.getNodes()) { 129 | max = Math.max(max, node.getNIncomingEdges()); 130 | } 131 | return max; 132 | } 133 | 134 | /** 135 | * Computes the maximal outdegree of the nodes in the inspected graph. 136 | * 137 | * @return the maximal outdegree of the nodes in the inspected graph 138 | */ 139 | public int getMaximalOutdegree() { 140 | int max = 0; 141 | for (Node node : graph.getNodes()) { 142 | max = Math.max(max, node.getNOutgoingEdges()); 143 | } 144 | return max; 145 | } 146 | 147 | /** 148 | * Returns the number of root nodes in the inspected graph. A 149 | * root node is a node without incoming edges. The wall node is not 150 | * considered to be a root node. 151 | * 152 | * @return the number of root nodes in the inspected graph 153 | */ 154 | public int getNRootNodes() { 155 | int nRootNodes = 0; 156 | for (Node node : graph.getNodes()) { 157 | nRootNodes += node.hasIncomingEdges() ? 0 : 1; 158 | } 159 | return nRootNodes - 1; // the wall node 160 | } 161 | 162 | /** 163 | * Returns the number of leaf nodes in the inspected graph. A 164 | * leaf node is a node without outgoing edges. The wall node is not 165 | * considered to be a leaf node. 166 | * 167 | * @return the number of leaf nodes in the inspected graph 168 | */ 169 | public int getNLeafNodes() { 170 | int nLeafNodes = 0; 171 | for (Node node : graph.getNodes()) { 172 | nLeafNodes += node.hasOutgoingEdges() ? 0 : 1; 173 | } 174 | return nLeafNodes - 1; // the wall node 175 | } 176 | 177 | /** 178 | * Tests whether the inspected graph is a forest. A forest is an acyclic 179 | * graph in which every node has at most one incoming edge. 
180 | * 181 | * @return {@code true} if and only if the inspected graph is a forest 182 | */ 183 | public boolean isForest() { 184 | return !isCyclic() && getMaximalIndegree() <= 1; 185 | } 186 | 187 | /** 188 | * Tests whether the inspected graph is a tree. A tree is a forest with 189 | * exactly one root node. 190 | * 191 | * @return {@code true} if and only if the inspected graph is a tree 192 | */ 193 | public boolean isTree() { 194 | return isForest() && getNRootNodes() - getNSingletons() == 1; 195 | } 196 | 197 | /** 198 | * Tests whether the inspected graph is noncrossing. A graph is noncrossing 199 | * if there are no overlapping edges. 200 | * 201 | * @return {@code true} if and only if the inspected graph is noncrossing 202 | */ 203 | public boolean isNoncrossing() { 204 | for (Edge edge1 : graph.getEdges()) { 205 | int min1 = Math.min(edge1.source, edge1.target); 206 | int max1 = Math.max(edge1.source, edge1.target); 207 | for (Edge edge2 : graph.getEdges()) { 208 | int min2 = Math.min(edge2.source, edge2.target); 209 | int max2 = Math.max(edge2.source, edge2.target); 210 | if (overlap(min1, max1, min2, max2)) { 211 | return false; 212 | } 213 | } 214 | } 215 | return true; 216 | } 217 | 218 | /** 219 | * Tests whether the specified edges overlap (cross). 220 | * 221 | * @param min1 the position of the left node of the first edge 222 | * @param max1 the position of the right node of the first edge 223 | * @param min2 the position of the left node of the second edge 224 | * @param max2 the position of the right node of the second edge 225 | * @return {@code true} if and only if the specified edges overlap 226 | */ 227 | private static boolean overlap(int min1, int max1, int min2, int max2) { 228 | return min1 < min2 && min2 < max1 && max1 < max2 || min2 < min1 && min1 < max2 && max2 < max1; 229 | } 230 | 231 | /** 232 | * Tests whether the inspected graph is projective. A graph is projective if 233 | * it is noncrossing and there are no covered roots. 
In the context of 234 | * semantic dependency graphs, a root is defined as a non-singleton 235 | * node without incoming edges. 236 | * 237 | * @return {@code true} if and only if the inspected graph is projective 238 | */ 239 | public boolean isProjective() { 240 | if (!isNoncrossing()) { 241 | return false; 242 | } else { 243 | for (Edge edge : graph.getEdges()) { 244 | int min = Math.min(edge.source, edge.target); 245 | int max = Math.max(edge.source, edge.target); 246 | for (int i = min + 1; i < max; i++) { 247 | Node node = graph.getNode(i); 248 | if (!isSingleton(i) && !node.hasIncomingEdges()) { 249 | return false; 250 | } 251 | } 252 | } 253 | return true; 254 | } 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/graph/Node.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.graph; 5 | 6 | import java.util.LinkedList; 7 | import java.util.List; 8 | 9 | /** 10 | * A node in a semantic dependency graph. 11 | * 12 | * @author Marco Kuhlmann 13 | */ 14 | public class Node { 15 | 16 | /** 17 | * The unique ID of this node. 18 | */ 19 | public final int id; 20 | /** 21 | * The list of incoming edges of this node. 22 | */ 23 | public final List incomingEdges; 24 | /** 25 | * The list of outgoing edges of this node. 26 | */ 27 | public final List outgoingEdges; 28 | /** 29 | * The word form associated with this node. 30 | */ 31 | public final String form; 32 | /** 33 | * The lemma associated with this node. 34 | */ 35 | public final String lemma; 36 | /** 37 | * The part-of-speech tag associated with this node. 38 | */ 39 | public final String pos; 40 | /** 41 | * Whether this node is a top node. 42 | */ 43 | public final boolean isTop; 44 | /** 45 | * Whether this node is a predicate. 
46 | */ 47 | public final boolean isPred; 48 | /** 49 | * The sense or frame of this predicate. 50 | */ 51 | public final String sense; 52 | 53 | /** 54 | * Construct a new node. 55 | * 56 | * @param id the unique ID of the new node 57 | * @param form the word form to be associated with the new node 58 | * @param lemma the lemma to be associated with the new node 59 | * @param pos the part-of-speech tag to be associated with the new node 60 | * @param isTop a flag indicating whether the new node is a top node 61 | * @param isPred a flag indicating whether the new node is a predicate 62 | * @param sense the sense or frame to be associated with the new node 63 | */ 64 | public Node(int id, String form, String lemma, String pos, boolean isTop, boolean isPred, String sense) { 65 | this.id = id; 66 | this.incomingEdges = new LinkedList(); 67 | this.outgoingEdges = new LinkedList(); 68 | this.form = form; 69 | this.lemma = lemma; 70 | this.pos = pos; 71 | this.isTop = isTop; 72 | this.isPred = isPred; 73 | this.sense = sense; 74 | } 75 | 76 | /** 77 | * Adds the specified edge as an incoming edge of this node. 78 | * 79 | * @param edge the edge to be added as an incoming edge 80 | * @return the newly added edge 81 | */ 82 | public Edge addIncomingEdge(Edge edge) { 83 | incomingEdges.add(edge); 84 | return edge; 85 | } 86 | 87 | /** 88 | * Tests whether this node has any incoming edges. 89 | * 90 | * @return {@code true} if this node has incoming edges; {@code false} 91 | * otherwise 92 | */ 93 | public boolean hasIncomingEdges() { 94 | return !incomingEdges.isEmpty(); 95 | } 96 | 97 | /** 98 | * Returns the number of incoming edges of this node. 99 | * 100 | * @return the number of incoming edges of this node 101 | */ 102 | public int getNIncomingEdges() { 103 | return incomingEdges.size(); 104 | } 105 | 106 | /** 107 | * Returns the incoming edges of this node. 
108 | * 109 | * @return the incoming edges of this node 110 | */ 111 | public List getIncomingEdges() { 112 | return incomingEdges; 113 | } 114 | 115 | /** 116 | * Adds the specified edge as an outgoing edge of this node. 117 | * 118 | * @param edge the edge to be added as an outgoing edge 119 | * @return the newly added edge 120 | */ 121 | public Edge addOutgoingEdge(Edge edge) { 122 | outgoingEdges.add(edge); 123 | return edge; 124 | } 125 | 126 | /** 127 | * Tests whether this node has any outgoing edges. 128 | * 129 | * @return {@code true} if this node has any outgoing edges; {@code false} 130 | * otherwise 131 | */ 132 | public boolean hasOutgoingEdges() { 133 | return !outgoingEdges.isEmpty(); 134 | } 135 | 136 | /** 137 | * Returns the number of outgoing edges of this node. 138 | * 139 | * @return the number of outgoing edges of this node 140 | */ 141 | public int getNOutgoingEdges() { 142 | return outgoingEdges.size(); 143 | } 144 | 145 | /** 146 | * Returns the outgoing edges of this node. 147 | * 148 | * @return the outgoing edges of this node. 149 | */ 150 | public List getOutgoingEdges() { 151 | return outgoingEdges; 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/io/Constants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.io; 5 | 6 | /** 7 | * Constants related to CoNLL-type data formats. 8 | * 9 | * @author Marco Kuhlmann 10 | */ 11 | public class Constants { 12 | 13 | /** 14 | * The string that is used to separate data columns. 15 | */ 16 | public static final String COLUMN_SEPARATOR = "\t"; 17 | /** 18 | * The string that represents undefined values. 19 | */ 20 | public static final String UNDEFINED = "_"; 21 | /** 22 | * The word form associated with the wall token. 
23 | */ 24 | public static final String WALL_FORM = "$$_FORM"; 25 | /** 26 | * The lemma associated with the wall token. 27 | */ 28 | public static final String WALL_LEMMA = "$$_LEMMA"; 29 | /** 30 | * The part-of-speech tag associated with the wall token. 31 | */ 32 | public static final String WALL_POS = "$$_POS"; 33 | /** 34 | * The sense or frame associated with the wall token. 35 | */ 36 | public static final String WALL_SENSE = "$$_SENSE"; 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.io; 5 | 6 | import java.io.IOException; 7 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph; 8 | 9 | /** 10 | * Read semantic dependency graphs from a file. 11 | * 12 | * @author Marco Kuhlmann 13 | */ 14 | public interface GraphReader { 15 | 16 | /** 17 | * Reads a single graph. 18 | * 19 | * @return the graph read, or {@code null} if the end of the stream has been 20 | * reached 21 | * @throws IOException if an I/O error occurs 22 | */ 23 | abstract public Graph readGraph() throws IOException; 24 | 25 | /** 26 | * Closes the stream and releases any system resources associated with it. 27 | * 28 | * @throws IOException if an I/O error occurs 29 | */ 30 | abstract public void close() throws IOException; 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphReader2014.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 
3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.io; 5 | 6 | import java.io.File; 7 | import java.io.FileNotFoundException; 8 | import java.io.IOException; 9 | import java.io.Reader; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph; 13 | import se.liu.ida.nlp.sdp.toolkit.graph.Node; 14 | 15 | /** 16 | * Read semantic dependency graphs in the SDP 2014 format. The format is 17 | * specified 18 | * here. 19 | * 20 | * @author Marco Kuhlmann 21 | */ 22 | public class GraphReader2014 extends ParagraphReader implements GraphReader { 23 | 24 | /** 25 | * Create a graph reader, using the default input-buffer size. 26 | * 27 | * @param reader a Reader object to provide the underlying stream 28 | */ 29 | public GraphReader2014(Reader reader) { 30 | super(reader); 31 | } 32 | 33 | /** 34 | * Create a graph reader that reads from the specified file. The file will 35 | * be read using the default input-buffer size. 36 | * 37 | * @param file the file to read from 38 | * @throws FileNotFoundException if the specified file does not exist, is a 39 | * directory rather than a regular file, or for some other reason cannot be 40 | * opened for reading 41 | */ 42 | public GraphReader2014(File file) throws FileNotFoundException { 43 | super(file); 44 | } 45 | 46 | /** 47 | * Create a graph reader that reads from the specified file. The file will 48 | * be read using the default input-buffer size. 49 | * 50 | * @param fileName the name of the file to read from 51 | * @throws FileNotFoundException if the specified file does not exist, is a 52 | * directory rather than a regular file, or for some other reason cannot be 53 | * opened for reading 54 | */ 55 | public GraphReader2014(String fileName) throws FileNotFoundException { 56 | super(fileName); 57 | } 58 | 59 | /** 60 | * Reads a single graph. 
61 | * 62 | * @return the graph read, or {@code null} if the end of the stream has been 63 | * reached 64 | * @throws IOException if an I/O error occurs 65 | */ 66 | @Override 67 | public Graph readGraph() throws IOException { 68 | List lines = super.readParagraph(); 69 | if (lines == null) { 70 | return null; 71 | } else { 72 | // Every graph should contain at least one token. 73 | assert lines.size() >= 2; 74 | // Assert the format of the graph ID. 75 | assert lines.get(0).matches("#2[0-9]{7}$"); 76 | 77 | Graph graph = new Graph(lines.get(0)); 78 | 79 | // Add the wall node. 80 | graph.addNode(Constants.WALL_FORM, Constants.WALL_LEMMA, Constants.WALL_POS, false, false, Constants.WALL_SENSE); 81 | 82 | // Add the token nodes to the graph and collect a list of predicates. 83 | List predicates = new ArrayList(); 84 | for (String line : lines.subList(1, lines.size())) { 85 | String[] tokens = line.split(Constants.COLUMN_SEPARATOR); 86 | 87 | // There should be at least six columns: ID, FORM, LEMMA, POS, TOP, PRED 88 | assert tokens.length >= 6; 89 | // Enforce valid values for the TOP column. 90 | assert tokens[4].equals("+") || tokens[4].equals("-"); 91 | // Enforce valid values for the PRED column. 92 | assert tokens[5].equals("+") || tokens[5].equals("-"); 93 | 94 | String form = tokens[1]; 95 | String lemma = tokens[2]; 96 | String pos = tokens[3]; 97 | boolean isTop = tokens[4].equals("+"); 98 | boolean isPred = tokens[5].equals("+"); 99 | 100 | Node node = graph.addNode(form, lemma, pos, isTop, isPred, Constants.UNDEFINED); 101 | // Make sure that the node ID equals the value of the ID column. 102 | assert node.id == Integer.parseInt(tokens[0]); 103 | 104 | if (node.isPred) { 105 | predicates.add(node.id); 106 | } 107 | } 108 | 109 | // Add the edges to the graph. 
110 | int id = 1; 111 | for (String line : lines.subList(1, lines.size())) { 112 | String[] tokens = line.split(Constants.COLUMN_SEPARATOR); 113 | 114 | // There should be exactly 6 + number of predicates many columns. 115 | assert tokens.length == 6 + predicates.size(); 116 | 117 | for (int i = 6; i < tokens.length; i++) { 118 | if (!tokens[i].equals(Constants.UNDEFINED)) { 119 | graph.addEdge(predicates.get(i - 6), id, tokens[i]); 120 | } 121 | } 122 | id++; 123 | } 124 | 125 | // If a node is labeled as a PRED, it should have outgoing edges. 126 | for (Node node : graph.getNodes()) { 127 | assert !node.isPred || node.hasOutgoingEdges(); 128 | } 129 | 130 | return graph; 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphReader2015.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.io; 5 | 6 | import java.io.File; 7 | import java.io.FileNotFoundException; 8 | import java.io.IOException; 9 | import java.io.Reader; 10 | import java.io.UncheckedIOException; 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph; 14 | import se.liu.ida.nlp.sdp.toolkit.graph.Node; 15 | 16 | /** 17 | * Read semantic dependency graphs in the SDP 2015 format. The format is 18 | * specified 19 | * here. 20 | * 21 | * @author Marco Kuhlmann 22 | */ 23 | public class GraphReader2015 extends ParagraphReader implements GraphReader { 24 | 25 | /** 26 | * Create a graph reader, using the default input-buffer size. 27 | * 28 | * @param reader a Reader object to provide the underlying stream 29 | */ 30 | public GraphReader2015(Reader reader) { 31 | super(reader); 32 | readFirstLine(); 33 | } 34 | 35 | /** 36 | * Create a graph reader that reads from the specified file. 
The file will 37 | * be read using the default input-buffer size. 38 | * 39 | * @param file the file to read from 40 | * @throws FileNotFoundException if the specified file does not exist, is a 41 | * directory rather than a regular file, or for some other reason cannot be 42 | * opened for reading 43 | */ 44 | public GraphReader2015(File file) throws FileNotFoundException { 45 | super(file); 46 | readFirstLine(); 47 | } 48 | 49 | /** 50 | * Create a graph reader that reads from the specified file. The file will 51 | * be read using the default input-buffer size. 52 | * 53 | * @param fileName the name of the file to read from 54 | * @throws FileNotFoundException if the specified file does not exist, is a 55 | * directory rather than a regular file, or for some other reason cannot be 56 | * opened for reading 57 | */ 58 | public GraphReader2015(String fileName) throws FileNotFoundException { 59 | super(fileName); 60 | readFirstLine(); 61 | } 62 | 63 | /** 64 | * Reads the format identifier line. 65 | */ 66 | private void readFirstLine() { 67 | try { 68 | String line = super.readLine(); 69 | assert line.equals("#SDP 2015"); 70 | } catch (IOException e) { 71 | throw new UncheckedIOException(e); 72 | } 73 | } 74 | 75 | /** 76 | * Reads a single graph. 77 | * 78 | * @return the graph read, or {@code null} if the end of the stream has been 79 | * reached 80 | * @throws IOException if an I/O error occurs 81 | */ 82 | @Override 83 | public Graph readGraph() throws IOException { 84 | List lines = super.readParagraph(); 85 | if (lines == null) { 86 | return null; 87 | } else { 88 | // Every graph should contain at least one token. 89 | assert lines.size() >= 2; 90 | // Assert the format of the graph ID. 91 | assert lines.get(0).matches("#2[0-9]{7}$"); 92 | 93 | Graph graph = new Graph(lines.get(0)); 94 | 95 | // Add the wall node. 
96 | graph.addNode(Constants.WALL_FORM, Constants.WALL_LEMMA, Constants.WALL_POS, false, false, Constants.WALL_SENSE); 97 | 98 | // Add the token nodes to the graph and collect a list of predicates. 99 | List predicates = new ArrayList(); 100 | for (String line : lines.subList(1, lines.size())) { 101 | String[] tokens = line.split(Constants.COLUMN_SEPARATOR); 102 | 103 | // There should be at least seven columns: ID, FORM, LEMMA, POS, TOP, PRED, SENSE 104 | assert tokens.length >= 7; 105 | // Enforce valid values for the TOP column. 106 | assert tokens[4].equals("+") || tokens[4].equals("-"); 107 | // Enforce valid values for the PRED column. 108 | assert tokens[5].equals("+") || tokens[5].equals("-"); 109 | 110 | String form = tokens[1]; 111 | String lemma = tokens[2]; 112 | String pos = tokens[3]; 113 | boolean isTop = tokens[4].equals("+"); 114 | boolean isPred = tokens[5].equals("+"); 115 | String sense = tokens[6]; 116 | 117 | Node node = graph.addNode(form, lemma, pos, isTop, isPred, sense); 118 | // Make sure that the node ID equals the value of the ID column. 119 | assert node.id == Integer.parseInt(tokens[0]); 120 | 121 | if (node.isPred) { 122 | predicates.add(node.id); 123 | } 124 | } 125 | 126 | // Add the edges to the graph. 127 | int id = 1; 128 | for (String line : lines.subList(1, lines.size())) { 129 | String[] tokens = line.split(Constants.COLUMN_SEPARATOR); 130 | 131 | // There should be exactly 7 + number of predicates many columns. 132 | assert tokens.length == 7 + predicates.size(); 133 | 134 | for (int i = 7; i < tokens.length; i++) { 135 | if (!tokens[i].equals(Constants.UNDEFINED)) { 136 | graph.addEdge(predicates.get(i - 7), id, tokens[i]); 137 | } 138 | } 139 | id++; 140 | } 141 | 142 | // If a node is labeled as a PRED, it should have outgoing edges. 
143 | for (Node node : graph.getNodes()) { 144 | assert !node.isPred || node.hasOutgoingEdges(); 145 | } 146 | 147 | return graph; 148 | } 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.io; 5 | 6 | import java.io.IOException; 7 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph; 8 | 9 | /** 10 | * Write semantic dependency graphs to a file. 11 | * 12 | * @author Marco Kuhlmann 13 | */ 14 | public interface GraphWriter { 15 | 16 | /** 17 | * Writes a single graph. 18 | * 19 | * @param graph the graph to be written 20 | * @throws IOException if an I/O error occurs 21 | */ 22 | abstract public void writeGraph(Graph graph) throws IOException; 23 | 24 | /** 25 | * Closes the stream and releases any system resources associated with it. 26 | * 27 | * @throws IOException if an I/O error occurs 28 | */ 29 | abstract public void close() throws IOException; 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphWriter2014.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.io; 5 | 6 | import java.io.BufferedWriter; 7 | import java.io.File; 8 | import java.io.FileWriter; 9 | import java.io.IOException; 10 | import java.io.PrintWriter; 11 | import se.liu.ida.nlp.sdp.toolkit.graph.Edge; 12 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph; 13 | import se.liu.ida.nlp.sdp.toolkit.graph.Node; 14 | 15 | /** 16 | * Write semantic dependency graphs in the SDP 2014 format. The format is 17 | * specified 18 | * here. 
19 | * 20 | * @author Marco Kuhlmann 21 | */ 22 | public class GraphWriter2014 implements GraphWriter { 23 | 24 | /** 25 | * The low-level writer. 26 | */ 27 | private final PrintWriter writer; 28 | 29 | /** 30 | * Create a graph writer that writes to the specified PrintWriter. 31 | * 32 | * @param writer the PrintWriter to be written to 33 | */ 34 | public GraphWriter2014(PrintWriter writer) { 35 | this.writer = writer; 36 | } 37 | 38 | /** 39 | * Create a graph writer that writes to the specified file. 40 | * 41 | * @param file the file to write to 42 | * @throws IOException if the specified file does not exist, is a directory 43 | * rather than a regular file, or for some other reason cannot be opened for 44 | * writing 45 | */ 46 | public GraphWriter2014(File file) throws IOException { 47 | this(new PrintWriter(new BufferedWriter(new FileWriter(file)))); 48 | } 49 | 50 | /** 51 | * Create a graph writer that writes to the specified file. 52 | * 53 | * @param fileName the name of the file to read from 54 | * @throws IOException if the specified file does not exist, is a directory 55 | * rather than a regular file, or for some other reason cannot be opened for 56 | * writing 57 | */ 58 | public GraphWriter2014(String fileName) throws IOException { 59 | this(new File(fileName)); 60 | } 61 | 62 | /** 63 | * Writes a single graph. 
64 | * 65 | * @param graph the graph to be written 66 | * @throws IOException if an I/O error occurs 67 | */ 68 | @Override 69 | public void writeGraph(Graph graph) throws IOException { 70 | int nNodes = graph.getNNodes(); 71 | 72 | String[][] labels = new String[nNodes][nNodes]; 73 | for (Edge edge : graph.getEdges()) { 74 | labels[edge.source][edge.target] = edge.label; 75 | } 76 | 77 | writer.println(graph.id); 78 | 79 | for (Node node : graph.getNodes()) { 80 | if (node.id > 0) { 81 | StringBuilder sb = new StringBuilder(); 82 | // Field 1: ID 83 | sb.append(Integer.toString(node.id)); 84 | sb.append(Constants.COLUMN_SEPARATOR); 85 | // Field 2: FORM 86 | sb.append(node.form); 87 | sb.append(Constants.COLUMN_SEPARATOR); 88 | // Field 3: LEMMA 89 | sb.append(node.lemma); 90 | sb.append(Constants.COLUMN_SEPARATOR); 91 | // Field 4: POS 92 | sb.append(node.pos); 93 | sb.append(Constants.COLUMN_SEPARATOR); 94 | // Field 5: TOP 95 | sb.append(node.isTop ? "+" : "-"); 96 | sb.append(Constants.COLUMN_SEPARATOR); 97 | // Field 6: PRED 98 | sb.append(node.isPred ? "+" : "-"); 99 | 100 | for (Node source : graph.getNodes().subList(1, nNodes)) { 101 | if (source.isPred) { 102 | sb.append(Constants.COLUMN_SEPARATOR); 103 | String label = labels[source.id][node.id]; 104 | if (label == null) { 105 | sb.append(Constants.UNDEFINED); 106 | } else { 107 | sb.append(label); 108 | } 109 | } 110 | } 111 | 112 | writer.println(sb.toString()); 113 | } 114 | } 115 | 116 | writer.println(); 117 | } 118 | 119 | /** 120 | * Closes the stream and releases any system resources associated with it. 
121 | * 122 | * @throws IOException if an I/O error occurs 123 | */ 124 | @Override 125 | public void close() throws IOException { 126 | writer.close(); 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/io/GraphWriter2015.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.io; 5 | 6 | import java.io.BufferedWriter; 7 | import java.io.File; 8 | import java.io.FileWriter; 9 | import java.io.IOException; 10 | import java.io.PrintWriter; 11 | import se.liu.ida.nlp.sdp.toolkit.graph.Edge; 12 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph; 13 | import se.liu.ida.nlp.sdp.toolkit.graph.Node; 14 | 15 | /** 16 | * Write semantic dependency graphs in the SDP 2015 format. The format is 17 | * specified 18 | * here. 19 | * 20 | * @author Marco Kuhlmann 21 | */ 22 | public class GraphWriter2015 implements GraphWriter { 23 | 24 | /** 25 | * The low-level writer. 26 | */ 27 | private final PrintWriter writer; 28 | 29 | /** 30 | * Create a graph writer that writes to the specified PrintWriter. 31 | * 32 | * @param writer the PrintWriter to be written to 33 | */ 34 | public GraphWriter2015(PrintWriter writer) { 35 | this.writer = writer; 36 | writeFirstLine(); 37 | } 38 | 39 | /** 40 | * Create a graph writer that writes to the specified file. 41 | * 42 | * @param file the file to write to 43 | * @throws IOException if the specified file does not exist, is a directory 44 | * rather than a regular file, or for some other reason cannot be opened for 45 | * writing 46 | */ 47 | public GraphWriter2015(File file) throws IOException { 48 | this(new PrintWriter(new BufferedWriter(new FileWriter(file)))); 49 | } 50 | 51 | /** 52 | * Create a graph writer that writes to the specified file. 
53 | * 54 | * @param fileName the name of the file to read from 55 | * @throws IOException if the specified file does not exist, is a directory 56 | * rather than a regular file, or for some other reason cannot be opened for 57 | * writing 58 | */ 59 | public GraphWriter2015(String fileName) throws IOException { 60 | this(new File(fileName)); 61 | } 62 | 63 | /** 64 | * Writes the format identifier line. 65 | */ 66 | private void writeFirstLine() { 67 | writer.println("#SDP 2015"); 68 | } 69 | 70 | /** 71 | * Writes a single graph. 72 | * 73 | * @param graph the graph to be written 74 | * @throws IOException if an I/O error occurs 75 | */ 76 | @Override 77 | public void writeGraph(Graph graph) throws IOException { 78 | int nNodes = graph.getNNodes(); 79 | 80 | String[][] labels = new String[nNodes][nNodes]; 81 | for (Edge edge : graph.getEdges()) { 82 | labels[edge.source][edge.target] = edge.label; 83 | } 84 | 85 | writer.println(graph.id); 86 | 87 | for (Node node : graph.getNodes()) { 88 | if (node.id > 0) { 89 | StringBuilder sb = new StringBuilder(); 90 | // Field 1: ID 91 | sb.append(Integer.toString(node.id)); 92 | sb.append(Constants.COLUMN_SEPARATOR); 93 | // Field 2: FORM 94 | sb.append(node.form); 95 | sb.append(Constants.COLUMN_SEPARATOR); 96 | // Field 3: LEMMA 97 | sb.append(node.lemma); 98 | sb.append(Constants.COLUMN_SEPARATOR); 99 | // Field 4: POS 100 | sb.append(node.pos); 101 | sb.append(Constants.COLUMN_SEPARATOR); 102 | // Field 5: TOP 103 | sb.append(node.isTop ? "+" : "-"); 104 | sb.append(Constants.COLUMN_SEPARATOR); 105 | // Field 6: PRED 106 | sb.append(node.isPred ? 
"+" : "-"); 107 | sb.append(Constants.COLUMN_SEPARATOR); 108 | // Field 7: SENSE 109 | sb.append(node.sense); 110 | 111 | for (Node source : graph.getNodes().subList(1, nNodes)) { 112 | if (source.isPred) { 113 | sb.append(Constants.COLUMN_SEPARATOR); 114 | String label = labels[source.id][node.id]; 115 | if (label == null) { 116 | sb.append(Constants.UNDEFINED); 117 | } else { 118 | sb.append(label); 119 | } 120 | } 121 | } 122 | 123 | writer.println(sb.toString()); 124 | } 125 | } 126 | 127 | writer.println(); 128 | } 129 | 130 | /** 131 | * Closes the stream and releases any system resources associated with it. 132 | * 133 | * @throws IOException if an I/O error occurs 134 | */ 135 | public void close() throws IOException { 136 | writer.close(); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/io/ParagraphReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.io; 5 | 6 | import java.io.File; 7 | import java.io.FileNotFoundException; 8 | import java.io.FileReader; 9 | import java.io.IOException; 10 | import java.io.LineNumberReader; 11 | import java.io.Reader; 12 | import java.util.Collections; 13 | import java.util.LinkedList; 14 | import java.util.List; 15 | 16 | /** 17 | * Reads paragraphs separated by blank lines from a character-input stream. 18 | * 19 | * @author Marco Kuhlmann 20 | */ 21 | public class ParagraphReader extends LineNumberReader { 22 | 23 | /** 24 | * Create a paragraph reader, using the default input-buffer size. 25 | * 26 | * @param reader a Reader object to provide the underlying stream 27 | */ 28 | public ParagraphReader(Reader reader) { 29 | super(reader); 30 | } 31 | 32 | /** 33 | * Create a paragraph reader, reading characters into a buffer of the given 34 | * size. 
/**
 * Reads paragraphs separated by blank lines from a character-input stream.
 *
 * @author Marco Kuhlmann
 */
public class ParagraphReader extends LineNumberReader {

    /**
     * Create a paragraph reader, using the default input-buffer size.
     *
     * @param reader a Reader object to provide the underlying stream
     */
    public ParagraphReader(Reader reader) {
        super(reader);
    }

    /**
     * Create a paragraph reader, reading characters into a buffer of the given
     * size.
     *
     * @param reader a Reader object to provide the underlying stream
     * @param sz the size of the buffer
     */
    public ParagraphReader(Reader reader, int sz) {
        super(reader, sz);
    }

    /**
     * Create a paragraph reader that reads from the specified file. The file
     * will be read using the default input-buffer size.
     *
     * @param file the file to read from
     * @throws FileNotFoundException if the specified file does not exist, is a
     * directory rather than a regular file, or for some other reason cannot be
     * opened for reading
     */
    public ParagraphReader(File file) throws FileNotFoundException {
        super(new FileReader(file));
    }

    /**
     * Create a paragraph reader that reads from the specified file. The file
     * will be read using the default input-buffer size.
     *
     * @param fileName the name of the file to read from
     * @throws FileNotFoundException if the specified file does not exist, is a
     * directory rather than a regular file, or for some other reason cannot be
     * opened for reading
     */
    public ParagraphReader(String fileName) throws FileNotFoundException {
        super(new FileReader(fileName));
    }

    /**
     * Reads a single paragraph. A paragraph is a list of lines terminated by a
     * blank (empty) line, or the end of the stream. The terminating blank line
     * is consumed but not returned. If the very first line read is blank, the
     * paragraph is empty and an empty, unmodifiable list is returned.
     *
     * @return the lines of the paragraph read, or {@code null} if the end of
     * the stream has been reached
     * @throws IOException if an I/O error occurs
     */
    public List<String> readParagraph() throws IOException {
        String line = super.readLine();
        if (line == null) {
            return null;
        }
        if (line.isEmpty()) {
            return Collections.emptyList();
        }
        List<String> lines = new LinkedList<>();
        do {
            lines.add(line);
        } while ((line = super.readLine()) != null && !line.isEmpty());
        return lines;
    }
}
41 | private int nCyclic; 42 | 43 | // Number of forests. 44 | private int nForests; 45 | 46 | // Number of trees. 47 | private int nTrees; 48 | 49 | // Number of graphs that are semi-connected. 50 | private int nFragmented; 51 | 52 | // Number of nodes that have more than one incoming edge. 53 | private int nReentrantNodes; 54 | 55 | // Number of topless graphs. 56 | private int nToplessGraphs; 57 | 58 | // Number of top nodes. 59 | private int nTopNodes; 60 | 61 | // Number of special nodes. 62 | private int nSpecialNodes; 63 | 64 | // Number of noncrossing graphs. 65 | private int nNoncrossingGraphs; 66 | 67 | // Number of projective graphs. 68 | private int nProjectiveGraphs; 69 | 70 | // Set of senses seen. 71 | private final Set senses = new HashSet(); 72 | 73 | // Number of (non-singleton) nodes with senses. 74 | private int nScorablePredicates; 75 | 76 | /** 77 | * Updates the statistics with the specified graph. 78 | * 79 | * @param graph a semantic dependency graph 80 | */ 81 | public void update(Graph graph) { 82 | InspectedGraph inspectedGraph = new InspectedGraph(graph); 83 | 84 | // number of graphs 85 | nGraphs++; 86 | 87 | // number of non-wall nodes 88 | nNonWallNodes += inspectedGraph.getNNonWallNodes(); 89 | 90 | // number of edges 91 | nEdges += graph.getNEdges(); 92 | 93 | // distinct labels 94 | for (Edge edge : graph.getEdges()) { 95 | labels.add(edge.label); 96 | } 97 | 98 | // number of singleton nodes 99 | nSingletons += inspectedGraph.getNSingletons(); 100 | 101 | // number of cyclic graphs 102 | nCyclic += inspectedGraph.isCyclic() ? 1 : 0; 103 | 104 | // number of forests 105 | nForests += inspectedGraph.isForest() ? 1 : 0; 106 | 107 | // number of trees 108 | nTrees += inspectedGraph.isTree() ? 1 : 0; 109 | 110 | // number of graphs that are fragmented 111 | nFragmented += inspectedGraph.getNComponents() - 1 - inspectedGraph.getNSingletons() == 1 ? 
0 : 1; 112 | 113 | // number of reentrant nodes 114 | for (Node node : graph.getNodes()) { 115 | nReentrantNodes += node.getNIncomingEdges() > 1 ? 1 : 0; 116 | } 117 | 118 | // number of topless graphs 119 | boolean isTopless = true; 120 | for (Node node : graph.getNodes()) { 121 | isTopless = isTopless && !node.isTop; 122 | } 123 | nToplessGraphs += isTopless ? 1 : 0; 124 | 125 | // number of top nodes 126 | for (Node node : graph.getNodes()) { 127 | nTopNodes += node.isTop ? 1 : 0; 128 | } 129 | 130 | // number of special nodes 131 | for (Node node : graph.getNodes()) { 132 | nSpecialNodes += node.id != 0 && !inspectedGraph.isSingleton(node.id) && !node.hasIncomingEdges() && !node.isTop ? 1 : 0; 133 | } 134 | 135 | // number of noncrossing graphs 136 | nNoncrossingGraphs += inspectedGraph.isNoncrossing() ? 1 : 0; 137 | 138 | // number of projective graphs 139 | nProjectiveGraphs += inspectedGraph.isProjective() ? 1 : 0; 140 | 141 | // number of senses 142 | for (Node node : graph.getNodes()) { 143 | if (node.id != 0 && !inspectedGraph.isSingleton(node.id) && node.isPred && node.pos.startsWith("V") && !node.sense.equals("_")) { 144 | senses.add(node.sense); 145 | nScorablePredicates += 1; 146 | } 147 | } 148 | } 149 | 150 | /** 151 | * Prints statistics about a set of graphs. 
152 | * 153 | * @param args names of files from which to read graphs 154 | * @throws Exception if an I/O exception occurs 155 | */ 156 | public static void main(String[] args) throws Exception { 157 | Analyzer analyzer = new Analyzer(); 158 | GraphReader reader = new GraphReader2015(new InputStreamReader(System.in)); 159 | Graph graph; 160 | while ((graph = reader.readGraph()) != null) { 161 | analyzer.update(graph); 162 | } 163 | reader.close(); 164 | System.err.format("number of labels:\t%d%n", analyzer.labels.size()); 165 | System.err.format("percentage of singletons:\t%s%n", percentage(analyzer.nSingletons, analyzer.nNonWallNodes)); 166 | System.err.format("edge density:\t%s%n", fraction(analyzer.nEdges, analyzer.nNonWallNodes - analyzer.nSingletons, 2)); 167 | System.err.format("percentage of graphs that are trees:\t%s%n", percentage(analyzer.nTrees, analyzer.nGraphs)); 168 | System.err.format("percentage of graphs that are projective:\t%s%n", percentage(analyzer.nProjectiveGraphs, analyzer.nGraphs)); 169 | System.err.format("percentage of graphs that are fragmented:\t%s%n", percentage(analyzer.nFragmented, analyzer.nGraphs)); 170 | System.err.format("percentage of nodes that have reentrancies:\t%s%n", percentage(analyzer.nReentrantNodes, analyzer.nNonWallNodes - analyzer.nSingletons)); 171 | System.err.format("percentage of graphs that are topless:\t%s%n", percentage(analyzer.nToplessGraphs, analyzer.nGraphs)); 172 | System.err.format("number of top nodes per graph:\t%s%n", fraction(analyzer.nTopNodes, analyzer.nGraphs)); 173 | System.err.format("percentage of nodes that are non-top roots:\t%s%n", percentage(analyzer.nSpecialNodes, analyzer.nNonWallNodes - analyzer.nSingletons)); 174 | System.err.format("number of senses:\t%d%n", analyzer.senses.size()); 175 | System.err.format("percentage of predicates with senses:\t%s%n", percentage(analyzer.nScorablePredicates, analyzer.nNonWallNodes - analyzer.nSingletons)); 176 | } 177 | 178 | public static String 
fraction(int a, int b, int digits) { 179 | return String.format(String.format("%%.%df", digits), (double) a / (double) b); 180 | } 181 | 182 | public static String fraction(int a, int b) { 183 | return fraction(a, b, 4); 184 | } 185 | 186 | public static String percentage(int enumerator, int denominator) { 187 | return String.format("%.2f", (double) enumerator / (double) denominator * 100); 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/BasicAnalyzer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.tools; 5 | 6 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph; 7 | import se.liu.ida.nlp.sdp.toolkit.graph.InspectedGraph; 8 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader; 9 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015; 10 | 11 | /** 12 | * Print some basic statistics about a collection of graphs. 13 | * 14 | * @author Marco Kuhlmann 15 | */ 16 | public class BasicAnalyzer { 17 | 18 | public static void main(String[] args) throws Exception { 19 | for (String arg : args) { 20 | GraphReader reader = new GraphReader2015(arg); 21 | int nGraphs = 0; 22 | int nTokens = 0; 23 | int nCyclic = 0; 24 | int nForests = 0; 25 | int nTrees = 0; 26 | int nProjective = 0; 27 | int maxIndegree = 0; 28 | int maxOutdegree = 0; 29 | Graph graph; 30 | while ((graph = reader.readGraph()) != null) { 31 | InspectedGraph analyzer = new InspectedGraph(graph); 32 | nCyclic += analyzer.isCyclic() ? 1 : 0; 33 | nForests += analyzer.isForest() ? 1 : 0; 34 | nTrees += analyzer.isTree() ? 1 : 0; 35 | nProjective += analyzer.isProjective() ? 
1 : 0; 36 | maxIndegree = Math.max(maxIndegree, analyzer.getMaximalIndegree()); 37 | maxOutdegree = Math.max(maxOutdegree, analyzer.getMaximalOutdegree()); 38 | nGraphs++; 39 | nTokens += graph.getNNodes() - 1; 40 | } 41 | reader.close(); 42 | System.out.format("%s: %d graphs, %d tokens%n", arg, nGraphs, nTokens); 43 | System.out.format(" cyclic = %d%n", nCyclic); 44 | System.out.format(" forests = %d%n", nForests); 45 | System.out.format(" trees = %d%n", nTrees); 46 | System.out.format(" projective = %d%n", nProjective); 47 | System.out.format(" max indegree = %d%n", maxIndegree); 48 | System.out.format(" max outdegree = %d%n", maxOutdegree); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/IOTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.tools; 5 | 6 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph; 7 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader; 8 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015; 9 | import se.liu.ida.nlp.sdp.toolkit.io.GraphWriter; 10 | import se.liu.ida.nlp.sdp.toolkit.io.GraphWriter2015; 11 | 12 | /** 13 | * Test the graph I/O. 
14 | * 15 | * @author Marco Kuhlmann 16 | */ 17 | public class IOTest { 18 | 19 | public static void main(String[] args) throws Exception { 20 | for (String arg : args) { 21 | GraphReader reader = new GraphReader2015(arg); 22 | GraphWriter writer = new GraphWriter2015(arg + ".out"); 23 | int nGraphs = 0; 24 | int nTokens = 0; 25 | Graph graph; 26 | while ((graph = reader.readGraph()) != null) { 27 | nGraphs++; 28 | nTokens += graph.getNNodes() - 1; 29 | writer.writeGraph(graph); 30 | } 31 | reader.close(); 32 | writer.close(); 33 | System.out.format("%s: %d graphs, %d tokens%n", arg, nGraphs, nTokens); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/PairedBootstrap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.tools; 5 | 6 | import java.io.FileNotFoundException; 7 | import java.io.IOException; 8 | import java.util.ArrayList; 9 | import java.util.HashSet; 10 | import java.util.List; 11 | import java.util.Random; 12 | import java.util.Set; 13 | import se.liu.ida.nlp.sdp.toolkit.graph.Edge; 14 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph; 15 | import se.liu.ida.nlp.sdp.toolkit.graph.Node; 16 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader; 17 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015; 18 | 19 | /** 20 | * 21 | * @author Marco Kuhlmann 22 | */ 23 | public class PairedBootstrap { 24 | 25 | private static final String VIRTUAL_LABEL = "-VIRTUAL-"; 26 | private static final int B = 10000; 27 | private static final int SEED = 42; 28 | private static final Random R = new Random(SEED); 29 | 30 | public static void main(String[] args) throws Exception { 31 | List graphs0 = readGraphs(args[0]); 32 | List graphs1 = readGraphs(args[1]); // baseline 33 | List graphs2 = readGraphs(args[2]); // 
system 34 | 35 | assert graphs0.size() == graphs1.size(); 36 | assert graphs0.size() == graphs2.size(); 37 | 38 | int n = graphs0.size(); 39 | 40 | List items = new ArrayList(n); 41 | for (int i = 0; i < n; i++) { 42 | items.add(new Item(graphs0.get(i), graphs1.get(i), graphs2.get(i))); 43 | } 44 | 45 | double delta = getDelta(items); 46 | if (delta == 0) { 47 | System.err.println("Baseline and system have the same performance!"); 48 | System.exit(0); 49 | } 50 | if (delta < 0) { 51 | System.err.print("Baseline beats system!"); 52 | System.exit(0); 53 | } 54 | 55 | System.err.print("System beats baseline by "); 56 | System.err.format("%f LF.%n", Math.abs(delta)); 57 | 58 | System.err.println("Q: Could it be that the victory was just a random fluke?"); 59 | 60 | double p = getP(items); 61 | 62 | if (p < 0.05) { 63 | System.err.print("A: No; the difference is most probably real"); 64 | } else { 65 | System.err.print("A: Yes; this is probable"); 66 | } 67 | System.err.format(" (p = %f)%n", p); 68 | System.exit(0); 69 | } 70 | 71 | private static List readGraphs(String fileName) throws FileNotFoundException, IOException { 72 | List graphs = new ArrayList(); 73 | GraphReader reader = new GraphReader2015(fileName); 74 | Graph graph; 75 | while ((graph = reader.readGraph()) != null) { 76 | graphs.add(graph); 77 | } 78 | return graphs; 79 | } 80 | 81 | private static List getSample(List base) { 82 | int n = base.size(); 83 | List sample = new ArrayList(n); 84 | for (int i = 0; i < n; i++) { 85 | int j = R.nextInt(n); 86 | sample.add(base.get(j)); 87 | } 88 | return sample; 89 | } 90 | 91 | private static double getP(List base) { 92 | double delta0 = getDelta(base); 93 | int s = 0; 94 | double p = 0.0; 95 | for (int i = 0; i < B; i++) { 96 | s += (getDelta(getSample(base)) > 2 * delta0) ? 1 : 0; 97 | p = (double) s / (double) B; 98 | System.err.format("\rComputing ... (no. 
of samples = %d, p = %f)", i, p); 99 | } 100 | System.err.println(); 101 | return p; 102 | } 103 | 104 | private static double getDelta(List sample) { 105 | Set edges0 = new HashSet(); 106 | Set edges1 = new HashSet(); 107 | Set edges2 = new HashSet(); 108 | 109 | int graphId = 0; 110 | for (Item item : sample) { 111 | addEdges(edges0, item.graph0, graphId); 112 | addEdges(edges1, item.graph1, graphId); 113 | addEdges(edges2, item.graph2, graphId); 114 | graphId++; 115 | } 116 | 117 | int nEdgesIn0 = edges0.size(); 118 | int nEdgesIn1 = edges1.size(); 119 | int nEdgesIn2 = edges2.size(); 120 | int nEdgesCorrect1 = getIntersection(edges0, edges1).size(); 121 | int nEdgesCorrect2 = getIntersection(edges0, edges2).size(); 122 | 123 | double precision1 = (double) nEdgesCorrect1 / (double) nEdgesIn1; 124 | double recall1 = (double) nEdgesCorrect1 / (double) nEdgesIn0; 125 | double fOne1 = 2.0 * precision1 * recall1 / (precision1 + recall1); 126 | 127 | double precision2 = (double) nEdgesCorrect2 / (double) nEdgesIn2; 128 | double recall2 = (double) nEdgesCorrect2 / (double) nEdgesIn0; 129 | double fOne2 = 2.0 * precision2 * recall2 / (precision2 + recall2); 130 | 131 | return fOne2 - fOne1; 132 | } 133 | 134 | private static void addEdges(Set edges, Graph graph, int graphId) { 135 | for (Node node : graph.getNodes()) { 136 | if (node.isTop) { 137 | edges.add(new MyEdge(graphId, 0, node.id, VIRTUAL_LABEL)); 138 | } 139 | } 140 | for (Edge edge : graph.getEdges()) { 141 | edges.add(new MyEdge(graphId, edge.source, edge.target, edge.label)); 142 | } 143 | } 144 | 145 | private static Set getIntersection(Set edges1, Set edges2) { 146 | Set intersection = new HashSet(edges1); 147 | intersection.retainAll(edges2); 148 | return intersection; 149 | } 150 | 151 | private static class Item { 152 | 153 | public final Graph graph0; 154 | public final Graph graph1; 155 | public final Graph graph2; 156 | 157 | public Item(Graph graph0, Graph graph1, Graph graph2) { 158 | this.graph0 
= graph0; 159 | this.graph1 = graph1; 160 | this.graph2 = graph2; 161 | } 162 | } 163 | 164 | private static class MyEdge { 165 | 166 | final int graphId; 167 | final int src; 168 | final int tgt; 169 | final String label; 170 | 171 | public MyEdge(int graphId, int src, int tgt, String label) { 172 | this.graphId = graphId; 173 | this.src = src; 174 | this.tgt = tgt; 175 | this.label = label; 176 | } 177 | 178 | @Override 179 | public int hashCode() { 180 | int hash = 3; 181 | hash = 53 * hash + this.graphId; 182 | hash = 53 * hash + this.src; 183 | hash = 53 * hash + this.tgt; 184 | hash = 53 * hash + (this.label != null ? this.label.hashCode() : 0); 185 | return hash; 186 | } 187 | 188 | @Override 189 | public boolean equals(Object obj) { 190 | if (obj == null) { 191 | return false; 192 | } 193 | if (getClass() != obj.getClass()) { 194 | return false; 195 | } 196 | final MyEdge other = (MyEdge) obj; 197 | if (this.graphId != other.graphId) { 198 | return false; 199 | } 200 | if (this.src != other.src) { 201 | return false; 202 | } 203 | if (this.tgt != other.tgt) { 204 | return false; 205 | } 206 | if ((this.label == null) ? (other.label != null) : !this.label.equals(other.label)) { 207 | return false; 208 | } 209 | return true; 210 | } 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/Scorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * See the file "LICENSE" for the full license governing this code. 
3 | */ 4 | package se.liu.ida.nlp.sdp.toolkit.tools; 5 | 6 | import java.io.BufferedReader; 7 | import java.io.File; 8 | import java.io.FileNotFoundException; 9 | import java.io.FileReader; 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.Collections; 13 | import java.util.HashSet; 14 | import java.util.LinkedList; 15 | import java.util.List; 16 | import java.util.Set; 17 | import se.liu.ida.nlp.sdp.toolkit.graph.Edge; 18 | import se.liu.ida.nlp.sdp.toolkit.graph.Graph; 19 | import se.liu.ida.nlp.sdp.toolkit.graph.Node; 20 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader; 21 | import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015; 22 | 23 | /** 24 | * Score a collection of dependency graphs relative to a gold standard. 25 | * 26 | * @author Marco Kuhlmann 27 | */ 28 | public class Scorer { 29 | 30 | /** 31 | * The label used for unlabeled edges. 32 | */ 33 | private static final String UNLABELED = "-UNLABELED-"; 34 | 35 | /** 36 | * The label used for virtual edges. 37 | */ 38 | private static final String VIRTUAL = "-VIRTUAL-"; 39 | 40 | /** 41 | * The sense used for core predications. 42 | */ 43 | private static final String NO_SENSE = "-NOSENSE-"; 44 | 45 | /** 46 | * A flag indicating whether to include labels when scoring graphs. 47 | */ 48 | private final boolean includeLabels; 49 | 50 | /** 51 | * A flag indicating whether to include top nodes when scoring graphs. 52 | */ 53 | private final boolean includeTopNodes; 54 | 55 | /** 56 | * A flag indicating whether to include punctuation when scoring graphs. 57 | */ 58 | private final boolean includePunctuation; 59 | 60 | /** 61 | * A flag indicating whether to treat edges as undirected when scoring 62 | * graphs. 63 | */ 64 | private final boolean treatEdgesAsUndirected; 65 | 66 | /** 67 | * Counter to store the number of graphs read. 68 | */ 69 | private int nGraphs; 70 | 71 | /** 72 | * Set containing the edges from the gold standard graphs. 
73 | */ 74 | private final Set edgesInGoldStandard; 75 | 76 | /** 77 | * Set containing the edges from the system output graphs. 78 | */ 79 | private final Set edgesInSystemOutput; 80 | 81 | /** 82 | * Counter for the number of exact matches. 83 | */ 84 | private int nExactMatches; 85 | 86 | private final Set semanticFramesInGoldStandard; 87 | private final Set semanticFramesInSystemOutput; 88 | private final Set corePredicationsInGoldStandard; 89 | private final Set corePredicationsInSystemOutput; 90 | 91 | private final ArgumentFilter labelPredicate; 92 | 93 | /** 94 | * Construct a new scorer. 95 | * 96 | * @param includeLabels flag indicating whether the scorer should do labeled 97 | * scoring 98 | * @param includeTopNodes flag indicating whether the scorer should include 99 | * top nodes 100 | * @param includePunctuation flag indicating whether the scorer should 101 | * include punctuation 102 | * @param treatEdgesAsUndirected flag indicating whether the scorer should 103 | * treat edges as undirected 104 | */ 105 | public Scorer(boolean includeLabels, boolean includeTopNodes, boolean includePunctuation, boolean treatEdgesAsUndirected, ArgumentFilter labelPredicate) { 106 | this.includeLabels = includeLabels; 107 | this.includeTopNodes = includeTopNodes; 108 | this.edgesInGoldStandard = new HashSet(); 109 | this.edgesInSystemOutput = new HashSet(); 110 | this.includePunctuation = includePunctuation; 111 | this.treatEdgesAsUndirected = treatEdgesAsUndirected; 112 | this.semanticFramesInGoldStandard = new HashSet<>(); 113 | this.semanticFramesInSystemOutput = new HashSet<>(); 114 | this.corePredicationsInGoldStandard = new HashSet<>(); 115 | this.corePredicationsInSystemOutput = new HashSet<>(); 116 | this.labelPredicate = labelPredicate; 117 | } 118 | 119 | /** 120 | * Construct a new scorer. 
121 | */ 122 | public Scorer() { 123 | this(true, true, true, false, new TrueFilter()); 124 | } 125 | 126 | /** 127 | * Updates this scorer with the specified pair of graphs. 128 | * 129 | * @param goldStandard the graph that should be considered as the gold 130 | * standard 131 | * @param systemOutput the graph that should be considered as the system 132 | * output 133 | */ 134 | public void update(Graph goldStandard, Graph systemOutput) { 135 | assert goldStandard.getNNodes() == systemOutput.getNNodes(); 136 | 137 | Set edgesG = getEdges(goldStandard); 138 | Set edgesS = getEdges(systemOutput); 139 | 140 | nGraphs++; 141 | nExactMatches += edgesG.equals(edgesS) ? 1 : 0; 142 | 143 | edgesInGoldStandard.addAll(edgesG); 144 | edgesInSystemOutput.addAll(edgesS); 145 | 146 | Set semanticFramesG = getSemanticFrames(goldStandard); 147 | Set semanticFramesS = getSemanticFrames(systemOutput); 148 | 149 | semanticFramesInGoldStandard.addAll(semanticFramesG); 150 | semanticFramesInSystemOutput.addAll(semanticFramesS); 151 | 152 | Set corePredicationsG = getCorePredications(goldStandard); 153 | Set corePredicationsS = getCorePredications(systemOutput); 154 | 155 | corePredicationsInGoldStandard.addAll(corePredicationsG); 156 | corePredicationsInSystemOutput.addAll(corePredicationsS); 157 | } 158 | 159 | /** 160 | * Tests whether the specified node represents a punctuation token. 161 | * 162 | * @param node a node 163 | * @return {@code true} if the specified node represents a punctuation token 164 | */ 165 | private boolean isPunctuation(Node node) { 166 | return node.pos.equals(".") || node.pos.equals(",") || node.pos.equals(":") || node.pos.equals("(") || node.pos.equals(")"); 167 | } 168 | 169 | /** 170 | * Tests whether an edge between the specified nodes is admissible. 
171 | * 172 | * @param graph a graph 173 | * @param src the source node of the presumed edge 174 | * @param tgt the target node of the presumed edge 175 | * @return {@code true} if an edge from the specified source node to the 176 | * specified target node would be admissible 177 | */ 178 | private boolean edgeIsAdmissible(Graph graph, int src, int tgt) { 179 | if (includePunctuation) { 180 | return true; 181 | } else { 182 | return !isPunctuation(graph.getNode(src)) && !isPunctuation(graph.getNode(tgt)); 183 | } 184 | } 185 | 186 | /** 187 | * Extracts the (scorer-internal) edges from the specified graph. 188 | * 189 | * @param graph the graph from which to extract the edges 190 | * @return the set of extracted edges 191 | */ 192 | private Set getEdges(Graph graph) { 193 | Set edges = new HashSet(); 194 | for (Edge edge : graph.getEdges()) { 195 | if (edgeIsAdmissible(graph, edge.source, edge.target)) { 196 | String label = includeLabels ? edge.label : UNLABELED; 197 | edges.add(makeEdge(nGraphs, edge.source, edge.target, label)); 198 | } 199 | } 200 | if (includeTopNodes) { 201 | for (Node node : graph.getNodes()) { 202 | if (node.isTop && edgeIsAdmissible(graph, 0, node.id)) { 203 | edges.add(makeEdge(nGraphs, 0, node.id, VIRTUAL)); 204 | } 205 | } 206 | } 207 | return edges; 208 | } 209 | 210 | /** 211 | * Returns the number of edges in the gold standard. 212 | * 213 | * @return the number of edges in the gold standard 214 | */ 215 | public int getNEdgesInGoldStandard() { 216 | return edgesInGoldStandard.size(); 217 | } 218 | 219 | /** 220 | * Returns the number of edges in the system output. 221 | * 222 | * @return the number of edges in the system output 223 | */ 224 | public int getNEdgesInSystemOutput() { 225 | return edgesInSystemOutput.size(); 226 | } 227 | 228 | /** 229 | * Returns the precision computed by this scorer. 
230 | * 231 | * @return the precision computed by this scorer 232 | */ 233 | public double getPrecision() { 234 | return (double) getNEdgesInCommon() / (double) getNEdgesInSystemOutput(); 235 | } 236 | 237 | /** 238 | * Returns the recall computed by this scorer. 239 | * 240 | * @return the recall computed by this scorer 241 | */ 242 | public double getRecall() { 243 | return (double) getNEdgesInCommon() / (double) getNEdgesInGoldStandard(); 244 | } 245 | 246 | /** 247 | * Returns the edges that occur both in the gold standard and in the system 248 | * output. 249 | * 250 | * @return the edges that occur both in the gold standard and in the system 251 | * output 252 | */ 253 | private Set getEdgesInCommon() { 254 | Set intersection = new HashSet(edgesInGoldStandard); 255 | intersection.retainAll(edgesInSystemOutput); 256 | return intersection; 257 | } 258 | 259 | /** 260 | * Returns the number of edges that occur both in the gold standard and in 261 | * the system output. 262 | * 263 | * @return the number of edges that occur both in the gold standard and in 264 | * the system output 265 | */ 266 | public int getNEdgesInCommon() { 267 | return getEdgesInCommon().size(); 268 | } 269 | 270 | /** 271 | * Returns the F1-score computed by this scorer. 272 | * 273 | * @return the F1-score computed by this scorer 274 | */ 275 | public double getF1() { 276 | double p = getPrecision(); 277 | double r = getRecall(); 278 | return 2.0 * p * r / (p + r); 279 | } 280 | 281 | /** 282 | * Returns the exact match score computed by this scorer. 283 | * 284 | * @return the exact match score computed by this scorer 285 | */ 286 | public double getExactMatch() { 287 | return (double) nExactMatches / (double) nGraphs; 288 | } 289 | 290 | /** 291 | * Tests whether the specified node represents a scorable predicate. 292 | * Currently only predicates corresponding to verbs are scored. 
293 | * 294 | * @param node a node in a dependency graph 295 | * @return {@code true} if the specified node represents a scorable 296 | * predicate 297 | */ 298 | private boolean representsScorablePredicate(Node node) { 299 | return node.isPred && node.pos.startsWith("V"); 300 | } 301 | 302 | /** 303 | * Returns the semantic frames contained in the specified graph. 304 | * 305 | * @param graph a dependency graph 306 | * @return the semantic frames contained in the specified graph 307 | */ 308 | private Set getSemanticFrames(Graph graph) { 309 | Set semanticFrames = new HashSet<>(); 310 | for (Node node : graph.getNodes()) { 311 | if (representsScorablePredicate(node)) { 312 | Set outgoingEdges = new HashSet<>(); 313 | for (Edge edge : node.getOutgoingEdges()) { 314 | if (labelPredicate.applies(edge.label)) { 315 | ScorerEdge scorerEdge = new ScorerEdge(nGraphs, edge.source, edge.target, edge.label); 316 | outgoingEdges.add(scorerEdge); 317 | } 318 | } 319 | SemanticFrame frame = new SemanticFrame(nGraphs, node.id, node.sense, outgoingEdges); 320 | semanticFrames.add(frame); 321 | } 322 | } 323 | return semanticFrames; 324 | } 325 | 326 | /** 327 | * Returns the number of semantic frames in the gold standard. 328 | * 329 | * @return the number of semantic frames in the gold standard 330 | */ 331 | public int getNSemanticFramesInGoldStandard() { 332 | return semanticFramesInGoldStandard.size(); 333 | } 334 | 335 | /** 336 | * Returns the number of semantic frames in the system output. 337 | * 338 | * @return the number of semantic frames in the system output 339 | */ 340 | public int getNSemanticFramesInSystemOutput() { 341 | return semanticFramesInSystemOutput.size(); 342 | } 343 | 344 | /** 345 | * Returns the semantic frames precision computed by this scorer. 
346 | * 347 | * @return the semantic frames precision computed by this scorer 348 | */ 349 | public double getSemanticFramesPrecision() { 350 | return (double) getNSemanticFramesInCommon() / (double) getNSemanticFramesInSystemOutput(); 351 | } 352 | 353 | /** 354 | * Returns the semantic frames recall computed by this scorer. 355 | * 356 | * @return the semantic frames recall computed by this scorer 357 | */ 358 | public double getSemanticFramesRecall() { 359 | return (double) getNSemanticFramesInCommon() / (double) getNSemanticFramesInGoldStandard(); 360 | } 361 | 362 | /** 363 | * Returns the semantic frames that occur both in the gold standard and in 364 | * the system output. 365 | * 366 | * @return the semantic frames that occur both in the gold standard and in 367 | * the system output 368 | */ 369 | private Set getSemanticFramesInCommon() { 370 | Set intersection = new HashSet<>(semanticFramesInGoldStandard); 371 | intersection.retainAll(semanticFramesInSystemOutput); 372 | return intersection; 373 | } 374 | 375 | /** 376 | * Returns the number of semantic frames that occur both in the gold 377 | * standard and in the system output. 378 | * 379 | * @return the number of semantic frames that occur both in the gold 380 | * standard and in the system output 381 | */ 382 | public int getNSemanticFramesInCommon() { 383 | return getSemanticFramesInCommon().size(); 384 | } 385 | 386 | /** 387 | * Returns the semantic frames F1-score computed by this scorer. 388 | * 389 | * @return the semantic frames F1-score computed by this scorer 390 | */ 391 | public double getSemanticFramesF1() { 392 | double p = getSemanticFramesPrecision(); 393 | double r = getSemanticFramesRecall(); 394 | return 2.0 * p * r / (p + r); 395 | } 396 | 397 | /** 398 | * Returns the core predications contained in the specified graph. 
399 | * 400 | * @param graph a dependency graph 401 | * @return the core predications contained in the specified graph 402 | */ 403 | private Set getCorePredications(Graph graph) { 404 | Set semanticFrames = new HashSet<>(); 405 | for (Node node : graph.getNodes()) { 406 | if (representsScorablePredicate(node)) { 407 | Set outgoingEdges = new HashSet<>(); 408 | for (Edge edge : node.getOutgoingEdges()) { 409 | if (labelPredicate.applies(edge.label)) { 410 | ScorerEdge scorerEdge = new ScorerEdge(nGraphs, edge.source, edge.target, edge.label); 411 | outgoingEdges.add(scorerEdge); 412 | } 413 | } 414 | SemanticFrame frame = new SemanticFrame(nGraphs, node.id, NO_SENSE, outgoingEdges); 415 | semanticFrames.add(frame); 416 | } 417 | } 418 | return semanticFrames; 419 | } 420 | 421 | /** 422 | * Returns the number of core predications in the gold standard. 423 | * 424 | * @return the number of core predications in the gold standard 425 | */ 426 | public int getNCorePredicationsInGoldStandard() { 427 | return corePredicationsInGoldStandard.size(); 428 | } 429 | 430 | /** 431 | * Returns the number of core predications in the system output. 432 | * 433 | * @return the number of core predications in the system output 434 | */ 435 | public int getNCorePredicationsInSystemOutput() { 436 | return corePredicationsInSystemOutput.size(); 437 | } 438 | 439 | /** 440 | * Returns the core predications precision computed by this scorer. 441 | * 442 | * @return the core predications precision computed by this scorer 443 | */ 444 | public double getCorePredicationsPrecision() { 445 | return (double) getNCorePredicationsInCommon() / (double) getNCorePredicationsInSystemOutput(); 446 | } 447 | 448 | /** 449 | * Returns the core predications recall computed by this scorer. 
450 | * 451 | * @return the core predications recall computed by this scorer 452 | */ 453 | public double getCorePredicationsRecall() { 454 | return (double) getNCorePredicationsInCommon() / (double) getNCorePredicationsInGoldStandard(); 455 | } 456 | 457 | /** 458 | * Returns the core predications that occur both in the gold standard and in 459 | * the system output. 460 | * 461 | * @return the core predications that occur both in the gold standard and in 462 | * the system output 463 | */ 464 | private Set getCorePredicationsInCommon() { 465 | Set intersection = new HashSet<>(corePredicationsInGoldStandard); 466 | intersection.retainAll(corePredicationsInSystemOutput); 467 | return intersection; 468 | } 469 | 470 | /** 471 | * Returns the number of core predications that occur both in the gold 472 | * standard and in the system output. 473 | * 474 | * @return the number of core predications that occur both in the gold 475 | * standard and in the system output 476 | */ 477 | public int getNCorePredicationsInCommon() { 478 | return getCorePredicationsInCommon().size(); 479 | } 480 | 481 | /** 482 | * Returns the core predications F1-score computed by this scorer. 483 | * 484 | * @return the core predications F1-score computed by this scorer 485 | */ 486 | public double getCorePredicationsF1() { 487 | double p = getCorePredicationsPrecision(); 488 | double r = getCorePredicationsRecall(); 489 | return 2.0 * p * r / (p + r); 490 | } 491 | 492 | /** 493 | * Read graphs from the specified files. 
494 | * 495 | * @param goldStandardFile the file containing the gold standard graphs 496 | * @param systemOutputFile the file containing the system output graphs 497 | * @throws Exception if an I/O error occurs 498 | */ 499 | private static List readGraphs(String goldStandardFile, String systemOutputFile, int max) throws Exception { 500 | List graphPairs = new LinkedList(); 501 | GraphReader goldStandardReader = new GraphReader2015(goldStandardFile); 502 | GraphReader systemOutputReader = new GraphReader2015(systemOutputFile); 503 | Graph goldStandard; 504 | Graph systemOutput; 505 | int nGraphs = 0; 506 | while ((goldStandard = goldStandardReader.readGraph()) != null && (max < 0 || nGraphs < max)) { 507 | systemOutput = systemOutputReader.readGraph(); 508 | graphPairs.add(new GraphPair(goldStandard, systemOutput)); 509 | nGraphs++; 510 | } 511 | assert systemOutputReader.readGraph() == null; 512 | goldStandardReader.close(); 513 | systemOutputReader.close(); 514 | return graphPairs; 515 | } 516 | 517 | /** 518 | * Scores the specified graphs using the specified scorer. 519 | * 520 | * @param scorer the scorer to use 521 | * @param graphPairs a list of reference-candidate pairs 522 | */ 523 | private static void score(Scorer scorer, List graphPairs) { 524 | for (GraphPair pair : graphPairs) { 525 | scorer.update(pair.goldStandard, pair.systemOutput); 526 | } 527 | } 528 | 529 | /** 530 | * Scores the specified graphs. 
531 | * 532 | * @param includeTopNodes whether the scoring should include top nodes 533 | * @param graphPairs a list of reference-candidate pairs 534 | */ 535 | private static void score(boolean includeTopNodes, boolean includePunctuation, boolean treatEdgesAsUndirected, List graphPairs, ArgumentFilter labelPredicate) { 536 | Scorer scorerL = new Scorer(true, includeTopNodes, includePunctuation, treatEdgesAsUndirected, labelPredicate); 537 | Scorer scorerU = new Scorer(false, includeTopNodes, includePunctuation, treatEdgesAsUndirected, labelPredicate); 538 | 539 | score(scorerL, graphPairs); 540 | score(scorerU, graphPairs); 541 | 542 | System.err.format("Number of edges in gold standard: %d%n", scorerL.getNEdgesInGoldStandard()); 543 | System.err.format("Number of edges in system output: %d%n", scorerL.getNEdgesInSystemOutput()); 544 | System.err.format("Number of edges in common, labeled: %d%n", scorerL.getNEdgesInCommon()); 545 | System.err.format("Number of edges in common, unlabeled: %d%n", scorerU.getNEdgesInCommon()); 546 | System.err.println(); 547 | 548 | System.err.println("### Labeled scores"); 549 | System.err.println(); 550 | System.err.format("LP: %f%n", scorerL.getPrecision()); 551 | System.err.format("LR: %f%n", scorerL.getRecall()); 552 | System.err.format("LF: %f%n", scorerL.getF1()); 553 | System.err.format("LM: %f%n", scorerL.getExactMatch()); 554 | System.err.println(); 555 | 556 | System.err.println("### Breakdown by label type"); 557 | System.err.println(); 558 | System.err.println("Label type,Number of edges in gold standard,Number of edges in system output,Precision,Recall"); 559 | List labels = new ArrayList(scorerL.getLabels()); 560 | Collections.sort(labels); 561 | for (String label : labels) { 562 | System.err.format("%s,%d,%d,%f,%f%n", label, scorerL.getNEdgesInGoldStandardByLabel(label), scorerL.getNEdgesInSystemOutputByLabel(label), scorerL.getPrecisionPerLabel(label), scorerL.getRecallPerLabel(label)); 563 | } 564 | 
System.err.println(); 565 | 566 | System.err.println("### Breakdown by edge length"); 567 | System.err.println(); 568 | List quantizedLengths = new ArrayList(); 569 | for (int i = 1; i < 100; i++) { 570 | String quantizedLength = scorerL.getQuantizedLength(i); 571 | if (!quantizedLengths.contains(quantizedLength)) { 572 | quantizedLengths.add(quantizedLength); 573 | } 574 | } 575 | System.err.println("Edge length,Number of edges in gold standard,Number of edges in system output,Precision,Recall"); 576 | for (String quantizedLength : quantizedLengths) { 577 | System.err.format("%s,%d,%d,%f,%f%n", quantizedLength, scorerL.getNEdgesInGoldStandardByQuantizedLength(quantizedLength), scorerL.getNEdgesInSystemOutputByQuantizedLength(quantizedLength), scorerL.getPrecisionPerQuantizedLength(quantizedLength), scorerL.getRecallPerQuantizedLength(quantizedLength)); 578 | } 579 | System.err.println(); 580 | 581 | System.err.println("### Unlabeled scores"); 582 | System.err.println(); 583 | System.err.format("UP: %f%n", scorerU.getPrecision()); 584 | System.err.format("UR: %f%n", scorerU.getRecall()); 585 | System.err.format("UF: %f%n", scorerU.getF1()); 586 | System.err.format("UM: %f%n", scorerU.getExactMatch()); 587 | System.err.println(); 588 | 589 | System.err.println("### Complete predications"); 590 | System.err.println(); 591 | System.err.format("Number of complete predications in gold standard: %d%n", scorerL.getNCorePredicationsInGoldStandard()); 592 | System.err.format("Number of complete predications in system output: %d%n", scorerL.getNCorePredicationsInSystemOutput()); 593 | System.err.println(); 594 | System.err.format("PP: %f%n", scorerL.getCorePredicationsPrecision()); 595 | System.err.format("PR: %f%n", scorerL.getCorePredicationsRecall()); 596 | System.err.format("PF: %f%n", scorerL.getCorePredicationsF1()); 597 | System.err.println(); 598 | 599 | System.err.println("### Semantic frames"); 600 | System.err.println(); 601 | System.err.format("Number of semantic 
frames in gold standard: %d%n", scorerL.getNSemanticFramesInGoldStandard()); 602 | System.err.format("Number of semantic frames in system output: %d%n", scorerL.getNSemanticFramesInSystemOutput()); 603 | System.err.println(); 604 | System.err.format("FP: %f%n", scorerL.getSemanticFramesPrecision()); 605 | System.err.format("FR: %f%n", scorerL.getSemanticFramesRecall()); 606 | System.err.format("FF: %f%n", scorerL.getSemanticFramesF1()); 607 | } 608 | 609 | /** 610 | * Compute scores for two files. 611 | * 612 | * @param args the names of the files containing the gold standard graphs 613 | * and the system output graphs 614 | * @throws Exception if an I/O exception occurs 615 | */ 616 | public static void main(String[] args) throws Exception { 617 | boolean includePunctuation = true; 618 | boolean treatEdgesAsUndirected = false; 619 | ArgumentFilter labelPredicate = new TrueFilter(); 620 | int graphsToRead = -1; 621 | for (String arg : args) { 622 | if (arg.equals("excludePunctuation")) { 623 | System.err.println("Will exclude punctuation."); 624 | includePunctuation = false; 625 | } 626 | if (arg.equals("treatEdgesAsUndirected")) { 627 | System.err.println("Will treat edges as undirected."); 628 | treatEdgesAsUndirected = true; 629 | } 630 | if (arg.startsWith("corePredicates=")) { 631 | String fileName = arg.substring(15); 632 | System.err.format("Reading core predicates from %s%n", fileName); 633 | labelPredicate = new ListFilter(new File(fileName)); 634 | } 635 | if (arg.startsWith("max=")) { 636 | graphsToRead = Integer.parseInt(arg.substring(4)); 637 | System.err.format("Will read at most %d graphs.%n", graphsToRead); 638 | } 639 | if (arg.startsWith("representation=")) { 640 | String representation = arg.substring(15).toLowerCase(); 641 | if (representation.equals("dm")) { 642 | System.err.println("Representation type: DM"); 643 | labelPredicate = new DMArgumentFilter(); 644 | } 645 | if (representation.equals("pas")) { 646 | 
System.err.println("Representation type: PAS"); 647 | labelPredicate = new PASArgumentFilter(); 648 | } 649 | if (representation.equals("psd")) { 650 | System.err.println("Representation type: PSD"); 651 | labelPredicate = new PSDPredicate(); 652 | } 653 | } 654 | } 655 | 656 | System.err.println("# Evaluation"); 657 | System.err.println(); 658 | 659 | System.err.format("Gold standard file: %s%n", args[0]); 660 | System.err.format("System output file: %s%n", args[1]); 661 | System.err.println(); 662 | 663 | List graphPairs = readGraphs(args[0], args[1], graphsToRead); 664 | 665 | System.err.println("## Scores including virtual dependencies to top nodes"); 666 | System.err.println(); 667 | score(true, includePunctuation, treatEdgesAsUndirected, graphPairs, labelPredicate); 668 | System.err.println(); 669 | 670 | System.err.println("## Scores excluding virtual dependencies to top nodes"); 671 | System.err.println(); 672 | score(false, includePunctuation, treatEdgesAsUndirected, graphPairs, labelPredicate); 673 | } 674 | 675 | private static class GraphPair { 676 | 677 | public final Graph goldStandard; 678 | public final Graph systemOutput; 679 | 680 | public GraphPair(Graph goldStandard, Graph systemOutput) { 681 | this.goldStandard = goldStandard; 682 | this.systemOutput = systemOutput; 683 | } 684 | } 685 | 686 | private interface ArgumentFilter { 687 | 688 | abstract public boolean applies(String label); 689 | } 690 | 691 | private static class TrueFilter implements ArgumentFilter { 692 | 693 | @Override 694 | public boolean applies(String label) { 695 | return true; 696 | } 697 | } 698 | 699 | private static class ListFilter implements ArgumentFilter { 700 | 701 | private final Set labels; 702 | 703 | public ListFilter(File file) { 704 | this.labels = new HashSet<>(); 705 | try { 706 | BufferedReader reader = new BufferedReader(new FileReader(file)); 707 | String line; 708 | while ((line = reader.readLine()) != null) { 709 | labels.add(line.trim()); 710 | } 711 
| } catch (FileNotFoundException e) { 712 | System.err.println("File not found."); 713 | System.exit(1); 714 | } catch (IOException e) { 715 | System.err.println("I/O exception."); 716 | System.exit(1); 717 | } 718 | } 719 | 720 | @Override 721 | public boolean applies(String label) { 722 | return labels.contains(label); 723 | } 724 | } 725 | 726 | private static class DMArgumentFilter implements ArgumentFilter { 727 | 728 | @Override 729 | public boolean applies(String label) { 730 | return true; 731 | } 732 | } 733 | 734 | private static class PASArgumentFilter implements ArgumentFilter { 735 | 736 | private final Set coreArguments; 737 | 738 | public PASArgumentFilter() { 739 | this.coreArguments = new HashSet<>(); 740 | coreArguments.add("adj_ARG1"); 741 | coreArguments.add("adj_ARG2"); 742 | coreArguments.add("adj_MOD"); 743 | coreArguments.add("coord_ARG1"); 744 | coreArguments.add("coord_ARG2"); 745 | coreArguments.add("prep_ARG1"); 746 | coreArguments.add("prep_ARG2"); 747 | coreArguments.add("prep_ARG3"); 748 | coreArguments.add("prep_MOD"); 749 | coreArguments.add("verb_ARG1"); 750 | coreArguments.add("verb_ARG2"); 751 | coreArguments.add("verb_ARG3"); 752 | coreArguments.add("verb_ARG4"); 753 | coreArguments.add("verb_MOD"); 754 | } 755 | 756 | @Override 757 | public boolean applies(String label) { 758 | return coreArguments.contains(label); 759 | } 760 | } 761 | 762 | public static class PSDPredicate implements ArgumentFilter { 763 | 764 | @Override 765 | public boolean applies(String label) { 766 | return label.endsWith("-arg"); 767 | } 768 | } 769 | 770 | private ScorerEdge makeEdge(int graphId, int src, int tgt, String label) { 771 | if (treatEdgesAsUndirected) { 772 | return new UndirectedScorerEdge(graphId, src, tgt, label); 773 | } else { 774 | return new ScorerEdge(graphId, src, tgt, label); 775 | } 776 | } 777 | 778 | private static class ScorerEdge { 779 | 780 | final int graphId; 781 | final int src; 782 | final int tgt; 783 | final String 
label; 784 | 785 | public ScorerEdge(int graphId, int src, int tgt, String label) { 786 | this.graphId = graphId; 787 | this.src = src; 788 | this.tgt = tgt; 789 | this.label = label; 790 | } 791 | 792 | public int getLength() { 793 | return Math.max(src, tgt) - Math.min(src, tgt); 794 | } 795 | 796 | @Override 797 | public int hashCode() { 798 | int hash = 3; 799 | hash = 53 * hash + this.graphId; 800 | hash = 53 * hash + this.src; 801 | hash = 53 * hash + this.tgt; 802 | hash = 53 * hash + (this.label != null ? this.label.hashCode() : 0); 803 | return hash; 804 | } 805 | 806 | @Override 807 | public boolean equals(Object obj) { 808 | if (obj == null) { 809 | return false; 810 | } 811 | if (getClass() != obj.getClass()) { 812 | return false; 813 | } 814 | final ScorerEdge other = (ScorerEdge) obj; 815 | if (this.graphId != other.graphId) { 816 | return false; 817 | } 818 | if (this.src != other.src) { 819 | return false; 820 | } 821 | if (this.tgt != other.tgt) { 822 | return false; 823 | } 824 | if ((this.label == null) ? (other.label != null) : !this.label.equals(other.label)) { 825 | return false; 826 | } 827 | return true; 828 | } 829 | } 830 | 831 | private static class UndirectedScorerEdge extends ScorerEdge { 832 | 833 | public UndirectedScorerEdge(int graphId, int src, int tgt, String label) { 834 | super(graphId, src, tgt, label); 835 | } 836 | 837 | @Override 838 | public int hashCode() { 839 | int hash = 3; 840 | hash = 53 * hash + this.graphId; 841 | hash = 53 * hash + Math.min(this.src, this.tgt); 842 | hash = 53 * hash + Math.max(this.src, this.tgt); 843 | hash = 53 * hash + (this.label != null ? 
this.label.hashCode() : 0); 844 | return hash; 845 | } 846 | 847 | @Override 848 | public boolean equals(Object obj) { 849 | if (obj == null) { 850 | return false; 851 | } 852 | if (getClass() != obj.getClass()) { 853 | return false; 854 | } 855 | final ScorerEdge other = (ScorerEdge) obj; 856 | if (this.graphId != other.graphId) { 857 | return false; 858 | } 859 | if (Math.min(this.src, this.tgt) != Math.min(other.src, other.tgt)) { 860 | return false; 861 | } 862 | if (Math.max(this.src, this.tgt) != Math.max(other.src, other.tgt)) { 863 | return false; 864 | } 865 | if ((this.label == null) ? (other.label != null) : !this.label.equals(other.label)) { 866 | return false; 867 | } 868 | return true; 869 | } 870 | } 871 | 872 | private static class SemanticFrame { 873 | 874 | final int graphId; 875 | final int node; 876 | final String sense; 877 | final Set outgoingEdges; 878 | 879 | public SemanticFrame(int graphId, int node, String sense, Set outgoingEdges) { 880 | this.graphId = graphId; 881 | this.node = node; 882 | this.sense = sense; 883 | this.outgoingEdges = outgoingEdges; 884 | } 885 | 886 | @Override 887 | public int hashCode() { 888 | int hash = 3; 889 | hash = 53 * hash + this.graphId; 890 | hash = 53 * hash + this.node; 891 | hash = 53 * hash + (this.sense != null ? this.sense.hashCode() : 0); 892 | hash = 53 * hash + (this.outgoingEdges != null ? this.outgoingEdges.hashCode() : 0); 893 | return hash; 894 | } 895 | 896 | @Override 897 | public boolean equals(Object obj) { 898 | if (obj == null) { 899 | return false; 900 | } 901 | if (getClass() != obj.getClass()) { 902 | return false; 903 | } 904 | final SemanticFrame other = (SemanticFrame) obj; 905 | if (this.graphId != other.graphId) { 906 | return false; 907 | } 908 | if (this.node != other.node) { 909 | return false; 910 | } 911 | if ((this.sense == null) ? (other.sense != null) : !this.sense.equals(other.sense)) { 912 | return false; 913 | } 914 | if ((this.outgoingEdges == null) ? 
(other.outgoingEdges != null) : !this.outgoingEdges.equals(other.outgoingEdges)) { 915 | return false; 916 | } 917 | return true; 918 | } 919 | } 920 | 921 | private Set getLabels() { 922 | Set labels = new HashSet(); 923 | for (ScorerEdge edge : edgesInGoldStandard) { 924 | labels.add(edge.label); 925 | } 926 | for (ScorerEdge edge : edgesInSystemOutput) { 927 | labels.add(edge.label); 928 | } 929 | return labels; 930 | } 931 | 932 | private int getNEdgesByLabel(String label, Set edges) { 933 | int n = 0; 934 | for (ScorerEdge edge : edges) { 935 | n += edge.label.equals(label) ? 1 : 0; 936 | } 937 | return n; 938 | } 939 | 940 | private int getNEdgesInGoldStandardByLabel(String label) { 941 | return getNEdgesByLabel(label, edgesInGoldStandard); 942 | } 943 | 944 | private int getNEdgesInSystemOutputByLabel(String label) { 945 | return getNEdgesByLabel(label, edgesInSystemOutput); 946 | } 947 | 948 | private double getPrecisionPerLabel(String label) { 949 | int nEdges = 0; 950 | int nCorrect = 0; 951 | for (ScorerEdge edgeS : edgesInSystemOutput) { 952 | if (edgeS.label.equals(label)) { 953 | nEdges++; 954 | if (edgesInGoldStandard.contains(edgeS)) { 955 | nCorrect++; 956 | } 957 | } 958 | } 959 | return (double) nCorrect / (double) nEdges; 960 | } 961 | 962 | private double getRecallPerLabel(String label) { 963 | int nEdges = 0; 964 | int nCorrect = 0; 965 | for (ScorerEdge edgeG : edgesInGoldStandard) { 966 | if (edgeG.label.equals(label)) { 967 | nEdges++; 968 | if (edgesInSystemOutput.contains(edgeG)) { 969 | nCorrect++; 970 | } 971 | } 972 | } 973 | return (double) nCorrect / (double) nEdges; 974 | } 975 | 976 | private String getQuantizedLength(int length) { 977 | if (length <= 4) { 978 | return Integer.toString(length); 979 | } else if (length < 10) { 980 | return "5-9"; 981 | } else { 982 | return "10-"; 983 | } 984 | } 985 | 986 | private String getQuantizedLength(ScorerEdge edge) { 987 | return getQuantizedLength(edge.getLength()); 988 | } 989 | 990 | 
private Set getQuantizedLengths() { 991 | Set lengths = new HashSet(); 992 | for (ScorerEdge edge : edgesInGoldStandard) { 993 | lengths.add(getQuantizedLength(edge)); 994 | } 995 | for (ScorerEdge edge : edgesInSystemOutput) { 996 | lengths.add(getQuantizedLength(edge)); 997 | } 998 | return lengths; 999 | } 1000 | 1001 | private int getNEdgesByQuantizedLength(String quantizedLength, Set edges) { 1002 | int n = 0; 1003 | for (ScorerEdge edge : edges) { 1004 | if (getQuantizedLength(edge).equals(quantizedLength)) { 1005 | n++; 1006 | } 1007 | } 1008 | return n; 1009 | } 1010 | 1011 | private int getNEdgesInGoldStandardByQuantizedLength(String quantizedLength) { 1012 | return getNEdgesByQuantizedLength(quantizedLength, edgesInGoldStandard); 1013 | } 1014 | 1015 | private int getNEdgesInSystemOutputByQuantizedLength(String quantizedLength) { 1016 | return getNEdgesByQuantizedLength(quantizedLength, edgesInSystemOutput); 1017 | } 1018 | 1019 | private double getPrecisionPerQuantizedLength(String quantizedLength) { 1020 | int nEdges = 0; 1021 | int nCorrect = 0; 1022 | for (ScorerEdge edgeS : edgesInSystemOutput) { 1023 | if (getQuantizedLength(edgeS).equals(quantizedLength)) { 1024 | nEdges++; 1025 | if (edgesInGoldStandard.contains(edgeS)) { 1026 | nCorrect++; 1027 | } 1028 | } 1029 | } 1030 | return (double) nCorrect / (double) nEdges; 1031 | } 1032 | 1033 | private double getRecallPerQuantizedLength(String quantizedLength) { 1034 | int nEdges = 0; 1035 | int nCorrect = 0; 1036 | for (ScorerEdge edgeG : edgesInGoldStandard) { 1037 | if (getQuantizedLength(edgeG).equals(quantizedLength)) { 1038 | nEdges++; 1039 | if (edgesInSystemOutput.contains(edgeG)) { 1040 | nCorrect++; 1041 | } 1042 | } 1043 | } 1044 | return (double) nCorrect / (double) nEdges; 1045 | } 1046 | } 1047 | -------------------------------------------------------------------------------- /src/main/java/se/liu/ida/nlp/sdp/toolkit/tools/Splitter.java: 
--------------------------------------------------------------------------------
/*
 * See the file "LICENSE" for the full license governing this code.
 */
package se.liu.ida.nlp.sdp.toolkit.tools;

import se.liu.ida.nlp.sdp.toolkit.graph.Graph;
import se.liu.ida.nlp.sdp.toolkit.io.GraphReader;
import se.liu.ida.nlp.sdp.toolkit.io.GraphReader2015;
import se.liu.ida.nlp.sdp.toolkit.io.GraphWriter;
import se.liu.ida.nlp.sdp.toolkit.io.GraphWriter2015;

/**
 * Splits the SDP training data into training and development.
 *
 * <p>
 * A graph is written to the development file if the two characters at
 * (zero-based) positions 2 and 3 of its ID are {@code "20"}; every other
 * graph is written to the training file.
 *
 * @author Marco Kuhlmann
 */
public class Splitter {

    /**
     * The value at positions 2-3 of a graph ID that selects the development
     * set. NOTE(review): presumably a section number -- confirm against the
     * data format.
     */
    private static final String DEVELOPMENT_MARKER = "20";

    /**
     * Reads graphs from the input file and distributes them over the two
     * output files.
     *
     * @param args the input file, the training output file and the
     * development output file, in this order
     * @throws Exception if reading or writing fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 3) {
            System.err.println("Usage: Splitter <input> <train-output> <devel-output>");
            System.exit(1);
        }
        // Nested try/finally blocks make sure that everything that was opened
        // is closed again, even if a later open, read or write fails.
        GraphReader reader = new GraphReader2015(args[0]);
        try {
            GraphWriter writerTrain = new GraphWriter2015(args[1]);
            try {
                GraphWriter writerDevel = new GraphWriter2015(args[2]);
                try {
                    Graph graph;
                    while ((graph = reader.readGraph()) != null) {
                        if (graph.id.substring(2, 4).equals(DEVELOPMENT_MARKER)) {
                            writerDevel.writeGraph(graph);
                        } else {
                            writerTrain.writeGraph(graph);
                        }
                    }
                } finally {
                    writerDevel.close();
                }
            } finally {
                writerTrain.close();
            }
        } finally {
            reader.close();
        }
    }
}

--------------------------------------------------------------------------------