81 | )
82 | : (
83 |
84 |
85 |
100 |
101 | )
102 | }
103 |
104 | );
105 | }
106 | }
107 |
108 | export default App;
109 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/SQLQuery.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collection;
5 | import java.util.HashMap;
6 | import java.util.HashSet;
7 | import java.util.List;
8 | import java.util.Map;
9 |
10 | /**
11 | * Just a wrapper for a String of sql translateNL.
12 | * @author keping
13 | */
14 | public class SQLQuery {
15 | private List blocks;
16 | private Map> map;
17 |
18 | SQLQuery() {
19 | map = new HashMap<>();
20 | map.put("SELECT", new ArrayList());
21 | map.put("FROM", new HashSet());
22 | map.put("WHERE", new HashSet());
23 | blocks = new ArrayList();
24 | }
25 |
26 | @Deprecated
27 | public SQLQuery(String s) {
28 |
29 | }
30 |
31 | /**
32 | * Get the String translateNL insides the SQLQuery.
33 | * @return
34 | */
35 | String get() { return toString(); }
36 |
37 | public void addBlock(SQLQuery query) {
38 | blocks.add(query);
39 | add("FROM", "BLOCK"+blocks.size());
40 | }
41 |
42 | Collection getCollection(String keyWord) { return map.get(keyWord); }
43 |
44 | /**
45 | * Add (key, value) to the SQL Query.
46 | * For example, (SELECT, article.title) or (FROM, article).
47 | * @param key
48 | * @param val
49 | */
50 | void add(String key, String value) {
51 | map.get(key).add(value);
52 | }
53 |
54 |
55 | /**
56 | * Serve for the toString() method.
57 | * @param SELECT (or FROM)
58 | * @return one line of arguments of that translateNL (SELECT, FROM)
59 | */
60 | private StringBuilder toSBLine(Collection SELECT) {
61 | StringBuilder sb = new StringBuilder();
62 | for (String val : SELECT) {
63 | if (sb.length() == 0) {
64 | sb.append(val);
65 | } else {
66 | sb.append(", ").append(val);
67 | }
68 | }
69 | return sb;
70 | }
71 |
72 | /**
73 | * Similar to {@link #toSBLine(Collection)}, but that incorporates
74 | * the information of "AND" and "OR".
75 | * @param WHERE
76 | * @return
77 | */
78 | private StringBuilder toSBLineCondition(Collection WHERE) {
79 | StringBuilder sb = new StringBuilder();
80 | for (String val : WHERE) {
81 | if (sb.length() == 0) {
82 | sb.append(val);
83 | } else {
84 | // currently only allow for "AND"
85 | // TODO: add "OR"
86 | sb.append(" AND ").append(val);
87 | }
88 | }
89 | return sb;
90 | }
91 |
92 | @Override
93 | public String toString() {
94 | if (map.get("SELECT").isEmpty() || map.get("FROM").isEmpty()) {
95 | return "Illegal Query";
96 | }
97 | StringBuilder sb = new StringBuilder();
98 | for (int i = 0; i < blocks.size(); i++) {
99 | sb.append("BLOCK"+(i+1)+":").append("\n");
100 | sb.append(blocks.get(i).toString()).append("\n");
101 | sb.append("\n");
102 | }
103 | sb.append("SELECT ").append(toSBLine(map.get("SELECT"))).append("\n");
104 | sb.append("FROM ").append(toSBLine(map.get("FROM"))).append("\n");
105 | if (!map.get("WHERE").isEmpty()) {
106 | sb.append("WHERE ").append(toSBLineCondition(map.get("WHERE"))).append("\n");
107 | }
108 | sb.append(";\n");
109 | return sb.toString();
110 | }
111 |
112 |
113 | }
114 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/NodeInfo.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.Comparator;
4 |
5 | /**
6 | * Immutable class indicating the SQL component for a Node.
7 | * @author keping
8 | *
9 | */
10 | public class NodeInfo {
11 | // TODO: all fields should be private in final version.
12 | private String type;
13 | private String value;
14 | /**
15 | * Similarity score of the Node to the column/table name in schema.
16 | */
17 | private double score = 1.0;
18 |
19 | public NodeInfo(String type, String value) {
20 | this.type = type;
21 | this.value = value;
22 | }
23 | public NodeInfo(String type, String value, double score) {
24 | this(type, value);
25 | this.score = score;
26 | }
27 | public NodeInfo(NodeInfo ni){
28 | this.type = ni.type;
29 | this.value = ni.value;
30 | this.score = ni.score;
31 | }
32 | @Override
33 | public String toString() {
34 | return type+": "+value;
35 | }
36 | public String getType() { return type; }
37 | public String getValue() {
38 | return value;
39 | }
40 |
41 | public double getScore(){
42 | return score;
43 | }
44 |
45 | public static class ReverseScoreComparator implements Comparator {
46 | @Override
47 | public int compare(NodeInfo a, NodeInfo b) {
48 | if (a.score < b.score) { return 1; }
49 | else if (a.score > b.score) { return -1; }
50 | else { return 0; }
51 | }
52 | }
53 |
54 | @Override
55 | public int hashCode() {
56 | final int prime = 31;
57 | int result = 1;
58 | result = prime * result + ((type == null) ? 0 : type.hashCode());
59 | result = prime * result + ((value == null) ? 0 : value.hashCode());
60 | return result;
61 | }
62 | @Override
63 | public boolean equals(Object obj) {
64 | if (this == obj)
65 | return true;
66 | if (obj == null)
67 | return false;
68 | if (getClass() != obj.getClass())
69 | return false;
70 | NodeInfo other = (NodeInfo) obj;
71 | if (type == null) {
72 | if (other.type != null)
73 | return false;
74 | } else if (!type.equals(other.type))
75 | return false;
76 | if (value == null) {
77 | if (other.value != null)
78 | return false;
79 | } else if (!value.equals(other.value))
80 | return false;
81 | return true;
82 | }
83 |
84 | public boolean ExactSameSchema (NodeInfo other) {
85 |
86 | if (type == null || other.getType() == null || value == null || other.getValue() == null) {
87 | return false;
88 | }
89 |
90 | if (type.equals(other.getType()) && value.equals(other.getValue())) {
91 |
92 | return true;
93 | }
94 |
95 | return false;
96 | }
97 |
98 | public boolean sameSchema (NodeInfo other) {
99 |
100 | if (type == null || other.getType() == null || value == null || other.getValue() == null) {
101 | return false;
102 | }
103 |
104 | int indexOfDot_Other = other.getValue().indexOf('.');
105 |
106 | int indexOfDot = value.indexOf('.');
107 |
108 | if (indexOfDot_Other == -1) {
109 |
110 | indexOfDot_Other = other.getValue().length();
111 | }
112 |
113 | if (indexOfDot == -1) {
114 |
115 | indexOfDot = value.length();
116 | }
117 |
118 | if (other.getValue().substring(0, indexOfDot_Other - 1)
119 | .equals(value.substring(0, indexOfDot - 1))) {
120 |
121 | return true;
122 | }
123 |
124 |
125 | return false;
126 | }
127 |
128 | }
129 |
--------------------------------------------------------------------------------
/client/src/utils/registerServiceWorker.js:
--------------------------------------------------------------------------------
// In production, we register a service worker to serve assets from local cache.
2 |
3 | // This lets the app load faster on subsequent visits in production, and gives
4 | // it offline capabilities. However, it also means that developers (and users)
5 | // will only see deployed updates on the "N+1" visit to a page, since previously
6 | // cached resources are updated in the background.
7 |
// To learn more about the benefits of this model, read https://goo.gl/KwvDNy.
9 | // This link also includes instructions on opting out of this behavior.
10 |
// True when the page is served from a loopback address: plain "localhost",
// the IPv6 loopback "[::1]", or any 127.0.0.0/8 IPv4 address.
const isLocalhost = (() => {
  const { hostname } = window.location;
  return Boolean(
    hostname === 'localhost' ||
      hostname === '[::1]' ||
      hostname.match(/^127(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$/)
  );
})();
20 |
// Registers the app's service worker in production builds only.
export default function register() {
  if (process.env.NODE_ENV === 'production' && 'serviceWorker' in navigator) {
    // The URL constructor is available in all browsers that support SW.
    const publicUrl = new URL(process.env.PUBLIC_URL, window.location);
    if (publicUrl.origin !== window.location.origin) {
      // Our service worker won't work if PUBLIC_URL is on a different origin
      // from what our page is served on. This might happen if a CDN is used to
      // serve assets; see https://github.com/facebookincubator/create-react-app/issues/2374
      return;
    }

    window.addEventListener('load', () => {
      const swUrl = `${process.env.PUBLIC_URL}/service-worker.js`;

      if (!isLocalhost) {
        // Is not localhost. Just register the service worker.
        registerValidSW(swUrl);
      } else {
        // This is running on localhost. Let's check if a service worker still exists or not.
        checkValidServiceWorker(swUrl);
      }
    });
  }
}
45 |
/**
 * Registers the service worker at swUrl and logs whether fresh content
 * was fetched or everything was served from the precache.
 * @param {string} swUrl URL of the service-worker.js script
 */
function registerValidSW(swUrl) {
  navigator.serviceWorker
    .register(swUrl)
    .then(registration => {
      registration.onupdatefound = () => {
        const installingWorker = registration.installing;
        installingWorker.onstatechange = () => {
          if (installingWorker.state === 'installed') {
            if (navigator.serviceWorker.controller) {
              // At this point, the old content will have been purged and
              // the fresh content will have been added to the cache.
              // It's the perfect time to display a "New content is
              // available; please refresh." message in your web app.
              console.log('New content is available; please refresh.');
            } else {
              // At this point, everything has been precached.
              // It's the perfect time to display a
              // "Content is cached for offline use." message.
              console.log('Content is cached for offline use.');
            }
          }
        };
      };
    })
    .catch(error => {
      // Fix: a package-rename pass had mangled this message into
      // "Error during com.dukenlidb.nlidb.service worker registration:".
      console.error('Error during service worker registration:', error);
    });
}
74 |
// Used on localhost: verifies the registered worker script still exists and
// is JavaScript before re-registering; otherwise unregisters and reloads.
function checkValidServiceWorker(swUrl) {
  // Check if the service worker can be found. If it can't, reload the page.
  fetch(swUrl)
    .then(response => {
      // Ensure the service worker exists, and that we really are getting a JS file.
      if (
        response.status === 404 ||
        response.headers.get('content-type').indexOf('javascript') === -1
      ) {
        // No service worker found. Probably a different app. Reload the page.
        navigator.serviceWorker.ready.then(registration => {
          registration.unregister().then(() => {
            window.location.reload();
          });
        });
      } else {
        // Service worker found. Proceed as normal.
        registerValidSW(swUrl);
      }
    })
    .catch(() => {
      console.log(
        'No internet connection found. App is running in offline mode.'
      );
    });
}
101 |
// Removes any service worker previously registered for this page.
export function unregister() {
  if (!('serviceWorker' in navigator)) {
    return;
  }
  navigator.serviceWorker.ready.then(registration => registration.unregister());
}
109 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/NodeMapper.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collections;
5 | import java.util.HashMap;
6 | import java.util.HashSet;
7 | import java.util.List;
8 | import java.util.Map;
9 | import java.util.Set;
10 |
11 | /**
12 | * A class to help map word {@link Node} in {@link ParseTree}
13 | * to SQL components (represented by class {@link NodeInfo}).
14 | * @author keping
15 | *
16 | */
17 | public class NodeMapper {
18 | private WordNet wordNet;
19 | /**
20 | * Key is the word. Value is the corresponding SQL component.
21 | * For example: ("return", ("SN", "SELECT"))
22 | */
23 | private Map map;
24 |
25 |
26 | /**
27 | * Initialize the NodeMapper. (The mapper could be made configurable. It can also initialize
28 | * by reading mappings from a file)
29 | * @throws Exception
30 | */
31 | public NodeMapper() throws Exception {
32 | wordNet = new WordNet();
33 | map = new HashMap();
34 | map.put("return", new NodeInfo("SN", "SELECT")); // Select Node
35 |
36 | map.put("equals", new NodeInfo("ON", "=")); // Operator Node
37 | map.put("less", new NodeInfo("ON", "<"));
38 | map.put("greater", new NodeInfo("ON", ">"));
39 | map.put("not", new NodeInfo("ON", "!=")); //TODO: not is a operator node or logic node?
40 | map.put("before", new NodeInfo("ON", "<"));
41 | map.put("after", new NodeInfo("ON", ">"));
42 | map.put("more", new NodeInfo("ON", ">"));
43 | map.put("older", new NodeInfo("ON", ">"));
44 | map.put("newer", new NodeInfo("ON", "<"));
45 |
46 | map.put("fn", new NodeInfo("FN", "AVG")); // Function Node
47 | map.put("average", new NodeInfo("FN", "AVG"));
48 | map.put("most", new NodeInfo("FN", "MAX"));
49 | map.put("total", new NodeInfo("FN", "SUM"));
50 | map.put("number", new NodeInfo("FN","COUNT"));
51 |
52 | map.put("all", new NodeInfo("QN", "ALL")); // Quantifier Node
53 | map.put("any", new NodeInfo("QN", "ANY"));
54 | map.put("each", new NodeInfo("QN", "EACH"));
55 |
56 | map.put("and", new NodeInfo("LN", "AND")); // Logic Node
57 | map.put("or", new NodeInfo("LN", "OR"));
58 |
59 |
60 | }
61 |
62 | /**
63 | *
Return the a ranked list of candidate NodeInfos for this Node. This method
64 | * will be called by the controller, and then the candidates will be passed on
65 | * to the view for user to choose. If there is only one candidate in the list,
66 | * the choice is automatically made.
67 | *
The length of the list of NodeInfos is at least 1. We will have special type
68 | * in NodeInfo if the Node doesn't correspond to any SQL component (the Node is
69 | * meaningless).
70 | *
The returned list contains at most 6 elements.
71 | *
Treat all input as lower case.
72 | * @param node
73 | * @param schema
74 | * @return a ranked of NodeInfo
75 | */
76 | public List getNodeInfoChoices(Node node, SchemaGraph schema) {
77 | List result = new ArrayList(); //final output
78 | if (node.getWord().equals("ROOT")) {
79 | result.add(new NodeInfo("ROOT", "ROOT"));
80 | return result;
81 | }
82 | Set valueNodes = new HashSet(); //used to store (type, value, score) of 100 sample values for every column in every table
83 | String word = node.getWord().toLowerCase(); // all words as lower case
84 |
85 | if (map.containsKey(word)) {
86 | result.add(map.get(word));
87 | return result;
88 | }
89 |
90 | for (String tableName : schema.getTableNames()) {
91 | result.add(new NodeInfo("NN", tableName,
92 | WordSimilarity.getSimilarity(word, tableName, wordNet))); //map name nodes(table names)
93 | for (String colName : schema.getColumns(tableName)) {
94 | result.add(new NodeInfo("NN", tableName+"."+colName,
95 | WordSimilarity.getSimilarity(word, colName, wordNet))); //map name nodes (attribute names)
96 | for (String value : schema.getValues(tableName, colName)) {
97 | if (word == null || value == null) {
98 | System.out.println("Comparing "+word+" and "+value);
99 | System.out.println("In table "+tableName+", column "+colName);
100 | }
101 | valueNodes.add(new NodeInfo("VN", tableName+"."+colName,
102 | WordSimilarity.getSimilarity(word, value, wordNet))); //add every sample value into valueNodes
103 | }
104 | }
105 | }
106 |
107 | //map value nodes (table values), to get the value node with highest similarity, add its (type, value, score) into result
108 | // we want all candidates, not only the one with the highest similarity
109 | result.addAll(valueNodes);
110 | result.add(new NodeInfo("UNKNOWN", "meaningless", 1.0));
111 | Collections.sort(result, new NodeInfo.ReverseScoreComparator());
112 | return result;
113 | }
114 |
115 | }
116 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/ui/UserView.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.ui;
2 |
3 | import com.dukenlidb.nlidb.archive.app.Controller;
4 | import javafx.application.Application;
5 | import javafx.application.Platform;
6 | import javafx.collections.FXCollections;
7 | import javafx.collections.ObservableList;
8 | import javafx.geometry.Insets;
9 | import javafx.scene.Scene;
10 | import javafx.scene.control.Button;
11 | import javafx.scene.control.ComboBox;
12 | import javafx.scene.control.Label;
13 | import javafx.scene.control.TextArea;
14 | import javafx.scene.layout.HBox;
15 | import javafx.scene.layout.VBox;
16 | import javafx.scene.text.Text;
17 | import javafx.stage.Stage;
18 | import com.dukenlidb.nlidb.archive.model.NodeInfo;
19 |
20 | public class UserView extends Application {
21 | private static final String TEST_TEXT = "Return the number of authors who published theory papers before 1980.";
22 | // "Return the number of authors who published theory papers before 1980."
23 |
24 | Stage stage; // the window
25 | Scene scene; // the com.dukenlidb.nlidb.main content in the window
26 | Controller ctrl;
27 | Button btnTranslate;
28 | Text display;
29 | ComboBox choiceBox; // use scrollable comboBox instead of choiceBox
30 | Button btnConfirmChoice;
31 | ComboBox treeChoice;
32 | Button btnTreeConfirm;
33 | HBox hb;
34 | VBox vb1, vb2;
35 |
36 |
37 | public void setDisplay(String text) {
38 | display.setText(text);
39 | }
40 |
41 | public void appendDisplay(String text) {
42 | display.setText(display.getText()+text);
43 | }
44 |
45 | public void showNodesChoice() {
46 | vb2.getChildren().addAll(choiceBox, btnConfirmChoice);
47 | }
48 |
49 | public void removeChoiceBoxButton() {
50 | vb2.getChildren().remove(choiceBox);
51 | vb2.getChildren().remove(btnConfirmChoice);
52 | }
53 |
54 | public void setChoices(ObservableList choices) {
55 | choiceBox.setItems(choices);
56 | choiceBox.setValue(choices.get(0));
57 | }
58 |
59 | public NodeInfo getChoice() {
60 | return choiceBox.getValue();
61 | }
62 |
63 | public void showTreesChoice() {
64 | vb2.getChildren().addAll(treeChoice, btnTreeConfirm);
65 | }
66 |
67 | public void removeTreesChoices() {
68 | vb2.getChildren().removeAll(treeChoice, btnTreeConfirm);
69 | }
70 |
71 | @Override
72 | public void start(Stage primaryStage) throws Exception {
73 |
74 | stage = primaryStage;
75 | stage.setTitle("Window for NLIDB");
76 |
77 | Label label1 = new Label("Welcome to Natural Language Interface to DataBase!");
78 |
79 | Label lblInput = new Label("Natural Language Input:");
80 | TextArea fieldIn = new TextArea();
81 | fieldIn.setPrefHeight(100);
82 | fieldIn.setPrefWidth(100);
83 | fieldIn.setWrapText(true);
84 | fieldIn.setText(TEST_TEXT);
85 |
86 | btnTranslate = new Button("translate");
87 |
88 | // Define action of the translate button.
89 | btnTranslate.setOnAction(e -> {
90 | ctrl.processNaturalLanguage(fieldIn.getText());
91 | });
92 |
93 | display = new Text();
94 | display.setWrappingWidth(500);
95 | display.prefHeight(300);
96 | display.setText("Default display text");
97 |
98 | // choices and button for nodes mapping
99 | choiceBox = new ComboBox();
100 | choiceBox.setVisibleRowCount(6);
101 | btnConfirmChoice = new Button("confirm choice");
102 | btnConfirmChoice.setOnAction(e -> {
103 | ctrl.chooseNode(getChoice());
104 | });
105 |
106 | // choices and button for tree selection
107 | treeChoice = new ComboBox(); // ! only show 3 choices now
108 | treeChoice.setItems(FXCollections.observableArrayList(0,1,2));
109 | treeChoice.getSelectionModel().selectedIndexProperty().addListener((ov, oldV, newV) -> {
110 | ctrl.showTree(treeChoice.getItems().get((Integer) newV));
111 | });
112 | btnTreeConfirm = new Button("confirm tree choice");
113 | btnTreeConfirm.setOnAction(e -> {
114 | ctrl.chooseTree(treeChoice.getValue());
115 | });
116 |
117 | vb1 = new VBox();
118 | vb1.setSpacing(10);
119 | vb1.getChildren().addAll(
120 | label1,
121 | lblInput,fieldIn,
122 | btnTranslate
123 | );
124 |
125 | vb2 = new VBox();
126 | vb2.setSpacing(20);
127 | vb2.getChildren().addAll(display);
128 |
129 | hb = new HBox();
130 | hb.setPadding(new Insets(15, 12, 15, 12));
131 | hb.setSpacing(10);
132 | hb.getChildren().addAll(vb1, vb2);
133 |
134 | scene = new Scene(hb, 800, 450);
135 |
136 | stage.setScene(scene);
137 | ctrl = new Controller(this);
138 | stage.show();
139 |
140 | }
141 |
142 | @Override
143 | public void stop() throws Exception {
144 | super.stop();
145 | if (ctrl != null) {
146 | ctrl.closeConnection();
147 | }
148 | Platform.exit();
149 | System.exit(0);
150 | }
151 |
152 | public static void main(String[] args) {
153 | try {
154 | Application.launch(args);
155 | } catch (Exception e) { e.printStackTrace(); }
156 | }
157 |
158 | }
159 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/SQLTranslator.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import java.util.Set;
6 |
7 | /**
8 | * See the paper by Fei Li and H. V. Jagadish for the defined grammar.
9 | * @author keping
10 | *
11 | */
12 | public class SQLTranslator {
13 | private SQLQuery query;
14 | private SchemaGraph schema;
15 | private int blockCounter = 1;
16 |
17 | public SQLTranslator(Node root, SchemaGraph schema) {
18 | this(root, schema, false);
19 | }
20 |
21 | /**
22 | * Translating a block, starting from translateGNP.
23 | * @param root
24 | * @param schema
25 | */
26 | public SQLTranslator(Node root, SchemaGraph schema, boolean block) {
27 | if (!block) {
28 | this.schema = schema;
29 | query = new SQLQuery();
30 |
31 | translateSClause(root.getChildren().get(0));
32 | if (root.getChildren().size() >= 2) {
33 | translateComplexCondition(root.getChildren().get(1));
34 | }
35 |
36 | if (schema != null) addJoinPath();
37 | } else {
38 | this.schema = schema;
39 | query = new SQLQuery();
40 | translateGNP(root);
41 | }
42 | }
43 |
44 | public SQLQuery getResult() { return query; }
45 |
46 |
47 | private static boolean isNumber(String str) {
48 | int length = str.length();
49 | if (length == 0) { return false; }
50 | int i = 0;
51 | if (str.charAt(0) == '-') {
52 | if (length == 1) { return false; }
53 | i = 1;
54 | }
55 | for (; i < length; i++) {
56 | char c = str.charAt(i);
57 | if (c < '0' || c > '9' && c != '.') { return false; }
58 | }
59 | return true;
60 | }
61 |
62 | private void translateCondition(Node node) {
63 | String attribute = "ATTRIBUTE";
64 | String compareSymbol = "=";
65 | String value = "VALUE";
66 | if (node.getInfo().getType().equals("VN")) {
67 | attribute = node.getInfo().getValue();
68 | value = node.getWord();
69 | } else if (node.getInfo().getType().equals("ON")) {
70 | compareSymbol = node.getInfo().getValue();
71 | Node VN = node.getChildren().get(0);
72 | attribute = VN.getInfo().getValue();
73 | value = VN.getWord();
74 | }
75 | if (!isNumber(value)) { value = "\""+value+"\""; }
76 | query.add("WHERE", attribute+" "+compareSymbol+" "+value);
77 | query.add("FROM", attribute.split("\\.")[0]);
78 | }
79 |
80 | private void translateNN(Node node) {
81 | translateNN(node, "");
82 | }
83 | private void translateNN(Node node, String valueFN) {
84 | if (!node.getInfo().getType().equals("NN")) { return; }
85 | if (!valueFN.equals("")) {
86 | query.add("SELECT", valueFN+"("+node.getInfo().getValue()+")");
87 | } else {
88 | query.add("SELECT", node.getInfo().getValue());
89 | }
90 | query.add("FROM", node.getInfo().getValue().split("\\.")[0]);
91 | }
92 |
93 | private void translateNP(Node node) {
94 | translateNP(node, "");
95 | }
96 | private void translateNP(Node node, String valueFN) {
97 | translateNN(node, valueFN);
98 | for (Node child : node.getChildren()) {
99 | if (child.getInfo().getType().equals("NN")) {
100 | translateNN(child);
101 | } else if (child.getInfo().getType().equals("ON") ||
102 | child.getInfo().getType().equals("VN")){
103 | translateCondition(child);
104 | }
105 | }
106 | }
107 |
108 | private void translateGNP(Node node) {
109 | if (node.getInfo().getType().equals("FN")) {
110 | if (node.getChildren().isEmpty()) { return; }
111 | translateNP(node.getChildren().get(0), node.getInfo().getValue());
112 | } else if (node.getInfo().getType().equals("NN")) {
113 | translateNP(node);
114 | }
115 | }
116 |
117 | private void translateComplexCondition(Node node) {
118 | if (!node.getInfo().getType().equals("ON")) { return; }
119 | if (node.getChildren().size() != 2) { return; }
120 | SQLTranslator transLeft = new SQLTranslator(node.getChildren().get(0), schema, true);
121 | SQLTranslator transRight= new SQLTranslator(node.getChildren().get(1), schema, true);
122 | query.addBlock(transLeft.getResult());
123 | query.addBlock(transRight.getResult());
124 | query.add("WHERE", "BLOCK"+(blockCounter++)+" "+node.getInfo().getValue()+" "+"BLOCK"+(blockCounter++));
125 | }
126 |
127 | private void translateSClause(Node node) {
128 | if (!node.getInfo().getType().equals("SN")) { return; }
129 | translateGNP(node.getChildren().get(0));
130 | }
131 |
132 | private void addJoinKeys(String table1, String table2) {
133 | Set joinKeys = schema.getJoinKeys(table1, table2);
134 | for (String joinKey : joinKeys) {
135 | query.add("WHERE", table1+"."+joinKey+" = "+table2+"."+joinKey);
136 | }
137 | }
138 |
139 | private void addJoinPath(List joinPath) {
140 | for (int i = 0; i < joinPath.size()-1; i++) {
141 | addJoinKeys(joinPath.get(i), joinPath.get(i+1));
142 | }
143 | }
144 |
145 | private void addJoinPath() {
146 | List fromTables = new ArrayList(query.getCollection("FROM"));
147 | if (fromTables.size() <= 1) { return; }
148 | for (int i = 0; i < fromTables.size()-1; i++) {
149 | for (int j = i+1; j < fromTables.size(); j++) {
150 | List joinPath = schema.getJoinPath(fromTables.get(i), fromTables.get(j));
151 | addJoinPath(joinPath);
152 | }
153 | }
154 | }
155 |
156 | }
157 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/Node.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.LinkedList;
5 | import java.util.List;
6 |
7 | /**
8 | * Interface for a parse tree node.
9 | * @author keping
10 | *
11 | */
12 | public class Node {
13 |
14 | /**
15 | * record if the node is copied over
16 | */
17 | boolean outside = false;
18 |
19 | private int index = 0;
20 | /**
21 | * Information indicating the corresponding SQL component of the Node.
22 | */
23 | NodeInfo info = null;
24 | /**
25 | * The natural language word of the Node. This is the only field of
26 | * the Node object that is immutable.
27 | */
28 | String word;
29 | /**
30 | * Part-of-speech tag for the Node.
31 | */
32 | String posTag;
33 |
34 | /**
35 | * Parent of the node can be directly modified by ParseTree.
36 | */
37 | Node parent = null; // package private
38 | /**
39 | * Children of the node can be directly modified by ParseTree.
40 | */
41 | List children = new ArrayList(); // package private
42 |
43 | //for testing purpose
44 | boolean isInvalid = false;
45 |
46 | public Node(int index, String word, String posTag){
47 | this(index, word, posTag, null);
48 | }
49 |
50 | public Node(int index, String word, String posTag, NodeInfo info) {
51 | this.index = index;
52 | this.word = word;
53 | this.posTag = posTag;
54 | this.info = info;
55 | }
56 |
57 | public Node(String word, String posTag, NodeInfo info) {
58 | this(0, word, posTag, info);
59 |
60 | }
61 |
62 | private Node clone(Node node){
63 | if (node == null) return null;
64 | Node copy = new Node(node.index, node.word, node.posTag, node.info);
65 | for (Node child : node.children){
66 | Node copyChild = clone(child);
67 | copyChild.parent = copy;
68 | copy.children.add(copyChild);
69 | }
70 | return copy;
71 | }
72 | public Node clone(){
73 | return clone(this);
74 | }
75 |
76 |
77 | public NodeInfo getInfo() { return info; }
78 | public void setInfo(NodeInfo info) { this.info = info; }
79 | public String getWord() { return word; }
80 | public void setWord(String word) {this.word = word;}
81 | public String getPosTag() { return posTag; }
82 | public List getChildren() { return children; }
83 | public void setChild(Node child) {this.children.add(child);}
84 | public Node getParent() {return parent;}
85 | public void setParent(Node parent) {this.parent = parent;}
86 | public void setOutside(boolean outside) {this.outside = outside;}
87 | public boolean getOutside() {return this.outside;}
88 |
89 | public void removeChild (Node child) {
90 |
91 | for (int i = 0; i < children.size(); i ++) {
92 |
93 | if (children.get(i).equals(child)) {
94 |
95 | children.remove(i);
96 | return;
97 | }
98 | }
99 | }
100 |
101 | public void printNodeArray () {
102 |
103 | Node [] nodes = genNodesArray();
104 |
105 | for (int i = 0; i < nodes.length; i++) {
106 | System.out.println("type: " + nodes[i].getInfo().getType() + " value: " + nodes[i].getInfo().getValue());
107 | }
108 | }
109 |
110 |
111 | /**
112 | * Generate an array of the nodes tree with this as root
113 | * using pre-order traversal;
114 | * @return
115 | */
116 | public Node[] genNodesArray() {
117 | List nodesList = new ArrayList<>();
118 | LinkedList stack = new LinkedList<>();
119 | stack.push(this);
120 | while (!stack.isEmpty()) {
121 | Node curr = stack.pop();
122 | nodesList.add(curr);
123 | List currChildren = curr.getChildren();
124 | for (int i = currChildren.size()-1; i >= 0; i--) {
125 | stack.push(currChildren.get(i));
126 | }
127 | }
128 | int N = nodesList.size();
129 | Node[] nodes = new Node[N];
130 | for (int i = 0; i < N; i++) {
131 | nodes[i] = nodesList.get(i);
132 | }
133 | return nodes;
134 | }
135 |
136 | /**
137 | * Only includes posTag, word, info, and children.
138 | * Return the hashCode of the tree represented by this node.
139 | */
140 | @Override
141 | public int hashCode() { // exclude parent.
142 | final int prime = 31;
143 | int result = 17;
144 | result = prime * result + index;
145 | result = prime * result + ((posTag == null) ? 0 : posTag.hashCode());
146 | result = prime * result + ((word == null) ? 0 : word.hashCode());
147 | result = prime * result + ((info == null) ? 0 : info.hashCode());
148 | if (children != null) {
149 | for (Node child : children) {
150 | result = prime * result + child.hashCode();
151 | }
152 | }
153 |
154 | return result;
155 | }
156 |
157 | /**
158 | * Only considers word, posTag, info, and children (recursively).
159 | * See whether two trees represented by two nodes are equal.
160 | */
161 | @Override
162 | public boolean equals(Object obj) { // exclude parent
163 | if (this == obj) { return true; }
164 | if (obj == null) { return false; }
165 | if (getClass() != obj.getClass()) { return false; }
166 | Node other = (Node) obj;
167 | if (index != other.index) { return false; }
168 | if (!word.equals(other.word)) { return false; }
169 | if (!posTag.equals(other.posTag)) { return false; }
170 | if (info != other.info) {
171 | if (info == null || other.info == null) { return false; }
172 | if (!info.equals(other.info)) { return false; }
173 | }
174 | if (children != other.children) {
175 | if (children == null || other.children == null) { return false; }
176 | if (children.size() != other.children.size()) { return false; }
177 | for (int i = 0; i < children.size(); i++) {
178 | if (!children.get(i).equals(other.children.get(i))) { return false; }
179 | }
180 | }
181 | return true;
182 | }
183 |
184 | public String toString() {
185 | String s = "("+index+")"+word;
186 | if (info != null) {
187 | s += "("+info.getType()+":"+info.getValue()+")";
188 | }
189 | return s;
190 | }
191 | }
192 |
--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
#!/usr/bin/env sh

##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################

# NOTE(review): this is the stock script emitted by Gradle's `wrapper` task;
# prefer regenerating it over hand-editing.

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn ( ) {
    echo "$*"
}

# Print a message and abort with a non-zero exit status.
die ( ) {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar

# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=$((i+1))
    done
    case $i in
        (0) set -- ;;
        (1) set -- "$args0" ;;
        (2) set -- "$args0" "$args1" ;;
        (3) set -- "$args0" "$args1" "$args2" ;;
        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save ( ) {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=$(save "$@")

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
  cd "$(dirname "$0")"
fi

exec "$JAVACMD" "$@"
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/TreeAdjustor.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.HashMap;
5 | import java.util.HashSet;
6 | import java.util.List;
7 | import java.util.PriorityQueue;
8 | import java.util.Set;
9 |
10 | public class TreeAdjustor {
11 |
12 | private static final int MAX_EDIT = 15;
13 |
14 | /**
15 | * Return the node in the tree that equals to the targetNode.
16 | * @param tree
17 | * @param targetNode
18 | * @return
19 | */
20 | private static Node find(ParseTree tree, Node targetNode) {
21 | for (Node node : tree) {
22 | if (node.equals(targetNode)) { return node; }
23 | }
24 | return null;
25 | }
26 |
27 | /**
28 | * Swap this parent node and a child node.
29 | * @param parent
30 | * @param child
31 | */
32 | private static void swap(Node parent, Node child) {
33 | // swap the attributes directly.
34 | NodeInfo childInfo = child.info;
35 | String childWord = child.word;
36 | String childPosTag = child.posTag;
37 | child.info = parent.info;
38 | child.word = parent.word;
39 | child.posTag = parent.posTag;
40 | parent.info = childInfo;
41 | parent.word = childWord;
42 | parent.posTag = childPosTag;
43 | }
44 |
45 | /**
46 | * Make the child node a rightmost sibling of the target Node.
47 | * @param target
48 | * @param child
49 | */
50 | private static void makeSibling(Node target, Node child) {
51 | List children = target.getChildren();
52 | target.children = new ArrayList();;
53 | for (Node anyChild : children) {
54 | if (anyChild != child) { target.getChildren().add(anyChild); }
55 | }
56 | target.parent.children.add(child);
57 | child.parent = target.parent;
58 | }
59 |
60 | /**
61 | * Make a sibling the rightmost child of the target.
62 | * @param target
63 | * @param sibling
64 | */
65 | private static void makeChild(Node target, Node sibling) {
66 | List siblings = target.parent.children;
67 | target.parent.children = new ArrayList();
68 | for (Node anySibling : siblings) {
69 | if (anySibling != sibling) {
70 | target.parent.children.add(anySibling);
71 | }
72 | }
73 | target.children.add(sibling);
74 | sibling.parent = target;
75 | }
76 |
77 | /**
78 | *
Return a list of adjusted trees after one adjustment to the input tree
79 | * at the target Node.
80 | *
Four possible adjustments can be made to that node:
81 | *
82 | *
Swap this node with its child. (all possible positions)
We want to translate: "Return all titles of theory papers before 1970."
9 | * into (in for inproceedings):
10 | * SELECT in.title FROM in
11 | * WHERE in.area = 'Theory' AND in.year < 1970;
12 | *
13 | *
The direct parsing result of this natural language input is:
Suppose we have already successfully gone through the process of
28 | * nodes mapping and structural adjustment. Then we should arrive at a ParseTree
29 | * like this: (in for inproceedings)
Still need the adjustor to swap the position of "1970" and "before".
142 | */
	/**
	 * Manual smoke test: parse one sentence, attach NodeInfo by hand,
	 * strip the nodes marked meaningless, and print the resulting SQL.
	 * Prints intermediate trees to stdout at each stage.
	 */
	public static void removeMeaninglessNodesTest() {
		String input = "Return all titles of theory papers before 1970.";
		NLParser parser = new NLParser();
		ParseTree tree = new ParseTree(input, parser);
		System.out.println("ParseTree: ");
		System.out.println(tree);

		// Set NodeInfo
		// Hand-assigned mapping (skipping index 0, presumably the root —
		// TODO confirm genNodesArray() layout).
		Node[] nodes = tree.genNodesArray();
		nodes[1].info = new NodeInfo("SN", "SELECT");
		nodes[2].info = new NodeInfo("UNKNOWN", "meaningless");
		nodes[3].info = new NodeInfo("NN", "in.title");
		nodes[4].info = new NodeInfo("UNKNOWN", "meaningless");
		nodes[5].info = new NodeInfo("VN", "in.area");
		nodes[6].info = new NodeInfo("UNKNOWN", "meaningless");
		nodes[7].info = new NodeInfo("ON", "<");
		nodes[8].info = new NodeInfo("VN", "in.year");
		nodes[9].info = new NodeInfo("UNKNOWN", "meaningless");

		System.out.println("After setting nodeinfo:");
		System.out.println(tree);

		tree.removeMeaninglessNodes();

		System.out.println("After removing meaningless nodes");
		System.out.println(tree);

		SQLQuery query = tree.translateToSQL();

		System.out.println(query);

	}
175 |
	/**
	 * Entry point for running the manual translation demos from the
	 * command line. Uncomment the alternative call to run the
	 * meaningless-node removal demo instead.
	 */
	public static void main(String[] args) {
		testTranslation1();
		//removeMeaninglessNodesTest();
	}
180 |
181 | }
182 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/app/Controller.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.app;
2 |
3 | import java.sql.Connection;
4 | import java.sql.DriverManager;
5 | import java.sql.SQLException;
6 | import java.util.List;
7 |
8 | import javafx.collections.FXCollections;
9 | import com.dukenlidb.nlidb.archive.model.NLParser;
10 | import com.dukenlidb.nlidb.archive.model.Node;
11 | import com.dukenlidb.nlidb.archive.model.NodeInfo;
12 | import com.dukenlidb.nlidb.archive.model.NodeMapper;
13 | import com.dukenlidb.nlidb.archive.model.ParseTree;
14 | import com.dukenlidb.nlidb.archive.model.ParseTree.ParseTreeIterator;
15 | import com.dukenlidb.nlidb.archive.model.SQLQuery;
16 | import com.dukenlidb.nlidb.archive.model.SchemaGraph;
17 | import com.dukenlidb.nlidb.archive.ui.UserView;
18 |
19 |
20 | /**
21 | * The controller between com.dukenlidb.nlidb.model and view.
22 | * @author keping
23 | */
24 | public class Controller {
25 | private Connection connection = null;
26 | private SchemaGraph schema;
27 | private NLParser parser;
28 | private NodeMapper nodeMapper;
29 | private ParseTree parseTree;
30 | private UserView view;
31 | /**
32 | * Iterator for nodes mapping.
33 | */
34 | private ParseTreeIterator iter;
35 | /**
36 | * Attribute for nodes mapping, to indicate the current Node.
37 | */
38 | private Node node;
39 | private boolean mappingNodes = false;
40 | private boolean selectingTree = false;
41 | private boolean processing = false;
42 | private List treeChoices;
43 | private SQLQuery query;
44 |
45 | /**
46 | * Initialize the Controller.
47 | */
48 | public Controller(UserView view) {
49 | this.view = view;
50 | startConnection();
51 |
52 | try { nodeMapper = new NodeMapper();
53 | } catch (Exception e) { e.printStackTrace(); }
54 | parser = new NLParser(); // initialize parser, takes some time
55 |
56 | System.out.println("Controller initialized.");
57 | }
58 |
59 | /**
60 | * ONLY FOR TESTING. An empty constructor.
61 | */
62 | public Controller() {
63 |
64 | }
65 |
66 | /**
67 | * Start connection with the database and read schema graph
68 | */
69 | public void startConnection() {
70 |
71 | try { Class.forName("org.postgresql.Driver"); }
72 | catch (ClassNotFoundException e1) { }
73 |
74 | System.out.println("PostgreSQL JDBC Driver Registered!");
75 |
76 | try {
77 | connection = DriverManager.getConnection("jdbc:postgresql://127.0.0.1:5432/dblp", "dblpuser", "dblpuser");
78 | } catch (SQLException e) {
79 | e.printStackTrace();
80 | }
81 | System.out.println("Connection successful!");
82 |
83 | try {
84 | schema = new SchemaGraph(connection);
85 | view.setDisplay("Database Schema:\n\n"+schema.toString());
86 | } catch (SQLException e) {
87 | e.printStackTrace();
88 | }
89 |
90 | }
91 |
92 | /**
93 | * Close connection with the database.
94 | */
95 | public void closeConnection() {
96 | try {
97 | if (connection != null) { connection.close(); }
98 | } catch (SQLException e) {
99 | e.printStackTrace();
100 | }
101 | System.out.println("Connection closed.");
102 | }
103 |
104 | // ---- Methods for nodes mapping ---- //
105 | /**
106 | * Helper method for nodes mapping, displaying the currently mapping Node
107 | * and the choices on the view.
108 | * @param choices
109 | */
110 | private void setChoicesOnView(List choices) {
111 | view.setDisplay("Mapping nodes: \n"+parseTree.getSentence()+"\n");
112 | view.appendDisplay("Currently on: "+node);
113 | view.setChoices(FXCollections.observableArrayList(choices));
114 | }
115 |
116 | /**
117 | * Terminates the mapping Nodes process by setting the boolean mappingNodes false;
118 | */
119 | private void finishNodesMapping() {
120 | view.setDisplay("Nodes mapped.\n"+parseTree.getSentence());
121 | mappingNodes = false;
122 | view.removeChoiceBoxButton();
123 | processAfterNodesMapping();
124 | }
125 |
126 | /**
127 | * Start the nodes mapping process. A boolean will be set to indicate that
128 | * the application is in the process of mapping Nodes. Cannot call startMappingNodes
129 | * again during mapping Nodes. After this is called, the view shows the choices
130 | * of NodeInfos for a node, waiting for the user to choose one.
131 | */
132 | public void startMappingNodes() {
133 | if (mappingNodes) { return; }
134 | view.showNodesChoice();
135 |
136 | mappingNodes = true;
137 | iter = parseTree.iterator();
138 | if (!iter.hasNext()) {
139 | finishNodesMapping();
140 | return;
141 | }
142 |
143 | node = iter.next();
144 | List choices = nodeMapper.getNodeInfoChoices(node, schema);
145 | if (choices.size() == 1) { chooseNode(choices.get(0)); }
146 | else { setChoicesOnView(choices); }
147 | // After this wait for the button to call chooseNode
148 | }
149 |
150 | /**
151 | * Choose NodeInfo for the current Node. This method is called when the user
152 | * clicked the confirmChoice button, or automatically called when the choices
153 | * of NodeInfo contains only one element.
154 | * @param info {@link NodeInfo}
155 | */
156 | public void chooseNode(NodeInfo info) {
157 | if (!mappingNodes) { return; }
158 | // System.out.println("Now the tree is:");
159 | // System.out.println(parseTree);
160 | node.setInfo(info);
161 | if (!iter.hasNext()) {
162 | finishNodesMapping();
163 | return;
164 | }
165 | node = iter.next();
166 | List choices = nodeMapper.getNodeInfoChoices(node, schema);
167 | if (choices.size() == 1) { chooseNode(choices.get(0)); }
168 | else { setChoicesOnView(choices); }
169 | // After this wait for the button to call chooseNode
170 | }
171 | // ----------------------------------- //
172 |
173 |
174 | // ---- Methods for trees selection ---- //
175 | public void startTreeSelection() {
176 | if (selectingTree) { return; }
177 | view.showTreesChoice();
178 | selectingTree = true;
179 | treeChoices = parseTree.getAdjustedTrees();
180 | }
181 |
182 | public void showTree(int index) {
183 | view.setDisplay(treeChoices.get(index).toString());
184 | }
185 |
186 | public void chooseTree(int index) {
187 | parseTree = treeChoices.get(index);
188 | finishTreeSelection();
189 | }
190 |
191 | public void finishTreeSelection() {
192 | selectingTree = false;
193 | view.removeTreesChoices();
194 | processAfterTreeSelection();
195 | }
196 | // ------------------------------------- //
197 |
198 | public void processAfterTreeSelection() {
199 | System.out.println("The tree before implicit nodes insertion: ");
200 | System.out.println(parseTree);
201 | parseTree.insertImplicitNodes();
202 | System.out.println("Going to do translation for tree: ");
203 | System.out.println(parseTree);
204 | query = parseTree.translateToSQL(schema);
205 | view.setDisplay(query.toString());
206 | processing = false;
207 | }
208 |
209 | public void processAfterNodesMapping() {
210 | System.out.println("Going to remove meaningless nodes for tree: ");
211 | System.out.println(parseTree);
212 | parseTree.removeMeaninglessNodes();
213 | parseTree.mergeLNQN();
214 | startTreeSelection();
215 | }
216 |
217 | /**
218 | * Process natural language and return an sql translateNL.
219 | * @param nl
220 | * @return
221 | */
222 | public void processNaturalLanguage(String input) {
223 | if (processing) { view.appendDisplay("\nCurrently processing a sentence!\n"); }
224 | processing = true;
225 | parseTree = new ParseTree(input, parser);
226 | startMappingNodes();
227 | }
228 |
229 | }
230 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/WordNet.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.io.File;
4 | import java.net.URL;
5 | import java.util.ArrayList;
6 | import java.util.HashSet;
7 | import java.util.List;
8 | import java.util.Set;
9 |
10 | import edu.mit.jwi.IRAMDictionary;
11 | import edu.mit.jwi.RAMDictionary;
12 | import edu.mit.jwi.data.ILoadPolicy;
13 | import edu.mit.jwi.item.IIndexWord;
14 | import edu.mit.jwi.item.ISynset;
15 | import edu.mit.jwi.item.ISynsetID;
16 | import edu.mit.jwi.item.IWordID;
17 | import edu.mit.jwi.item.POS;
18 | import edu.mit.jwi.item.Pointer;
19 | import edu.mit.jwi.morph.WordnetStemmer;
20 |
/**
 * Thin wrapper around the MIT JWI WordNet dictionary providing a
 * Wu-Palmer-style noun similarity measure based on the hypernym hierarchy.
 */
public class WordNet {
	String sep = File.separator;
	String wordNetDir = "lib" + sep + "WordNet-3.0" + sep + "dict";
	URL url;
	IRAMDictionary dict;
	WordnetStemmer stemmer;

	/**
	 * Load the WordNet dictionary fully into memory (slow) and build a stemmer.
	 * @throws Exception if the dictionary files cannot be opened/loaded
	 */
	public WordNet() throws Exception {
		url = new URL("file", null, wordNetDir);
		dict = new RAMDictionary(url, ILoadPolicy.NO_LOAD);
		dict.open();
		System.out.println("Loading wordNet...");
		dict.load(true); // load dictionary into memory
		System.out.println("WordNet loaded.");

		stemmer = new WordnetStemmer(dict);
	}

	/**
	 * Resolve each noun stem to its WordNet word IDs; stems unknown to the
	 * dictionary are skipped.
	 * (Fixed: the dictionary lookup is now done once per stem — it was
	 * previously performed twice, once for the null check and once for the
	 * word IDs.)
	 */
	private List collectNounWordIDs(List stems) {
		List wordIDs = new ArrayList<>();
		for (String stem : stems) {
			IIndexWord indexWord = dict.getIndexWord(stem, POS.NOUN);
			if (indexWord != null) {
				wordIDs.addAll(indexWord.getWordIDs());
			}
		}
		return wordIDs;
	}

	/** Map word IDs to their synsets, preserving order. */
	private List toSynsets(List wordIDs) {
		List synsets = new ArrayList<>();
		for (IWordID wID : wordIDs) { synsets.add(dict.getWord(wID).getSynset()); }
		return synsets;
	}

	/**
	 * Find the Wu-Palmer similarity of two nouns: 2*N3 / (N1 + N2 + 2*N3),
	 * where N1/N2 are the hypernym distances from each word to their closest
	 * common ancestor synset and N3 is that ancestor's depth.
	 * @param word1 first noun (special characters are stripped)
	 * @param word2 second noun
	 * @return similarity in [0, 1]; 0.0 when either word is unknown or empty
	 */
	public double similarity(String word1, String word2) {
		// remove all special characters from words
		if (word1.equals("") || word2.equals("")) { return 0.0; }
		word1 = word1.replaceAll("[^a-zA-Z0-9]", "");
		word2 = word2.replaceAll("[^a-zA-Z0-9]", "");
		if (word1.equals("") || word2.equals("")) { return 0.0; }
		// Special symbols in a word cause the stemmer to throw, hence the
		// sanitization above.
		List stems1 = stemmer.findStems(word1, POS.NOUN);
		List stems2 = stemmer.findStems(word2, POS.NOUN);

		if (stems1.isEmpty() || stems2.isEmpty()) {
			// One word cannot be identified in WordNet.
			return 0.0;
		}

		// visitedX.get(k) holds the synsets reachable from wordX in exactly
		// k hypernym steps (level-by-level BFS up the hierarchy).
		ArrayList> visited1, visited2;
		visited1 = new ArrayList<>();
		visited2 = new ArrayList<>();

		List wordIDs1 = collectNounWordIDs(stems1);
		if (wordIDs1.isEmpty()) { return 0.0; }
		List synsets1 = toSynsets(wordIDs1);
		visited1.add(new HashSet (synsets1));

		List wordIDs2 = collectNounWordIDs(stems2);
		if (wordIDs2.isEmpty()) { return 0.0; }
		List synsets2 = toSynsets(wordIDs2);
		visited2.add(new HashSet (synsets2));

		boolean commonFound = false;
		ISynset commonSynset = null;
		boolean endSearch1 = false;
		boolean endSearch2 = false;

		int commonSynsetPos1 = -1;
		int commonSynsetPos2 = -1;

		// Alternately expand both frontiers until the levels intersect or
		// both searches hit the top of the hierarchy.
		while (!commonFound && !(endSearch1 && endSearch2)) {
			int sz1 = visited1.size();
			int sz2 = visited2.size();
			if (!commonFound && !endSearch1) { // check the newest of 1 against all of 2
				for (int i = 0; i < sz2; i++) {
					if (intersection(visited1.get(sz1-1), visited2.get(i)) != null) {
						commonSynsetPos1 = sz1-1;
						commonSynsetPos2 = i;
						commonSynset = intersection(visited1.get(sz1-1), visited2.get(i));
						commonFound = true;
						break;
					}
				}
			}
			if (!commonFound && !endSearch2) { // check the newest of 2 against all of 1
				for (int i = 0; i < sz1; i++) {
					if (intersection(visited1.get(i), visited2.get(sz2-1)) != null) {
						commonSynsetPos1 = i;
						commonSynsetPos2 = sz2-1;
						commonSynset = intersection(visited1.get(i), visited2.get(sz2-1));
						commonFound = true;
						break;
					}
				}
			}
			if (!commonFound) {
				if (!endSearch1) {
					Set hyperSet1 = getHyperSet(visited1.get(sz1-1));
					if (hyperSet1.isEmpty()) { endSearch1 = true; }
					else { visited1.add(hyperSet1); }
				}
				if (!endSearch2) {
					Set hyperSet2 = getHyperSet(visited2.get(sz2-1));
					if (hyperSet2.isEmpty()) { endSearch2 = true; }
					else { visited2.add(hyperSet2); }
				}
			}
		}

		if (commonSynset == null) { return 0.0; }

		// N1/N2: levels climbed from each word; N3: depth of the common ancestor.
		int N1 = commonSynsetPos1;
		int N2 = commonSynsetPos2;
		int N3 = findDepth(commonSynset);

		return 2*N3 / (double) (N1+N2+2*N3);
	}

	/**
	 * Depth of a synset = number of hypernym levels between it and a
	 * top-level synset (one with no hypernyms), found by upward BFS.
	 */
	private int findDepth(ISynset synset) {
		if (synset.getRelatedSynsets(Pointer.HYPERNYM).isEmpty()) { return 0; }
		List> list = new ArrayList<>();
		Set set = new HashSet<>();
		set.add(synset);
		list.add(set);
		boolean topReached = false;
		int depth = -1;
		while (!topReached) {
			Set nextSet = new HashSet<>();
			for (ISynset syn : list.get(list.size()-1)) {
				List hyperIDs = syn.getRelatedSynsets(Pointer.HYPERNYM);
				if (!hyperIDs.isEmpty()) {
					for (ISynsetID hyperID : hyperIDs) { nextSet.add(dict.getSynset(hyperID)); }
				} else {
					// A synset with no hypernym ends the climb at this level.
					topReached = true;
					depth = list.size()-1;
					break;
				}
			}
			list.add(nextSet);
		}
		return depth;
	}

	/** Union of the direct hypernym synsets of every synset in {@code set}. */
	private Set getHyperSet(Set set) {
		Set hyperSet = new HashSet<>();
		for (ISynset syn : set) {
			List hyperIDs = syn.getRelatedSynsets(Pointer.HYPERNYM);
			if (!hyperIDs.isEmpty()) {
				for (ISynsetID hyperID : hyperIDs) { hyperSet.add(dict.getSynset(hyperID)); }
			}
		}
		return hyperSet;
	}

	/** Return one synset present in both sets, or null when disjoint. */
	private ISynset intersection(Set set1, Set set2) {
		for (ISynset syn2 : set2) {
			if (set1.contains(syn2)) { return syn2; }
		}
		return null;
	}

	/**
	 * Testing method
	 * @param args
	 * @throws Exception
	 */
	public static void main(String[] args) throws Exception {
		WordNet net = new WordNet();
		String word1 = "scopes";
		String word2 = "book";
		System.out.printf("WUP similarity between %s and %s is: %f\n", word1, word2, net.similarity(word1, word2));
	}

}
226 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/SyntacticEvaluator.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.List;
4 |
5 | public class SyntacticEvaluator {
6 |
7 | int numOfInvalid;
8 |
9 | public SyntacticEvaluator() {
10 | numOfInvalid = 0;
11 | }
12 |
13 | /**
14 | * a root is invalid if:
15 | * it has no child;
16 | * it has only one child and this child is not SN;
17 | * it has more than one child and other than the first child is not ON.
18 | * @param node
19 | * @return
20 | */
21 | private static int checkROOT(Node node){
22 | int numOfInvalid = 0;
23 | List children = node.getChildren();
24 | int sizeOfChildren = children.size();
25 |
26 | if (sizeOfChildren == 0){
27 | numOfInvalid++;
28 | node.isInvalid = true;
29 | }
30 | else if (sizeOfChildren == 1 && !children.get(0).getInfo().getType().equals("SN")){
31 | numOfInvalid++;
32 | node.isInvalid = true;
33 | }
34 | else if (sizeOfChildren > 1){
35 | if (!children.get(0).getInfo().getType().equals("SN")){
36 | numOfInvalid++;
37 | node.isInvalid = true;
38 | }
39 | else {
40 | for (int j = 1; j < sizeOfChildren; j++){
41 | if (!children.get(j).getInfo().getType().equals("ON")){
42 | numOfInvalid++;
43 | node.isInvalid = true;
44 | }
45 | }
46 | }
47 | }
48 | return numOfInvalid;
49 | }
50 |
51 | /**
52 | * a SN is not valid if:
53 | * it has more than 1 child;
54 | * it has 1 child but this child is not GNP (FN or NN).
55 | * @param node
56 | * @return
57 | */
58 | private static int checkSN(Node node){
59 | int numOfInvalid = 0;
60 | List children = node.getChildren();
61 | int sizeOfChildren = children.size();
62 |
63 | //SN can only have one child from FN or NN
64 | if (sizeOfChildren != 1){
65 | numOfInvalid++;
66 | node.isInvalid = true;
67 | }
68 | else{
69 | String childType = children.get(0).getInfo().getType();
70 | if (!(childType.equals("NN") || childType.equals("FN"))){
71 | numOfInvalid++;
72 | node.isInvalid = true;
73 | }
74 | }
75 |
76 | return numOfInvalid;
77 | }
78 |
79 | /**
80 | * a ON is invalid if:
81 | * (1) in ComplexCondition (its parent is ROOT):
82 | * its number of children is not 2 (left & right subtrees);
83 | * it has 2 children, but first one is not GNP, or second one is not GNP/VN/FN.
84 | * (2) in Condition (its parent is NN):
85 | * its number of children is not 1;
86 | * it has 1 child, but the child is not VN.
87 | * @param node
88 | * @return
89 | */
90 | private static int checkON(Node node){
91 | int numOfInvalid = 0;
92 | String parentType = node.getParent().getInfo().getType();
93 | List children = node.getChildren();
94 | int sizeOfChildren = children.size();
95 |
96 | if (parentType.equals("ROOT")){
97 | if (sizeOfChildren != 2){
98 | numOfInvalid++;
99 | node.isInvalid = true;
100 | }
101 | else{
102 | for (int j = 0; j children = node.getChildren();
146 | int sizeOfChildren = children.size();
147 |
148 | //NP=NN+NN*Condition. Second NN has no child.
149 | if (parentType.equals("NN")){
150 | if (sizeOfChildren != 0){ //this rule is different from figure 7 (a), but I think this makes sense
151 | numOfInvalid++;
152 | node.isInvalid = true;
153 | }
154 | }
155 | //SN+GNP, or ON+GNP, or FN+GNP. and GNP=NP=NN+NN*Condition. First NN can have any number of children from NN,ON,VN.
156 | else if (parentType.equals("SN") || parentType.equals("FN") || parentType.equals("ON")){
157 | if (sizeOfChildren != 0){
158 | for (int j = 0; j < sizeOfChildren; j++){
159 | String childType = children.get(j).getInfo().getType();
160 | if (!(childType.equals("NN") || childType.equals("VN") || childType.equals("ON"))){
161 | numOfInvalid++;
162 | node.isInvalid = true;
163 | break;
164 | }
165 | }
166 | }
167 | }
168 |
169 | return numOfInvalid;
170 | }
171 |
172 | /**
173 | * a VN is invalid if:
174 | * it has children.
175 | * @param node
176 | * @return
177 | */
178 | private static int checkVN(Node node){
179 | int numOfInvalid = 0;
180 | //String parentType = node.getParent().getInfo().getType();
181 | List children = node.getChildren();
182 | int sizeOfChildren = children.size();
183 |
184 | if (sizeOfChildren != 0){ //VN cannot have children
185 | numOfInvalid++;
186 | node.isInvalid = true;
187 | }
188 | /*
189 | else if (!(parentType.equals("ON") || parentType.equals("NN"))){ //VN can only be child of ON and NN
190 | numOfInvalid++;
191 | node.isInvalid = true;
192 | }
193 | */
194 | return numOfInvalid;
195 | }
196 |
197 | /**
198 | * a FN is valid if:
199 | * ON+FN, or ON+GNP, or SN+GNP, or FN+GNP. and GNP=FN+GNP,
200 | * FN can be child of ON, without children or only 1 child of NN or FN,
201 | * FN can be child of SN, with only 1 child of NN or FN,
202 | * FN can be child of FN, with only 1 child of NN or FN.
203 | * @param node
204 | * @return
205 | */
206 | private static int checkFN(Node node){
207 | int numOfInvalid = 0;
208 | String parentType = node.getParent().getInfo().getType();
209 | List children = node.getChildren();
210 | int sizeOfChildren = children.size();
211 |
212 | if (sizeOfChildren == 0){
213 | if (!parentType.equals("ON")){
214 | numOfInvalid++;
215 | node.isInvalid = true;
216 | }
217 | }
218 | else if (sizeOfChildren == 1){
219 | String childType = children.get(0).getInfo().getType();
220 | if (!(parentType.equals("ON") || parentType.equals("SN") /*|| parentType.equals("FN")*/)){
221 | numOfInvalid++;
222 | node.isInvalid = true;
223 | }
224 | else if (!(childType.equals("NN") /*|| childType.equals("FN")*/)){
225 | numOfInvalid++;
226 | node.isInvalid = true;
227 | }
228 | }
229 | else{
230 | numOfInvalid++;
231 | node.isInvalid = true;
232 | }
233 |
234 | return numOfInvalid;
235 | }
236 |
237 | /**
238 | * Number of invalid tree nodes according to the grammar:
239 | * Q -> (SClause)(ComplexCindition)*
240 | * SClause -> SELECT + GNP
241 | * ComplexCondition -> ON + (LeftSubTree*RightSubTree)
242 | * LeftSubTree -> GNP
243 | * RightSubTree -> GNP | VN | FN
244 | * GNP -> (FN + GNP) | NP
245 | * NP -> NN + (NN)*(Condition)*
246 | * Condition -> VN | (ON + VN)
247 | *
248 | * +: parent-child relationship
249 | * *: sibling relationship
250 | * |: or
251 | *
252 | * Basic rule: Check invalidity only considering its children
253 | * @param T
254 | * @return
255 | */
256 | public static int numberOfInvalidNodes (ParseTree T){
257 | int numOfInvalid = 0; //number of invalid tree nodes
258 | for (Node curNode : T) {
259 | String curType = curNode.getInfo().getType();
260 | if (curType.equals("ROOT")){ //ROOT
261 | numOfInvalid = numOfInvalid + checkROOT(curNode);
262 | }
263 | if (curType.equals("SN")){ // select node
264 | numOfInvalid = numOfInvalid + checkSN(curNode);
265 | }
266 | else if (curType.equals("ON")){ //operator node
267 | numOfInvalid = numOfInvalid + checkON(curNode);
268 | }
269 | else if (curType.equals("NN")){ //name node
270 | numOfInvalid = numOfInvalid + checkNN(curNode);
271 | }
272 | else if (curType.equals("VN")){ //value node
273 | numOfInvalid = numOfInvalid + checkVN(curNode);
274 | }
275 | else if (curType.equals("FN")){ //function nodes
276 | numOfInvalid = numOfInvalid + checkFN(curNode);
277 | }
278 | }
279 | return numOfInvalid;
280 | }
281 |
282 | }
283 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/SchemaGraph.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.sql.Connection;
4 | import java.sql.DatabaseMetaData;
5 | import java.sql.DriverManager;
6 | import java.sql.ResultSet;
7 | import java.sql.SQLException;
8 | import java.sql.Statement;
9 | import java.util.ArrayList;
10 | import java.util.HashMap;
11 | import java.util.HashSet;
12 | import java.util.LinkedList;
13 | import java.util.List;
14 | import java.util.Map;
15 | import java.util.Set;
16 |
17 |
/**
 * An undirected "schema graph" over the tables of a relational database,
 * built from JDBC metadata. For every table it records the column types, a
 * random sample of values per column, and the primary key. Two tables are
 * connected when the primary key of one is contained in the other table's
 * columns (and the two primary keys differ); shortest join paths between
 * tables are found with BFS over this connectivity.
 */
public class SchemaGraph {

	/** table name -> (column name -> column type) */
	private Map<String, Map<String, String>> tables;

	/** table name -> (column name -> sampled column values) */
	private Map<String, Map<String, Set<String>>> tableRows;

	/**
	 * table name -> primary key (set of column names).
	 * Two tables are connected only if the primary key of table1 is a
	 * column of table2, but NOT the primary key of table2. The graph has
	 * no direction.
	 */
	private Map<String, Set<String>> keys;

	/** table name -> names of the tables directly joinable with it */
	private Map<String, Set<String>> connectivity;

	/**
	 * Constructs a schema graph from database metadata: reads every table
	 * and its columns, samples up to 2000 random values per column, then
	 * reads primary keys and computes table connectivity.
	 * All statements and result sets are closed via try-with-resources
	 * (the original leaked them on exception paths).
	 * @param c an open connection; it is NOT closed by this constructor
	 * @throws SQLException if metadata access or a sampling query fails
	 */
	public SchemaGraph(Connection c) throws SQLException {
		System.out.println("Retrieving schema graph...");
		DatabaseMetaData meta = c.getMetaData();
		tables = new HashMap<>();
		tableRows = new HashMap<>();
		String[] types = {"TABLE"};
		try (ResultSet rsTable = meta.getTables(null, null, "%", types);
				Statement stmt = c.createStatement()) {
			while (rsTable.next()) {
				String tableName = rsTable.getString("TABLE_NAME");
				Map<String, String> table = new HashMap<>();
				Map<String, Set<String>> tableRow = new HashMap<>();
				tables.put(tableName, table);
				tableRows.put(tableName, tableRow);

				try (ResultSet rsColumn = meta.getColumns(null, null, tableName, null)) {
					while (rsColumn.next()) {
						// Retrieve column info for each table.
						String columnName = rsColumn.getString("COLUMN_NAME");
						String columnType = rsColumn.getString("TYPE_NAME");
						table.put(columnName, columnType);
						// Draw a random sample of up to 2000 values per column.
						// Identifiers come from the database's own metadata; they
						// are concatenated because '?' cannot bind identifiers.
						// NOTE(review): ORDER BY RANDOM() is PostgreSQL/SQLite
						// syntax — confirm if other backends must be supported.
						String query = "SELECT " + columnName + " FROM " + tableName
								+ " ORDER BY RANDOM() LIMIT 2000;";
						Set<String> columnValues = new HashSet<>();
						tableRow.put(columnName, columnValues);
						try (ResultSet rows = stmt.executeQuery(query)) {
							while (rows.next()) {
								String columnValue = rows.getString(1);
								// Skip SQL NULLs in the sample.
								if (!rows.wasNull()) {
									columnValues.add(columnValue);
								}
							}
						}
					}
				}
			}
		}
		readPrimaryKeys(meta);
		findConnectivity();
		System.out.println("Schema graph retrieved.");
	}

	/**
	 * Reads the primary key columns of every known table into {@link #keys}.
	 * @param meta the connection's metadata handle
	 * @throws SQLException if metadata access fails
	 */
	private void readPrimaryKeys(DatabaseMetaData meta) throws SQLException {
		keys = new HashMap<>();
		for (String tableName : tables.keySet()) {
			Set<String> primaryKey = new HashSet<>();
			keys.put(tableName, primaryKey);
			try (ResultSet rsPrimaryKey = meta.getPrimaryKeys(null, null, tableName)) {
				while (rsPrimaryKey.next()) {
					primaryKey.add(rsPrimaryKey.getString("COLUMN_NAME"));
				}
			}
		}
	}

	/**
	 * Marks two tables as connected (in both directions) whenever they share
	 * join keys according to {@link #getJoinKeys(String, String)}.
	 */
	private void findConnectivity() {
		connectivity = new HashMap<>();
		for (String tableName : tables.keySet()) {
			connectivity.put(tableName, new HashSet<>());
		}
		for (String table1 : tables.keySet()) {
			for (String table2 : tables.keySet()) {
				if (table1.equals(table2)) { continue; }
				if (!getJoinKeys(table1, table2).isEmpty()) {
					connectivity.get(table1).add(table2);
					connectivity.get(table2).add(table1);
				}
			}
		}
	}

	/**
	 * Returns the columns on which the two tables can be joined directly:
	 * the primary key of one table when all of its columns also exist in the
	 * other table. Tables with equal primary keys are treated as not
	 * directly joinable.
	 * @param table1 a table name known to this graph
	 * @param table2 a table name known to this graph
	 * @return the join key columns, or an empty set if no direct join exists
	 */
	public Set<String> getJoinKeys(String table1, String table2) {
		Set<String> table1Keys = keys.get(table1);
		Set<String> table2Keys = keys.get(table2);
		if (table1Keys.equals(table2Keys)) { return new HashSet<>(); }
		boolean keys1ContainedIn2 = true;
		for (String table1Key : table1Keys) {
			if (!tables.get(table2).containsKey(table1Key)) {
				keys1ContainedIn2 = false;
				break;
			}
		}
		if (keys1ContainedIn2) { return new HashSet<>(table1Keys); }

		boolean keys2ContainedIn1 = true;
		for (String table2Key : table2Keys) {
			if (!tables.get(table1).containsKey(table2Key)) {
				keys2ContainedIn1 = false;
				break;
			}
		}
		if (keys2ContainedIn1) { return new HashSet<>(table2Keys); }

		return new HashSet<>();
	}

	/**
	 * Returns the shortest join path between two tables as a list of table
	 * names, e.g. [table1, table3, table2], found with BFS over the
	 * connectivity graph. The join keys for each hop can be obtained with
	 * {@link #getJoinKeys(String, String)}.
	 * @param table1 the start table
	 * @param table2 the destination table (assumed different from table1)
	 * @return the path from table1 to table2, or an empty list if either
	 *         table is unknown or no path exists
	 */
	public List<String> getJoinPath(String table1, String table2) {
		if (!tables.containsKey(table1) || !tables.containsKey(table2)) {
			return new ArrayList<>();
		}
		// Assume table1 and table2 are different.
		// Find the shortest path using BFS.
		Map<String, Boolean> visited = new HashMap<>();
		for (String tableName : tables.keySet()) {
			visited.put(tableName, false);
		}
		Map<String, String> prev = new HashMap<>(); // child -> parent in the BFS tree
		LinkedList<String> queue = new LinkedList<>();
		queue.addLast(table1);
		visited.put(table1, true);
		boolean found = false;
		while (!queue.isEmpty() && !found) {
			String tableCurr = queue.removeFirst();
			for (String tableNext : connectivity.get(tableCurr)) {
				if (!visited.get(tableNext)) {
					visited.put(tableNext, true);
					queue.addLast(tableNext);
					prev.put(tableNext, tableCurr);
				}
				if (tableNext.equals(table2)) { found = true; }
			}
		}

		// Walk back from table2 to table1 along the BFS parent pointers.
		LinkedList<String> path = new LinkedList<>();
		if (visited.get(table2)) {
			String tableEnd = table2;
			path.push(tableEnd);
			while (prev.containsKey(tableEnd)) {
				tableEnd = prev.get(tableEnd);
				path.push(tableEnd);
			}
		}
		return path;
	}

	/** @return the names of all tables in the schema */
	public Set<String> getTableNames() {
		return tables.keySet();
	}

	/** @return the column names of the given table */
	public Set<String> getColumns(String tableName) {
		return tables.get(tableName).keySet();
	}

	/** @return the sampled values of the given column */
	public Set<String> getValues(String tableName, String columnName){
		return tableRows.get(tableName).get(columnName);
	}

	/** Lists every table with its columns and types, one table per block. */
	@Override
	public String toString() {
		// StringBuilder instead of repeated String concatenation in a loop;
		// the produced text is identical to the original implementation.
		StringBuilder sb = new StringBuilder();
		for (String tableName : tables.keySet()) {
			sb.append("table: ").append(tableName).append("\n");
			sb.append("{");
			Map<String, String> columns = tables.get(tableName);
			for (String colName : columns.keySet()) {
				sb.append(colName).append(": ").append(columns.get(colName)).append("\t");
			}
			sb.append("}\n\n");
		}
		return sb.toString();
	}

	/** Ad-hoc manual test against a local "dblp" PostgreSQL database. */
	public static void main(String[] args) throws Exception {
		Connection connection = DriverManager.getConnection("jdbc:postgresql://127.0.0.1:5432/dblp", "dblpuser", "dblpuser");
		SchemaGraph schema = new SchemaGraph(connection);
		System.out.println("The join path between article and authorship:");
		System.out.println(schema.getJoinPath("article", "authorship"));
		System.out.println("The join path between authorship and article:");
		System.out.println(schema.getJoinPath("authorship", "article"));
		System.out.println("The join path between inproceedings and authorship:");
		System.out.println(schema.getJoinPath("inproceedings", "authorship"));
		System.out.println("The join path between article and inproceedings:");
		System.out.println(schema.getJoinPath("article", "inproceedings"));
		System.out.println("----------------------------------------------");
		System.out.println("The join keys between article and authorship:");
		System.out.println(schema.getJoinKeys("article", "authorship"));
		System.out.println("The join keys between article and inproceedings:");
		System.out.println(schema.getJoinKeys("article", "inproceedings"));
		System.out.println("The join keys between inproceedings and authorship:");
		System.out.println(schema.getJoinKeys("inproceedings", "authorship"));
	}
}
238 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/TreeAdjustorTest.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.Collections;
4 | import java.util.List;
5 |
/**
 * Manual (main-method driven) tests for parse tree adjustment. Each test
 * hand-builds a small parse tree by wiring Node objects together — mirroring
 * figures from the NaLIR paper — and prints the result of the routine under
 * test to stdout for visual inspection; nothing is asserted automatically.
 */
public class TreeAdjustorTest {
/**
 * Exercises SyntacticEvaluator.numberOfInvalidNodes on the tree of
 * Figure 3 (a); the printed count should be 3 and node 6 should be valid.
 */
public static void numberOfInvalidNodesTest(){
//construct a tree in the paper,
//current test case is Figure 3 (a), output should be 3 (node 6 should not be invalid)
ParseTree T = new ParseTree();
Node[] nodes = new Node[9];

// Each node gets Node(id, word, "--") plus a NodeInfo(type, value).
nodes[0] = new Node(0, "ROOT", "--");
nodes[0].info = new NodeInfo("ROOT","ROOT");
nodes[1] = new Node(1, "return", "--");
nodes[1].info = new NodeInfo("SN","SELECT");
nodes[2] = new Node(2, "author", "--");
nodes[2].info = new NodeInfo("NN", "Author");
nodes[3] = new Node(3, "paper", "--");
nodes[3].info = new NodeInfo("NN", ">"); // NOTE(review): ">" as an NN value looks like a slip — confirm against Figure 3 (a)
nodes[4] = new Node(4, "more", "--");
nodes[4].info = new NodeInfo("ON", "Title"); // NOTE(review): "Title" as an ON value — confirm against the figure
nodes[5] = new Node(5, "Bob", "--");
nodes[5].info = new NodeInfo("VN", "Author");
nodes[6] = new Node(6, "VLDB", "--");
nodes[6].info = new NodeInfo("VN", "Journal");
nodes[7] = new Node(7, "after", "--");
nodes[7].info = new NodeInfo("ON", ">");
nodes[8] = new Node(8, "2000", "--");
nodes[8].info = new NodeInfo("VN", "Year");

// Wire the tree: 0 -> 1 -> 2 -> {3, 5, 7}; 3 -> 4; 5 -> 6; 7 -> 8.
// Both sides of each edge are set (children list and parent pointer).
T.root = nodes[0];
nodes[0].children.add(nodes[1]);
nodes[1].parent = nodes[0];
nodes[1].children.add(nodes[2]);
nodes[2].parent = nodes[1];
nodes[2].children.add(nodes[3]);
nodes[3].parent = nodes[2];
nodes[2].children.add(nodes[5]);
nodes[5].parent = nodes[2];
nodes[2].children.add(nodes[7]);
nodes[7].parent = nodes[2];
nodes[3].children.add(nodes[4]);
nodes[4].parent = nodes[3];
nodes[5].children.add(nodes[6]);
nodes[6].parent = nodes[5];
nodes[7].children.add(nodes[8]);
nodes[8].parent = nodes[7];

System.out.println("===========test for Running SyntacticEvaluator.numberOfInvalidNodes===========");
System.out.println("Input tree: "+T.toString());
System.out.println("Number of Invalid nodes: "+SyntacticEvaluator.numberOfInvalidNodes(T)+"\n");
System.out.println("Invalid nodes: ");
// Skip nodes[0]: the artificial ROOT is never reported as invalid.
for (int i = 1; i < nodes.length; i++){
if (nodes[i].isInvalid)
System.out.println(nodes[i]);
}
}

/**
 * Exercises ParseTree.mergeLNQN on a tree containing a quantifier node
 * (QN, node 4 "each") and prints the tree before and after the merge.
 */
public static void mergeLNQNTest() {
ParseTree T = new ParseTree();
Node[] nodes = new Node[9];

nodes[0] = new Node(0, "ROOT", "--");
nodes[0].info = new NodeInfo("ROOT","ROOT");
nodes[1] = new Node(1, "return", "--");
nodes[1].info = new NodeInfo("SN","SELECT");
nodes[2] = new Node(2, "conference", "--");
nodes[2].info = new NodeInfo("NN", "Author");
nodes[3] = new Node(3, "area", "--");
nodes[3].info = new NodeInfo("NN", "Title");
nodes[4] = new Node(4, "each", "--");
nodes[4].info = new NodeInfo("QN", ">");
nodes[5] = new Node(5, "papers", "--");
nodes[5].info = new NodeInfo("NN", "Author");
nodes[6] = new Node(6, "citations", "--");
nodes[6].info = new NodeInfo("NN", "Journal");
nodes[7] = new Node(7, "most", "--");
nodes[7].info = new NodeInfo("FN", ">");
nodes[8] = new Node(8, "total", "--");
nodes[8].info = new NodeInfo("FN", "Year");

// Wire the tree: 0 -> 1 -> 2 -> {3, 5}; 3 -> 4; 5 -> 6; 6 -> {7, 8}.
T.root = nodes[0];
nodes[0].children.add(nodes[1]);
nodes[1].parent = nodes[0];
nodes[1].children.add(nodes[2]);
nodes[2].parent = nodes[1];
nodes[2].children.add(nodes[3]);
nodes[3].parent = nodes[2];
nodes[2].children.add(nodes[5]);
nodes[5].parent = nodes[2];
nodes[3].children.add(nodes[4]);
nodes[4].parent = nodes[3];
nodes[5].children.add(nodes[6]);
nodes[6].parent = nodes[5];
nodes[6].children.add(nodes[7]);
nodes[7].parent = nodes[6];
nodes[6].children.add(nodes[8]);
nodes[8].parent = nodes[6];

System.out.println("===========test for Running mergeLNQN===========");
System.out.println("Input tree: "+T.toString());
ParseTree tree = T.mergeLNQN();
System.out.println("Output tree: "+tree.toString());
}

/**
 * Exercises TreeAdjustor.adjust on the same tree shape as mergeLNQNTest
 * and prints every adjusted tree it produces.
 */
public static void adjustTest(){
ParseTree T = new ParseTree();
Node[] nodes = new Node[9];

nodes[0] = new Node(0, "ROOT", "--");
nodes[0].info = new NodeInfo("ROOT","ROOT");
nodes[1] = new Node(1, "return", "--");
nodes[1].info = new NodeInfo("SN","SELECT");
nodes[2] = new Node(2, "conference", "--");
nodes[2].info = new NodeInfo("NN", "Author");
nodes[3] = new Node(3, "area", "--");
nodes[3].info = new NodeInfo("NN", "Title");
nodes[4] = new Node(4, "each", "--");
nodes[4].info = new NodeInfo("QN", ">");
nodes[5] = new Node(5, "papers", "--");
nodes[5].info = new NodeInfo("NN", "Author");
nodes[6] = new Node(6, "citations", "--");
nodes[6].info = new NodeInfo("NN", "Journal");
nodes[7] = new Node(7, "most", "--");
nodes[7].info = new NodeInfo("FN", ">");
nodes[8] = new Node(8, "total", "--");
nodes[8].info = new NodeInfo("FN", "Year");

// Same wiring as mergeLNQNTest: 0 -> 1 -> 2 -> {3, 5}; 3 -> 4; 5 -> 6; 6 -> {7, 8}.
T.root = nodes[0];
nodes[0].children.add(nodes[1]);
nodes[1].parent = nodes[0];
nodes[1].children.add(nodes[2]);
nodes[2].parent = nodes[1];
nodes[2].children.add(nodes[3]);
nodes[3].parent = nodes[2];
nodes[2].children.add(nodes[5]);
nodes[5].parent = nodes[2];
nodes[3].children.add(nodes[4]);
nodes[4].parent = nodes[3];
nodes[5].children.add(nodes[6]);
nodes[6].parent = nodes[5];
nodes[6].children.add(nodes[7]);
nodes[7].parent = nodes[6];
nodes[6].children.add(nodes[8]);
nodes[8].parent = nodes[6];

System.out.println("===========test for Running adjust() in TreeAdjustor===========");
System.out.println("Input tree: "+T.toString());
List treeList = TreeAdjustor.adjust(T);
System.out.println("Output size: "+treeList.size());
System.out.println("Output trees:");
for (int j = 0; j < treeList.size(); j++){
System.out.println("Tree "+j+" :");
System.out.println(treeList.get(j));
}
}

/**
 * Exercises TreeAdjustor.getAdjustedTrees on a QN-free tree and prints the
 * highest-scoring adjusted trees.
 */
public static void getAdjustedTreesTest(){
ParseTree T = new ParseTree();
Node[] nodes = new Node[8];

nodes[0] = new Node(0, "ROOT", "--");
nodes[0].info = new NodeInfo("ROOT","ROOT");
nodes[1] = new Node(1, "return", "--");
nodes[1].info = new NodeInfo("SN","SELECT");
nodes[2] = new Node(2, "conference", "--");
nodes[2].info = new NodeInfo("NN", "Author");
nodes[3] = new Node(3, "area", "--");
nodes[3].info = new NodeInfo("NN", "Title");
nodes[4] = new Node(4, "papers", "--");
nodes[4].info = new NodeInfo("NN", "Author");
nodes[5] = new Node(5, "citations", "--");
nodes[5].info = new NodeInfo("NN", "Journal");
nodes[6] = new Node(6, "most", "--");
nodes[6].info = new NodeInfo("FN", ">");
nodes[7] = new Node(7, "total", "--");
nodes[7].info = new NodeInfo("FN", "Year");

// Wire the tree: 0 -> 1 -> 2 -> {3, 4}; 4 -> 5; 5 -> {6, 7}.
T.root = nodes[0];
nodes[0].children.add(nodes[1]);
nodes[1].parent = nodes[0];
nodes[1].children.add(nodes[2]);
nodes[2].parent = nodes[1];
nodes[2].children.add(nodes[3]);
nodes[3].parent = nodes[2];
nodes[2].children.add(nodes[4]);
nodes[4].parent = nodes[2];
nodes[4].children.add(nodes[5]);
nodes[5].parent = nodes[4];
nodes[5].children.add(nodes[6]);
nodes[6].parent = nodes[5];
nodes[5].children.add(nodes[7]);
nodes[7].parent = nodes[5];

System.out.println("===========test for Running getAdjustedTrees() in TreeAdjustor===========");
System.out.println("The original tree:");
System.out.println(T);
System.out.println("Number of possible trees for choice:");
List result = TreeAdjustor.getAdjustedTrees(T);
System.out.println(result.size());
// Sort descending by score. NOTE(review): the int-subtraction comparator
// can overflow for extreme scores — confirm scores stay small.
Collections.sort(result, (t1, t2) -> (- t1.getScore() + t2.getScore()));
System.out.println("The three trees with highest scores look like:");
// NOTE(review): message says "three" but the loop prints 5 — confirm
// the intended count (and that result has at least 5 elements).
for (int i = 0; i < 5; i++) {
System.out.println(result.get(i));
}
}

/**
 * Exercises ParseTree.addON on the same tree shape as getAdjustedTreesTest
 * and prints the original tree again afterwards to show it is unmodified.
 */
public static void testAddON (){
ParseTree T = new ParseTree();
Node[] nodes = new Node[8];

nodes[0] = new Node(0, "ROOT", "--");
nodes[0].info = new NodeInfo("ROOT","ROOT");
nodes[1] = new Node(1, "return", "--");
nodes[1].info = new NodeInfo("SN","SELECT");
nodes[2] = new Node(2, "conference", "--");
nodes[2].info = new NodeInfo("NN", "Author");
nodes[3] = new Node(3, "area", "--");
nodes[3].info = new NodeInfo("NN", "Title");
nodes[4] = new Node(4, "papers", "--");
nodes[4].info = new NodeInfo("NN", "Author");
nodes[5] = new Node(5, "citations", "--");
nodes[5].info = new NodeInfo("NN", "Journal");
nodes[6] = new Node(6, "most", "--");
nodes[6].info = new NodeInfo("FN", ">");
nodes[7] = new Node(7, "total", "--");
nodes[7].info = new NodeInfo("FN", "Year");

// Same wiring as getAdjustedTreesTest: 0 -> 1 -> 2 -> {3, 4}; 4 -> 5; 5 -> {6, 7}.
T.root = nodes[0];
nodes[0].children.add(nodes[1]);
nodes[1].parent = nodes[0];
nodes[1].children.add(nodes[2]);
nodes[2].parent = nodes[1];
nodes[2].children.add(nodes[3]);
nodes[3].parent = nodes[2];
nodes[2].children.add(nodes[4]);
nodes[4].parent = nodes[2];
nodes[4].children.add(nodes[5]);
nodes[5].parent = nodes[4];
nodes[5].children.add(nodes[6]);
nodes[6].parent = nodes[5];
nodes[5].children.add(nodes[7]);
nodes[7].parent = nodes[5];

System.out.println("===========test for Running addON() in ParseTree===========");
System.out.println("The original tree:");
System.out.println(T);
ParseTree tree = T.addON();
System.out.println("After adding ON:");
System.out.println(tree);
System.out.println("The original tree:");
System.out.println(T);
}

/**
 * Entry point: individual tests are toggled by (un)commenting the calls.
 */
public static void main(String[] args) {
// numberOfInvalidNodesTest();
// mergeLNQNTest();
// adjustTest();
getAdjustedTreesTest();
// testAddON();
}

}
265 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/doc/report/midterm/midterm.tex:
--------------------------------------------------------------------------------
1 | \documentclass[twocolumn]{article}
2 |
3 | % Feel free to add more packages
4 | \usepackage{float, amsmath, amssymb, mathtools}
5 | \usepackage{graphicx, caption, color}
6 | \usepackage{tabularx, fullpage}
7 | %\usepackage{kotex}
8 | %\usepackage{multicol}
9 | \setlength{\columnsep}{1cm}
10 | \usepackage{comment, cite, wrapfig}
11 | \usepackage[utf8]{inputenc}
12 | \usepackage[hidelinks]{hyperref}
13 | \usepackage{courier}
14 | %\usepackage{geometry}
15 | \hypersetup{breaklinks=true}
16 | \urlstyle{same}
17 |
18 | \newcommand{\red}[1]{{\bf \color{red}#1}}
19 | \newcommand{\blue}[1]{{\bf \color{blue}#1}}
20 | \newcommand{\cut}[1]{}
21 |
22 |
23 | \begin{document}
24 |
25 | \title{Natural Language Interface for Relational Database\\
26 | \small{Midterm Report}}
27 |
28 | %Authors in alphabetical order of last names
29 | \author{Yilin Gao \\
30 | \small \texttt{yilin.gao@duke.edu} \and
31 | Keping Wang \\
32 | \small \texttt{kw238@duke.edu} \and
33 | Chengkang Xu \\
34 | \small \texttt{cx33@duke.edu} }
35 |
36 | \date{\today}
37 | \maketitle
38 |
39 | %%%================================================================%%%
40 | \section{Introduction}\label{sec:introduction}
41 |
42 | Writing SQL queries can be difficult, especially when it involves complex logic. As more and more non-expert users are accessing relational databases, it is very important to simplify their process of writing SQL queries. This project is going to build a Natural Language Interface for relational DataBases (NLIDB), closely following Li and Jagadish (2014)\cite{li2014}. NLIDB will be a tool for everyone to query data easily from relational databases.
43 |
Translating natural language into an SQL query isn't an easy job, not only because of the ambiguity of natural language, but also because users may make mistakes in writing natural language input, such as misspelling. We want the users to feel at ease using our interface, not afraid of being misinterpreted by the NLIDB, even if they cannot remember the exact database column names. So we follow Li and Jagadish (2014)\cite{li2014} and use an interactive interface to let users make choices in several ambiguous phases of the translation.
45 |
The main components for translating natural language to an SQL query are as follows:
47 |
48 | \begin{enumerate}
49 | \item Parse the natural language input into a parse tree using dependency syntax parser.
50 | \item Map the nodes in the parse tree to SQL keywords, table names, column names, and values. Here users may choose the desired mapping from ranked options.
51 | \item Adjust the structure of the parse tree to make it follow the structure of an SQL query. Here users may choose the desired structure from ranked options.
52 | \item Translate the parse tree to an SQL query.
53 | \end{enumerate}
54 |
55 | Up until this midterm report, we have completed steps 1 - 2 above. We have built an interactive graphical user interface (GUI), and established connection with a database to experiment with the two steps. Now the user can already choose the desired node mappings from the choices offered by our application.
56 |
Before the final report, we will finish steps 3 - 4 and tune the model with some hand-written natural language and SQL query pairs.
58 |
59 | %%%================================================================%%%
60 | \section{Related Work}
61 |
62 | Early day NLIDB systems were usually based on small scale database, which requires a small set of supported queries. Their parsing mechanism could only support ad-hoc methods and rules. Thus, early work would produce ambiguity if the database is scalable and natural language queries are "open-domain". Moreover, without the help of machine learning, early NLIDB systems cannot update their parsing methods as they accumulate more data.\cite{QATutorial}
63 |
Our approach involves machine learning in parsing the natural language input into a parse tree. Then we adjust the structure of the parse tree to obey the SQL syntax. Our approach can handle natural language input with more complicated structures than the simple keyword matching method.
65 |
66 | NLIDB here is a concrete application of the natural language QA systems.\cite{QATutorial} Currently, the mainstream approach for QA is the semantic parsing of questions. It can map natural language questions to logic forms or structured queries, and produce accurate answers when the query is complete and clear. However, the accuracy of answers will decrease if the input language is ambiguous, or if the logic relationship of the query is complicated. Due to our lack of training data, our NLIDB system cannot adopt the popular RNN (LSTM) for a direct and efficient translation. Still we are trying to allow more input ambiguity and structural complexity by letting the users choose the mappings and structures interactively.
67 |
68 | %%%================================================================%%%
69 | \section{Problem Definition}
70 |
71 | For this NLIDB, we have to first develop a GUI, and then design the \texttt{ParseTree} class. Then we need to develop parse tree node mapper, parse tree structure adjuster, and SQL query translator.
72 |
There are three main problems that we face. The first one is the choice of data structures in \texttt{ParseTree}. The second is what algorithms to use for each phase of the translation. The last problem is to specify the rules for different phases, such as what word should be mapped to the ``SELECT'' key word, and what rules a legal parse tree should follow before being translated to an SQL query.
74 |
75 | %%%================================================================%%%
76 | \section{Algorithms}
77 |
78 | In the natural language parsing phase, we use the feature based pos-tagger\cite{toutanova2003feature} and the neural network dependency parser\cite{chen2014fast} from the Stanford NLP package.
79 |
In the node mapping phase, other than mapping words with hard-coded rules, we compare words with table and column names in the database, which requires a word similarity score. The similarity score is the maximum of two subscores. The first is lexical similarity (similarity in spelling), which is the Jaccard coefficient here. The second is semantic similarity, for which we use WUP similarity\cite{wu1994verbs}. To compute WUP similarity, we have to do a breadth-first search to find the lowest common ancestor of two words in WordNet. The calculation of word similarity will be explained in detail in the next section.
81 |
82 | More algorithms will be needed for parse tree adjustment and SQL query generation in the future.
83 |
84 | %%%================================================================%%%
85 | \section{System Design}
86 |
87 | \begin{figure*}[ht]
88 | \centering
89 | \includegraphics[width=0.8\linewidth]{figures/nlidb_system_diagram.pdf}
90 | \caption{System Diagram}
91 | \end{figure*}
92 |
Our current system is implemented in Java, using Maven as the project management tool. The source code is divided into three parts: model, controller, and view. The model part takes care of realizing the major functions of the natural language database interface, like parsing natural language, mapping nodes, adjusting the node tree structure, and translating the tree into an SQL query. The controller wraps many models as attribute variables, and it takes charge of the interaction between the database and the view (GUI). And the view part uses JavaFX to design a GUI.
94 |
95 | Figure 1 is a diagram of our system. The boxes with solid frame lines are the ones we've already written, and the boxes with dashed frame lines are to be completed in the future.
96 |
97 | Below we’ll introduce the design ideas on two steps that we’ve completed: parsing natural language into a parse tree, and mapping the nodes of the parse tree to SQL components.
98 |
99 | \subsection{Natural Language Parser}
100 | We write the NLParser class to parse natural language from the user input in GUI to a dependency parse tree. The NLParser is just a wrapper of the Standford NLP pos-tagger and dependency syntax parser. A natural language sentence is first tagged with part-of-speech labels, and then parsed with dependency parser to a ParseTree.
101 |
102 | A ParseTree consists of an array of Nodes. Each Node has information about the natural language word and its corresponding SQL component. A Node also contains parent and children links pointing to other Nodes in the ParseTree.
103 |
104 | \subsection{Node Mapper}
105 | Then we map each of the Node into an SQL component. We iterate over the tree and map each Node according to a certain Node Type in Figure 2, according to predefined rules. There are 7 node types in total, and 5 of them, SN, ON, FN, QN, and LN have hard-coded mapping rules. For example, map word “return” or “Return” to an “SN” node with value “SELECT”. A word will first be searched against these five Node Types. If there is no match, the search will go on to the remaining two types, NN and VN.
106 |
107 | \begin{figure}[ht]
108 | \centering
109 | \includegraphics[width=0.9\linewidth]{figures/nodes_mapping_rules.png}
110 | \caption[caption for nodes mapping rules]{Nodes Mapping Rules\protect\footnotemark}
111 | \end{figure}
112 | \footnotetext{Taken from \cite{li2014}.}
113 |
The remaining two types, Name Node and Value Node, are decided by searching over the database for matching names or values. The matching of a word to names or values is decided by the word similarity score of two words. The word similarity score here is the maximum of semantic similarity and lexical similarity.
115 |
116 | Semantic similarity is the WUP similarity\cite{wu1994verbs} function using WordNet. WordNet is a net of synonym sets (synsets) connected with semantic and lexical pointers. Two most important semantic pointers are hypernym and hyponym, which connect the synsets to the tree that we are interested here, as Figure 3. In Figure 3, the WUP similarity between $C1$ and $C2$ is:
117 |
118 | $$ Sim_{WUP} = \frac{2*N3}{N1+N2+2*N3} $$
119 |
120 | \begin{figure}[ht]
121 | \centering
122 | \includegraphics[width=0.7\linewidth]{figures/wordnet_tree.png}
123 | \caption[caption for wordnet tree]{WUP word similarity.\protect\footnotemark }
124 | \end{figure}
125 | \footnotetext{Taken from \cite{wu1994verbs}.}
126 |
127 | One thing to note about WordNet is that each word can be in multiple synsets, and each synset can have multiple parents, so we use breadth-first-search to find the lowest of all possible common parents of two words.
128 |
129 |
130 | For lexical similarity between two words, we use the Jaccard coefficient:
131 |
132 | $$ J(A, B) = \frac{|A \cap B|}{|A \cup B|}$$
133 |
134 | where $A$ and $B$ are the set of characters of the two words respectively. The Jaccard coefficient may not be as good for measuring the lexical similarity of two words (as edit distance), but it is currently still used because it is a measure in range (0,1), which makes it easily compared with the WUP semantic similarity.
135 |
136 | To search over the database, we first visit the database, retrieve its schema, and store the Schema Graph as an attribute variable in the Controller class, so that each node mapping task don’t have to go through the slow database query. The Schema contains the table names, the column names of each column, and some sample distinct values of each column, such that they can be searched over to map Name Node or Value Node.
137 |
138 | Once we have the word similarity scores of one word to names and values in database, we rank different mapping choices by their similarity score, and return the highest several choices to the GUI for the user to choose. Here we add another node type for the user to choose from, that is “UNKNOWN”, which means that node doesn’t correspond to any meaningful SQL component. These meaningless nodes will be removed in later steps.
139 |
140 | Figure 4 is an example of a parse tree with nodes mapped to SQL components. The left part is a parse tree, and the right part is the mappings of all its nodes.
141 |
142 | \begin{figure}[ht]
143 | \centering
144 | \includegraphics[width=0.9\linewidth]{figures/nodes_mapping_example.png}
145 | \caption[caption for nodes mapping example]{Node Mapping Example.\protect\footnotemark }
146 | \end{figure}
147 | \footnotetext{Taken from \cite{li2014}.}
148 |
149 | \subsection{Implicit Node}
The main idea of inserting implicit nodes into the parse tree is to make sure that two nodes which are being compared have corresponding schemas in the database. Assuming invalid nodes are removed from the tree properly, there should be a tree with at most three branches. The leftmost subtree should contain the select node (SN) and a name node (NN). If a name node in the left subtree does not have an ancestor name node, then it is the core node of the left subtree. If the type of the core node in the left subtree is different from that of the right subtree, the real core node in the right subtree is deemed hidden, i.e. implicit. The implicit core node may cause an unreasonable comparison between two variables of different types due to the change of semantic meaning. An example of an implicit node: ``return all authors who wrote more than 100 papers''. In our previous processing of the parse tree, the right subtree would contain only the number 100, which is a value node (VN). In order to make the tree semantically meaningful, nodes in the left subtree are copied over to the right subtree.
151 |
After inserting name nodes based on the core node comparison, the next step is to check the constraints on both core nodes. For example: if the left core node has the constraints of year greater than 2007 and area of ``Database'', the right core node should also have the same constraints. If the right core node does not conform to these constraints, then the constraint nodes should be copied from the left subtree to the right subtree.
153 |
Our implementation of the implicit node insertion starts from the root of the tree. It checks if any node below the select node (SN) is missing in the middle subtree. If there is, it copies it over to the middle subtree. Then it repeats the same procedure for the middle and rightmost subtrees. After the name node is copied over, it starts from the middle subtree to check if there is any constraint missing in the rightmost subtree. If there is, those constraints are copied over to the right subtree. Finally, if the root of a subtree is an ON (operator node), and the first node connected to the root in the subtree is a name node, there may be a function node missing. Our implementation tries to insert a function node in between to make the subtree semantically meaningful.
155 |
156 | %%%================================================================%%%
157 | \section{Experiments}
158 | The JavaFX application runs on JVM, and we’ve tested it on an Ubuntu 16.04 machine. We are using JDBC to connect to the PostgreSQL database of dblp, which we used in homework 1.
159 |
160 | Our program has already finished part of final target.
161 |
162 | \begin{figure}[ht]
163 | \centering
164 | \includegraphics[width=0.8\linewidth]{figures/program_structure.png}
165 | \caption{Program Structure}
166 | \end{figure}
167 |
168 | Figure 5 is a detailed structure on programs that we have already finished or at least conceived.
169 |
170 | We have programmed a GUI in \texttt{UserView.java} and a connection between database and GUI in \texttt{Controller.java}. To realize natural language query, our first step in implementing the translation process is to parse the natural language into SQL keywords using a predefined natural language parser called Stanford NLP. The parsing process is written in \texttt{NLParser.java} and \texttt{ParserTree.java}. After we get the parser tree, we map each tree node (word in initial natural language input) to certain component of SQL and database. The mapping is written in \texttt{NodeMapper.java}.
171 |
172 | \begin{figure*}[ht]
173 | \centering
174 | \includegraphics[width=0.7\linewidth]{figures/gui_nodes_mapping.png}
175 | \caption{GUI during Node Mapping}
176 | \end{figure*}
177 |
178 | Figure 6 is a screenshot of our application during the nodes mapping stage. The upper left part is where the user input comes. The bottom left part is supposed to be the translated SQL query (which hasn’t been completed). The upper left part shows the current information on nodes mapping. The choice box showing “NN: inproceedings.title” contains a drop down list of node types and values for the user to choose from. Once the user confirms the choice by pressing the “confirm choice” button, the app will go on to map the next word. The mapping choices will only be shown to the user if the word doesn’t match with the five predefined node types.
179 |
180 | As for the node mapper, currently we’ve only defined very limited number of explicit rules for nodes mapping. There are only a few predefined keywords, such as return, equals, all, etc (thus limited SQL query functions as well). We plan to tune the app after we’ve completed writing the whole process of translation. The nodes mapping for name nodes and value nodes doesn’t work perfectly well, maybe in the future we will try some more sensible measures of word similarity. But it is ok for now, since the users can almost always find the right name node or value node from the multiple choices.
181 |
182 | As for basic GUI functions, we may need to design a much fancier GUI as the last step of our program.
183 |
184 | %%%================================================================%%%
185 | \section{Contributions of Project Members}
186 |
187 | \begin{itemize}
188 | \item {\bf Yilin Gao:} GUI implementation, controller design, report writing.
189 | \item {\bf Keping Wang:} database connection, schema retrieval, Stanford NLP parser usage, parse tree design, word similarity score, report writing.
190 | \item {\bf Chengkang Xu:} node mapping, meaningless nodes removal, inserting implicit nodes, report writing.
191 | \end{itemize}
192 |
193 |
194 | \Urlmuskip=0mu plus 1mu\relax
195 | \bibliographystyle{abbrv}
196 | \bibliography{nlidb}
197 |
198 | \end{document}
199 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/ParseTree.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.io.StringReader;
4 | import java.util.ArrayList;
5 | import java.util.Collections;
6 | import java.util.Iterator;
7 | import java.util.LinkedList;
8 | import java.util.List;
9 |
10 | import edu.stanford.nlp.ling.HasWord;
11 | import edu.stanford.nlp.ling.TaggedWord;
12 | import edu.stanford.nlp.process.DocumentPreprocessor;
13 | import edu.stanford.nlp.trees.GrammaticalStructure;
14 | import edu.stanford.nlp.trees.TypedDependency;
15 |
16 | public class ParseTree implements IParseTree {
17 |
18 | /**
19 | * Order of parse tree reformulation (used in getAdjustedTrees())
20 | */
21 | int edit;
22 | // We no longer use an array to store the nodes!
23 | /**
24 | * Root Node. Supposed to be "ROOT".
25 | */
26 | Node root;
27 |
28 | /**
29 | * Empty constructor, only for testing.
30 | */
31 | public ParseTree() { }
32 |
33 | /**
34 | * Construct a parse tree using the stanford NLP parser. Only one sentence.
35 | * Here we are omitting the information of dependency labels (tags).
36 | * @param text input text.
37 | */
38 | public ParseTree(String text, NLParser parser) {
39 | // pre-processing the input text
40 | DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
41 | List sentence = null;
42 | for (List sentenceHasWord : tokenizer) {
43 | sentence = sentenceHasWord;
44 | break;
45 | }
46 | // part-of-speech tagging
47 | List tagged = parser.tagger.tagSentence(sentence);
48 | // dependency syntax parsing
49 | GrammaticalStructure gs = parser.parser.predict(tagged);
50 |
51 | // Reading the parsed sentence into ParseTree
52 | int N = sentence.size()+1;
53 | Node[] nodes = new Node[N];
54 | root = new Node(0, "ROOT", "ROOT");
55 | nodes[0] = root;
56 | for (int i = 0; i < N-1; i++) {
57 | nodes[i+1] = new Node(i+1,
58 | sentence.get(i).word(), tagged.get(i).tag());
59 | }
60 | for (TypedDependency typedDep : gs.allTypedDependencies()) {
61 | int from = typedDep.gov().index();
62 | int to = typedDep.dep().index();
63 | // String label = typedDep.reln().getShortName(); // omitting the label
64 | nodes[to].parent = nodes[from];
65 | nodes[from].children.add(nodes[to]);
66 | }
67 | }
68 |
69 | public ParseTree(Node node) {
70 | root = node.clone();
71 | }
72 | public ParseTree(ParseTree other) {
73 | this(other.root);
74 | }
75 |
76 | @Override
77 | public int size() {
78 | return root.genNodesArray().length;
79 | }
80 |
81 | @Override
82 | public int getEdit() {
83 | return edit;
84 | }
85 |
86 | @Override
87 | public void setEdit(int edit){
88 | this.edit = edit;
89 | }
90 |
91 | /**
92 | * Helper method for {@link #removeMeaninglessNodes()}.
93 | * (1) If curr node is meaning less, link its children to its parent.
94 | * (2) Move on to remove the meaningless nodes of its children.
95 | */
96 | private void removeMeaninglessNodes(Node curr) {
97 | if (curr == null) { return; }
98 | List currChildren = new ArrayList<>(curr.getChildren());
99 | for (Node child : currChildren) {
100 | removeMeaninglessNodes(child);
101 | }
102 | if (curr != root && curr.getInfo().getType().equals("UNKNOWN")) {
103 | curr.parent.getChildren().remove(curr);
104 | for (Node child : curr.getChildren()) {
105 | curr.parent.getChildren().add(child);
106 | child.parent = curr.parent;
107 | }
108 | }
109 |
110 | }
111 |
112 | /**
113 | * Remove a node from tree if its NodeInfo is ("UNKNOWN", "meaningless").
114 | * To remove the meaningless node, link the children of this node
115 | * to its parent.
116 | */
117 | @Override
118 | public void removeMeaninglessNodes() {
119 | if (root.getChildren().get(0).getInfo() == null) {
120 | System.out.println("ERR! Node info net yet mapped!");
121 | }
122 | // Remove meaningless nodes.
123 | removeMeaninglessNodes(root);
124 | }
125 |
126 | @Override
127 |
128 | public void insertImplicitNodes() {
129 |
130 | List childrenOfRoot = root.getChildren();
131 |
132 | // no condition
133 | if (childrenOfRoot.size() <= 1) {
134 |
135 |
136 | return;
137 | }
138 |
139 | //phase 1, add nodes under select to left subtree
140 |
141 | System.out.println("Phase 1, add nodes under select node to left subtree");
142 |
143 | int IndexOfSN = 0;
144 | for (int i = 0; i < childrenOfRoot.size(); i ++) {
145 |
146 | if (childrenOfRoot.get(i).getInfo().getType().equals("SN")) {
147 |
148 | IndexOfSN = i;
149 | break;
150 | }
151 | }
152 |
153 | //start from the name node
154 |
155 | Node SN = childrenOfRoot.get(IndexOfSN);
156 | List SN_children = SN.getChildren();
157 |
158 | int IndexOfSN_NN = 0;
159 |
160 |
161 | for (int i = 0; i < SN_children.size(); i ++) {
162 |
163 | if (SN_children.get(i).getInfo().getType().equals("NN")) {
164 |
165 | IndexOfSN_NN = i;
166 | break;
167 | }
168 | }
169 |
170 | //add them to left subtree of all branches
171 |
172 | Node copy;
173 | int indexOfAppendedNode;
174 | Node SN_NN = SN_children.get(IndexOfSN_NN);
175 |
176 | for (int i = 0; i < childrenOfRoot.size(); i ++) {
177 |
178 | if (i != IndexOfSN) {
179 |
180 | Node [] nodes_SN_NN = childrenOfRoot.get(i).genNodesArray();
181 | indexOfAppendedNode = nameNodeToBeAppended(nodes_SN_NN);
182 |
183 | if (indexOfAppendedNode != -1) {
184 |
185 | copy = SN_NN.clone();
186 | copy.setOutside(true);
187 |
188 | nodes_SN_NN[indexOfAppendedNode].setChild(copy);
189 | copy.setParent(nodes_SN_NN[indexOfAppendedNode]);
190 | }
191 | }
192 | }
193 |
194 | System.out.println(toString() + '\n');
195 |
196 |
197 | //phase 2, compare left core node with right core node
198 |
199 | System.out.println("Phase 2, core node insertion");
200 |
201 | int indexOfRightCoreNode = -1;
202 | int indexOfLeftCoreNode = -1;
203 |
204 | for (int i = 0; i < childrenOfRoot.size(); i ++) {
205 |
206 | if (i != IndexOfSN) {
207 |
208 | Node [] nodes = childrenOfRoot.get(i).genNodesArray();
209 | int startOfRightBranch = endOfLeftBranch(nodes) + 1;
210 | int sizeOfRightTree = nodes[startOfRightBranch].getChildren().size() + 1;
211 |
212 | //if right tree only contains numbers, skip it
213 |
214 | if (sizeOfRightTree != 1 || !isNumeric(nodes[startOfRightBranch].getWord())) {
215 |
216 | indexOfLeftCoreNode = coreNode(nodes, true);
217 | indexOfRightCoreNode = coreNode(nodes, false);
218 |
219 | //if left core node exists
220 |
221 | if (indexOfLeftCoreNode != -1) {
222 |
223 | boolean doInsert = false;
224 |
225 | //if right subtree neither have core node nor it only contains number
226 | if (indexOfRightCoreNode == -1) {
227 |
228 | //copy core node only
229 |
230 | doInsert = true;
231 | }
232 |
233 | //if right core node & left core node are different schema
234 |
235 | else if (!nodes[indexOfRightCoreNode].getInfo().
236 | ExactSameSchema(nodes[indexOfLeftCoreNode].getInfo())) {
237 |
238 | //copy core node only
239 |
240 | doInsert = true;
241 | }
242 |
243 | if (doInsert) {
244 |
245 | copy = nodes[indexOfLeftCoreNode].clone();
246 | copy.children = new ArrayList();
247 | copy.setOutside(true);
248 |
249 |
250 | boolean insertAroundFN = false;
251 |
252 | int indexOfNewRightCN = IndexToInsertCN(nodes);
253 |
254 | if (indexOfNewRightCN == -1) {
255 |
256 | for (int j = nodes.length - 1; j > endOfLeftBranch(nodes); j --) {
257 |
258 | if (nodes[j].getInfo().getType().equals("FN")) {
259 |
260 | indexOfNewRightCN = j + 1;
261 | insertAroundFN = true;
262 | break;
263 | }
264 | }
265 | }
266 |
267 | if (insertAroundFN) {
268 |
269 | //THIS ONLY HANDLES FN NODE HAS NO CHILD OR ONE NAME NODE CHILD
270 |
271 | List FN_children = nodes[indexOfNewRightCN - 1].getChildren();
272 |
273 | for (int j = 0; j < FN_children.size(); j ++) {
274 |
275 | copy.setChild(FN_children.get(j));
276 | FN_children.get(j).setParent(copy);
277 | }
278 |
279 | copy.setParent(nodes[indexOfNewRightCN - 1]);
280 | nodes[indexOfNewRightCN - 1].children = new ArrayList();
281 | nodes[indexOfNewRightCN - 1].setChild(copy);
282 | }
283 |
284 | else {
285 |
286 | //if right subtree only contains VN, adjust index
287 |
288 | if (indexOfNewRightCN == -1) {
289 |
290 | indexOfNewRightCN = endOfLeftBranch(nodes) + 1;
291 | }
292 |
293 | copy.setChild(nodes[indexOfNewRightCN]);
294 | copy.setParent(nodes[indexOfNewRightCN].getParent());
295 | nodes[indexOfNewRightCN].getParent().removeChild(nodes[indexOfNewRightCN]);
296 | nodes[indexOfNewRightCN].getParent().setChild(copy);
297 | nodes[indexOfNewRightCN].setParent(copy);
298 |
299 | }
300 | }
301 |
302 | System.out.println(toString());
303 |
304 | //phase 3, map each NV under left core node to right core node
305 |
306 | System.out.println("Phase 3, transfer constrain nodes from left to right");
307 |
308 | List NV_children_left = nodes[indexOfLeftCoreNode].getChildren();
309 |
310 | for (int j = 0; j < NV_children_left.size(); j ++) {
311 |
312 | Node [] nodes_new = childrenOfRoot.get(i).genNodesArray();
313 | indexOfRightCoreNode = coreNode(nodes_new, false);
314 | List NV_children_right = nodes_new[indexOfRightCoreNode].getChildren();
315 |
316 | boolean found_NV = false;
317 |
318 | Node curr_left = NV_children_left.get(j);
319 | String curr_left_type = curr_left.getInfo().getType();
320 |
321 | for (int k = 0; k < NV_children_right.size(); k ++) {
322 |
323 | //compare
324 |
325 | Node curr_right = NV_children_right.get(k);
326 |
327 | //strictly compare, exact match ON
328 |
329 | if (curr_left_type.equals("ON")) {
330 |
331 | if (curr_left.equals(curr_right)) {
332 |
333 | found_NV = true;
334 | break;
335 | }
336 | }
337 |
338 | else {
339 |
340 | if (curr_left.getInfo().sameSchema(curr_right.getInfo())) {
341 |
342 | found_NV = true;
343 | break;
344 | }
345 | }
346 | }
347 |
348 | if (!found_NV) {
349 |
350 | //insert
351 |
352 | copy = curr_left.clone();
353 | nodes_new[indexOfRightCoreNode].setChild(copy);
354 | copy.setOutside(true);
355 | copy.setParent(nodes_new[indexOfRightCoreNode]);
356 | }
357 | }
358 |
359 | System.out.println(toString());
360 |
361 | //phase 4, insert function node
362 |
363 | System.out.println("Phase 4, insert missing function node");
364 |
365 | Node [] nodes_final_temp = childrenOfRoot.get(i).genNodesArray();
366 |
367 | int indexOfLeftFN_Tail = -1;
368 |
369 | for (int j = indexOfLeftCoreNode; j > 0; j --) {
370 |
371 | if (nodes_final_temp[j].getInfo().getType().equals("FN")) {
372 |
373 | indexOfLeftFN_Tail = j;
374 | break;
375 | }
376 | }
377 |
378 | if (indexOfLeftFN_Tail != -1) {
379 |
380 | //ASSUMPTION: if FN exists, it always before core node
381 |
382 | for (int k = 1; k < indexOfLeftFN_Tail + 1; k ++) {
383 |
384 | Node [] nodes_final = childrenOfRoot.get(i).genNodesArray();
385 | indexOfRightCoreNode = coreNode(nodes_final, false);
386 |
387 | boolean found_FN = false;
388 |
389 | for (int j = endOfLeftBranch(nodes_final) + 1; j < indexOfRightCoreNode; j ++) {
390 |
391 | if (nodes_final[j].getInfo().ExactSameSchema(nodes_final[k].getInfo())) {
392 |
393 | found_FN = true;
394 | }
395 | }
396 |
397 | if(!found_FN) {
398 | copy = nodes_final[k].clone();
399 | copy.setOutside(true);
400 | copy.children = new ArrayList();
401 |
402 | nodes[0].removeChild(nodes_final[endOfLeftBranch(nodes_final) + 1]);
403 | nodes[0].setChild(copy);
404 |
405 | copy.setParent(nodes[0]);
406 | copy.setChild(nodes[endOfLeftBranch(nodes_final) + 1]);
407 | nodes[endOfLeftBranch(nodes_final) + 1].setParent(copy);
408 | }
409 | }
410 | }
411 | System.out.println(toString());
412 | }
413 | }
414 | }
415 | }
416 | }
417 |
418 | /**
419 | * find the index in the right tree to append core node
420 | */
421 |
422 | public int IndexToInsertCN (Node [] nodes) {
423 |
424 |
425 | for (int i = endOfLeftBranch(nodes) + 1; i < nodes.length; i ++) {
426 |
427 | if (nodes[i].getInfo().getType().equals("NN")) {
428 |
429 | return i;
430 | }
431 | }
432 |
433 | return -1;
434 | }
435 |
436 | /**
437 | * Appending the name node under SELECT to the last name node in leftsubtree
438 | */
439 |
440 | public int nameNodeToBeAppended (Node [] nodes) {
441 |
442 | for (int i = endOfLeftBranch(nodes); i > 0; i --) {
443 |
444 | if (nodes[i].getInfo().getType().equals("NN")) {
445 |
446 | return i;
447 | }
448 | }
449 |
450 | return -1;
451 | }
452 |
453 | /**
454 | * find the index of the last node in the left subtree
455 | */
456 |
457 | public int endOfLeftBranch (Node [] nodes) {
458 |
459 | for (int i = 2; i < nodes.length; i ++) {
460 |
461 | if(nodes[i].getParent().equals(nodes[0])) {
462 |
463 | return i - 1;
464 | }
465 |
466 | }
467 |
468 | return -1;
469 | }
470 |
471 | /**
472 | * check if right branch contains only number
473 | */
474 | public boolean isNumeric(String str) {
475 | try {
476 | double d = Double.parseDouble(str);
477 | }
478 | catch(NumberFormatException e) {
479 | return false;
480 | }
481 | return true;
482 | }
483 |
484 | /**
485 | * find index of core node
486 | */
487 |
488 | public int coreNode (Node [] nodes, boolean left) {
489 |
490 | int startIndex = 1;
491 | int endIndex = endOfLeftBranch(nodes);
492 |
493 | if (!left) {
494 |
495 | startIndex = endOfLeftBranch(nodes) + 1;
496 | endIndex = nodes.length - 1;
497 | }
498 |
499 | for (int i = startIndex; i <= endIndex; i ++) {
500 |
501 | if (nodes[i].getInfo().getType().equals("NN")) {
502 |
503 | return i;
504 | }
505 | }
506 |
507 | return -1;
508 | }
509 |
510 |
511 | @Override
512 | public ParseTree mergeLNQN(){
513 | Node[] nodes = this.root.genNodesArray();
514 | for (int i=0; i getAdjustedTrees() {
550 | List result = TreeAdjustor.getAdjustedTrees(this);
551 | Collections.sort(result, (t1, t2) -> {
552 | if (t1.getScore() != t2.getScore()) {
553 | return - t1.getScore() + t2.getScore();
554 | } else {
555 | return t1.getEdit() - t2.getEdit();
556 | }
557 | });
558 | return result.subList(0, 4);
559 | }
560 |
561 | /**
562 | * Only for testing.
563 | * @return
564 | */
565 | @Deprecated
566 | public SQLQuery translateToSQL() {
567 | return translateToSQL(null);
568 | }
569 |
570 | @Override
571 | public SQLQuery translateToSQL(SchemaGraph schema) {
572 | SQLTranslator translator = new SQLTranslator(root, schema);
573 | return translator.getResult();
574 | }
575 |
576 | @Override
577 | public int hashCode() {
578 | final int prime = 31;
579 | int result = 17;
580 | result = prime * result + ((root == null) ? 0 : root.hashCode());
581 | return result;
582 | }
583 |
584 | @Override
585 | public boolean equals(Object obj) {
586 | if (this == obj)
587 | return true;
588 | if (obj == null)
589 | return false;
590 | if (getClass() != obj.getClass())
591 | return false;
592 | ParseTree other = (ParseTree) obj;
593 | if (root == null) {
594 | if (other.root != null)
595 | return false;
596 | } else if (!root.equals(other.root))
597 | return false;
598 | return true;
599 | }
600 |
	/**
	 * Return an array of nodes in the tree, shallow copy.
	 * Only the array itself is new; the Node objects are shared with this
	 * tree, so mutating a returned node mutates the tree.
	 * @return the nodes of this tree, in the order produced by the root's
	 *         genNodesArray()
	 */
	public Node[] genNodesArray() {
		return root.genNodesArray();
	}
608 |
609 | /**
610 | * Pre-order iterator
611 | * @author keping
612 | */
613 | public class ParseTreeIterator implements Iterator {
614 | LinkedList stack = new LinkedList<>();
615 | ParseTreeIterator() {
616 | stack.push(root);
617 | }
618 | @Override
619 | public boolean hasNext() {
620 | return !stack.isEmpty();
621 | }
622 | @Override
623 | public Node next() {
624 | Node curr = stack.pop();
625 | List children = curr.getChildren();
626 | for (int i = children.size()-1; i >= 0; i--) {
627 | stack.push(children.get(i));
628 | }
629 | return curr;
630 | }
631 | }
632 |
	/**
	 * The default iterator in ParseTree returns the Nodes
	 * using pre-order of the tree (parent before children, children
	 * left to right) — see {@link ParseTreeIterator}.
	 */
	@Override
	public ParseTreeIterator iterator() { return new ParseTreeIterator(); }
639 |
640 | /**
641 | * Get the natural language sentence corresponding to this
642 | * parse tree.
643 | * @return sentence
644 | */
645 | public String getSentence() {
646 | StringBuilder sb = new StringBuilder();
647 | boolean first = true;
648 | for (Node node : this) {
649 | if (first) {
650 | sb.append(node.getWord());
651 | first = false;
652 | } else {
653 | sb.append(" ").append(node.getWord());
654 | }
655 | }
656 | return sb.toString();
657 | }
658 |
659 | /**
660 | * toString like "curr -> [child1, child2, ...]"
661 | * @param curr
662 | * @return
663 | */
664 | private String nodeToString(Node curr) {
665 | if (curr == null) { return ""; }
666 | String s = curr.toString() + " -> ";
667 | s += curr.getChildren().toString() + "\n";
668 | for (Node child : curr.getChildren()) {
669 | s += nodeToString(child);
670 | }
671 | return s;
672 | }
673 |
674 | @Override
675 | public String toString() {
676 | StringBuilder sb = new StringBuilder();
677 | sb.append("Sentence: ").append(getSentence()).append("\n");
678 | sb.append(nodeToString(root));
679 | return sb.toString();
680 | }
681 |
	/**
	 * Score of a tree measures the syntactic legality of the tree.
	 * It is the negated count of invalid nodes reported by
	 * {@link SyntacticEvaluator#numberOfInvalidNodes}: a fully valid
	 * tree scores 0, and each invalid node lowers the score by one.
	 * @return zero minus the number of syntactically invalid nodes
	 */
	public int getScore(){
		return - SyntacticEvaluator.numberOfInvalidNodes(this);
	}
690 |
691 | }
692 |
--------------------------------------------------------------------------------