81 | )
82 | : (
83 |
84 |
85 |
100 |
101 | )
102 | }
103 |
104 | );
105 | }
106 | }
107 |
108 | export default App;
109 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/SQLQuery.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collection;
5 | import java.util.HashMap;
6 | import java.util.HashSet;
7 | import java.util.List;
8 | import java.util.Map;
9 |
10 | /**
11 | * Just a wrapper for a String of sql translateNL.
12 | * @author keping
13 | */
14 | public class SQLQuery {
15 | private List blocks;
16 | private Map> map;
17 |
18 | SQLQuery() {
19 | map = new HashMap<>();
20 | map.put("SELECT", new ArrayList());
21 | map.put("FROM", new HashSet());
22 | map.put("WHERE", new HashSet());
23 | blocks = new ArrayList();
24 | }
25 |
26 | @Deprecated
27 | public SQLQuery(String s) {
28 |
29 | }
30 |
31 | /**
32 | * Get the String translateNL insides the SQLQuery.
33 | * @return
34 | */
35 | String get() { return toString(); }
36 |
37 | public void addBlock(SQLQuery query) {
38 | blocks.add(query);
39 | add("FROM", "BLOCK"+blocks.size());
40 | }
41 |
42 | Collection getCollection(String keyWord) { return map.get(keyWord); }
43 |
44 | /**
45 | * Add (key, value) to the SQL Query.
46 | * For example, (SELECT, article.title) or (FROM, article).
47 | * @param key
48 | * @param val
49 | */
50 | void add(String key, String value) {
51 | map.get(key).add(value);
52 | }
53 |
54 |
55 | /**
56 | * Serve for the toString() method.
57 | * @param SELECT (or FROM)
58 | * @return one line of arguments of that translateNL (SELECT, FROM)
59 | */
60 | private StringBuilder toSBLine(Collection SELECT) {
61 | StringBuilder sb = new StringBuilder();
62 | for (String val : SELECT) {
63 | if (sb.length() == 0) {
64 | sb.append(val);
65 | } else {
66 | sb.append(", ").append(val);
67 | }
68 | }
69 | return sb;
70 | }
71 |
72 | /**
73 | * Similar to {@link #toSBLine(Collection)}, but that incorporates
74 | * the information of "AND" and "OR".
75 | * @param WHERE
76 | * @return
77 | */
78 | private StringBuilder toSBLineCondition(Collection WHERE) {
79 | StringBuilder sb = new StringBuilder();
80 | for (String val : WHERE) {
81 | if (sb.length() == 0) {
82 | sb.append(val);
83 | } else {
84 | // currently only allow for "AND"
85 | // TODO: add "OR"
86 | sb.append(" AND ").append(val);
87 | }
88 | }
89 | return sb;
90 | }
91 |
92 | @Override
93 | public String toString() {
94 | if (map.get("SELECT").isEmpty() || map.get("FROM").isEmpty()) {
95 | return "Illegal Query";
96 | }
97 | StringBuilder sb = new StringBuilder();
98 | for (int i = 0; i < blocks.size(); i++) {
99 | sb.append("BLOCK"+(i+1)+":").append("\n");
100 | sb.append(blocks.get(i).toString()).append("\n");
101 | sb.append("\n");
102 | }
103 | sb.append("SELECT ").append(toSBLine(map.get("SELECT"))).append("\n");
104 | sb.append("FROM ").append(toSBLine(map.get("FROM"))).append("\n");
105 | if (!map.get("WHERE").isEmpty()) {
106 | sb.append("WHERE ").append(toSBLineCondition(map.get("WHERE"))).append("\n");
107 | }
108 | sb.append(";\n");
109 | return sb.toString();
110 | }
111 |
112 |
113 | }
114 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/NodeInfo.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.Comparator;
4 |
5 | /**
6 | * Immutable class indicating the SQL component for a Node.
7 | * @author keping
8 | *
9 | */
10 | public class NodeInfo {
11 | // TODO: all fields should be private in final version.
12 | private String type;
13 | private String value;
14 | /**
15 | * Similarity score of the Node to the column/table name in schema.
16 | */
17 | private double score = 1.0;
18 |
19 | public NodeInfo(String type, String value) {
20 | this.type = type;
21 | this.value = value;
22 | }
23 | public NodeInfo(String type, String value, double score) {
24 | this(type, value);
25 | this.score = score;
26 | }
27 | public NodeInfo(NodeInfo ni){
28 | this.type = ni.type;
29 | this.value = ni.value;
30 | this.score = ni.score;
31 | }
32 | @Override
33 | public String toString() {
34 | return type+": "+value;
35 | }
36 | public String getType() { return type; }
37 | public String getValue() {
38 | return value;
39 | }
40 |
41 | public double getScore(){
42 | return score;
43 | }
44 |
45 | public static class ReverseScoreComparator implements Comparator {
46 | @Override
47 | public int compare(NodeInfo a, NodeInfo b) {
48 | if (a.score < b.score) { return 1; }
49 | else if (a.score > b.score) { return -1; }
50 | else { return 0; }
51 | }
52 | }
53 |
54 | @Override
55 | public int hashCode() {
56 | final int prime = 31;
57 | int result = 1;
58 | result = prime * result + ((type == null) ? 0 : type.hashCode());
59 | result = prime * result + ((value == null) ? 0 : value.hashCode());
60 | return result;
61 | }
62 | @Override
63 | public boolean equals(Object obj) {
64 | if (this == obj)
65 | return true;
66 | if (obj == null)
67 | return false;
68 | if (getClass() != obj.getClass())
69 | return false;
70 | NodeInfo other = (NodeInfo) obj;
71 | if (type == null) {
72 | if (other.type != null)
73 | return false;
74 | } else if (!type.equals(other.type))
75 | return false;
76 | if (value == null) {
77 | if (other.value != null)
78 | return false;
79 | } else if (!value.equals(other.value))
80 | return false;
81 | return true;
82 | }
83 |
84 | public boolean ExactSameSchema (NodeInfo other) {
85 |
86 | if (type == null || other.getType() == null || value == null || other.getValue() == null) {
87 | return false;
88 | }
89 |
90 | if (type.equals(other.getType()) && value.equals(other.getValue())) {
91 |
92 | return true;
93 | }
94 |
95 | return false;
96 | }
97 |
98 | public boolean sameSchema (NodeInfo other) {
99 |
100 | if (type == null || other.getType() == null || value == null || other.getValue() == null) {
101 | return false;
102 | }
103 |
104 | int indexOfDot_Other = other.getValue().indexOf('.');
105 |
106 | int indexOfDot = value.indexOf('.');
107 |
108 | if (indexOfDot_Other == -1) {
109 |
110 | indexOfDot_Other = other.getValue().length();
111 | }
112 |
113 | if (indexOfDot == -1) {
114 |
115 | indexOfDot = value.length();
116 | }
117 |
118 | if (other.getValue().substring(0, indexOfDot_Other - 1)
119 | .equals(value.substring(0, indexOfDot - 1))) {
120 |
121 | return true;
122 | }
123 |
124 |
125 | return false;
126 | }
127 |
128 | }
129 |
--------------------------------------------------------------------------------
/client/src/utils/registerServiceWorker.js:
--------------------------------------------------------------------------------
// In production, we register a service worker to serve assets from local cache.
2 |
3 | // This lets the app load faster on subsequent visits in production, and gives
4 | // it offline capabilities. However, it also means that developers (and users)
5 | // will only see deployed updates on the "N+1" visit to a page, since previously
6 | // cached resources are updated in the background.
7 |
// To learn more about the benefits of this model, read https://goo.gl/KwvDNy.
9 | // This link also includes instructions on opting out of this behavior.
10 |
// True when the page is served from a loopback address: plain "localhost",
// the IPv6 loopback "[::1]", or any 127.0.0.0/8 IPv4 address.
const isLocalhost = (() => {
  const { hostname } = window.location;
  return Boolean(
    hostname === 'localhost' ||
      hostname === '[::1]' ||
      hostname.match(/^127(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$/)
  );
})();
20 |
// Registers the app's service worker in production builds only.
export default function register() {
  if (process.env.NODE_ENV === 'production' && 'serviceWorker' in navigator) {
    // The URL constructor is available in all browsers that support SW.
    const publicUrl = new URL(process.env.PUBLIC_URL, window.location);
    if (publicUrl.origin !== window.location.origin) {
      // Our service worker won't work if PUBLIC_URL is on a different origin
      // from what our page is served on. This might happen if a CDN is used to
      // serve assets; see https://github.com/facebookincubator/create-react-app/issues/2374
      return;
    }

    window.addEventListener('load', () => {
      const swUrl = `${process.env.PUBLIC_URL}/service-worker.js`;

      if (!isLocalhost) {
        // Is not localhost. Just register the service worker.
        registerValidSW(swUrl);
      } else {
        // This is running on localhost. Let's check if a service worker still exists or not.
        checkValidServiceWorker(swUrl);
      }
    });
  }
}
45 |
/**
 * Registers the service worker at swUrl and logs whether fresh content
 * was fetched or everything was served from the precache.
 * @param {string} swUrl URL of the service-worker.js script
 */
function registerValidSW(swUrl) {
  navigator.serviceWorker
    .register(swUrl)
    .then(registration => {
      registration.onupdatefound = () => {
        const installingWorker = registration.installing;
        installingWorker.onstatechange = () => {
          if (installingWorker.state === 'installed') {
            if (navigator.serviceWorker.controller) {
              // At this point, the old content will have been purged and
              // the fresh content will have been added to the cache.
              // It's the perfect time to display a "New content is
              // available; please refresh." message in your web app.
              console.log('New content is available; please refresh.');
            } else {
              // At this point, everything has been precached.
              // It's the perfect time to display a
              // "Content is cached for offline use." message.
              console.log('Content is cached for offline use.');
            }
          }
        };
      };
    })
    .catch(error => {
      // Fix: a package-rename pass had mangled this message into
      // "Error during com.dukenlidb.nlidb.service worker registration:".
      console.error('Error during service worker registration:', error);
    });
}
74 |
// Used on localhost: verifies the registered worker script still exists and
// is JavaScript before re-registering; otherwise unregisters and reloads.
function checkValidServiceWorker(swUrl) {
  // Check if the service worker can be found. If it can't, reload the page.
  fetch(swUrl)
    .then(response => {
      // Ensure the service worker exists, and that we really are getting a JS file.
      if (
        response.status === 404 ||
        response.headers.get('content-type').indexOf('javascript') === -1
      ) {
        // No service worker found. Probably a different app. Reload the page.
        navigator.serviceWorker.ready.then(registration => {
          registration.unregister().then(() => {
            window.location.reload();
          });
        });
      } else {
        // Service worker found. Proceed as normal.
        registerValidSW(swUrl);
      }
    })
    .catch(() => {
      console.log(
        'No internet connection found. App is running in offline mode.'
      );
    });
}
101 |
// Removes any service worker previously registered for this page.
export function unregister() {
  if (!('serviceWorker' in navigator)) {
    return;
  }
  navigator.serviceWorker.ready.then(registration => registration.unregister());
}
109 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/NodeMapper.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collections;
5 | import java.util.HashMap;
6 | import java.util.HashSet;
7 | import java.util.List;
8 | import java.util.Map;
9 | import java.util.Set;
10 |
11 | /**
12 | * A class to help map word {@link Node} in {@link ParseTree}
13 | * to SQL components (represented by class {@link NodeInfo}).
14 | * @author keping
15 | *
16 | */
17 | public class NodeMapper {
18 | private WordNet wordNet;
19 | /**
20 | * Key is the word. Value is the corresponding SQL component.
21 | * For example: ("return", ("SN", "SELECT"))
22 | */
23 | private Map map;
24 |
25 |
26 | /**
27 | * Initialize the NodeMapper. (The mapper could be made configurable. It can also initialize
28 | * by reading mappings from a file)
29 | * @throws Exception
30 | */
31 | public NodeMapper() throws Exception {
32 | wordNet = new WordNet();
33 | map = new HashMap();
34 | map.put("return", new NodeInfo("SN", "SELECT")); // Select Node
35 |
36 | map.put("equals", new NodeInfo("ON", "=")); // Operator Node
37 | map.put("less", new NodeInfo("ON", "<"));
38 | map.put("greater", new NodeInfo("ON", ">"));
39 | map.put("not", new NodeInfo("ON", "!=")); //TODO: not is a operator node or logic node?
40 | map.put("before", new NodeInfo("ON", "<"));
41 | map.put("after", new NodeInfo("ON", ">"));
42 | map.put("more", new NodeInfo("ON", ">"));
43 | map.put("older", new NodeInfo("ON", ">"));
44 | map.put("newer", new NodeInfo("ON", "<"));
45 |
46 | map.put("fn", new NodeInfo("FN", "AVG")); // Function Node
47 | map.put("average", new NodeInfo("FN", "AVG"));
48 | map.put("most", new NodeInfo("FN", "MAX"));
49 | map.put("total", new NodeInfo("FN", "SUM"));
50 | map.put("number", new NodeInfo("FN","COUNT"));
51 |
52 | map.put("all", new NodeInfo("QN", "ALL")); // Quantifier Node
53 | map.put("any", new NodeInfo("QN", "ANY"));
54 | map.put("each", new NodeInfo("QN", "EACH"));
55 |
56 | map.put("and", new NodeInfo("LN", "AND")); // Logic Node
57 | map.put("or", new NodeInfo("LN", "OR"));
58 |
59 |
60 | }
61 |
62 | /**
63 | *
Return the a ranked list of candidate NodeInfos for this Node. This method
64 | * will be called by the controller, and then the candidates will be passed on
65 | * to the view for user to choose. If there is only one candidate in the list,
66 | * the choice is automatically made.
67 | *
The length of the list of NodeInfos is at least 1. We will have special type
68 | * in NodeInfo if the Node doesn't correspond to any SQL component (the Node is
69 | * meaningless).
70 | *
The returned list contains at most 6 elements.
71 | *
Treat all input as lower case.
72 | * @param node
73 | * @param schema
74 | * @return a ranked of NodeInfo
75 | */
76 | public List getNodeInfoChoices(Node node, SchemaGraph schema) {
77 | List result = new ArrayList(); //final output
78 | if (node.getWord().equals("ROOT")) {
79 | result.add(new NodeInfo("ROOT", "ROOT"));
80 | return result;
81 | }
82 | Set valueNodes = new HashSet(); //used to store (type, value, score) of 100 sample values for every column in every table
83 | String word = node.getWord().toLowerCase(); // all words as lower case
84 |
85 | if (map.containsKey(word)) {
86 | result.add(map.get(word));
87 | return result;
88 | }
89 |
90 | for (String tableName : schema.getTableNames()) {
91 | result.add(new NodeInfo("NN", tableName,
92 | WordSimilarity.getSimilarity(word, tableName, wordNet))); //map name nodes(table names)
93 | for (String colName : schema.getColumns(tableName)) {
94 | result.add(new NodeInfo("NN", tableName+"."+colName,
95 | WordSimilarity.getSimilarity(word, colName, wordNet))); //map name nodes (attribute names)
96 | for (String value : schema.getValues(tableName, colName)) {
97 | if (word == null || value == null) {
98 | System.out.println("Comparing "+word+" and "+value);
99 | System.out.println("In table "+tableName+", column "+colName);
100 | }
101 | valueNodes.add(new NodeInfo("VN", tableName+"."+colName,
102 | WordSimilarity.getSimilarity(word, value, wordNet))); //add every sample value into valueNodes
103 | }
104 | }
105 | }
106 |
107 | //map value nodes (table values), to get the value node with highest similarity, add its (type, value, score) into result
108 | // we want all candidates, not only the one with the highest similarity
109 | result.addAll(valueNodes);
110 | result.add(new NodeInfo("UNKNOWN", "meaningless", 1.0));
111 | Collections.sort(result, new NodeInfo.ReverseScoreComparator());
112 | return result;
113 | }
114 |
115 | }
116 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/ui/UserView.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.ui;
2 |
3 | import com.dukenlidb.nlidb.archive.app.Controller;
4 | import javafx.application.Application;
5 | import javafx.application.Platform;
6 | import javafx.collections.FXCollections;
7 | import javafx.collections.ObservableList;
8 | import javafx.geometry.Insets;
9 | import javafx.scene.Scene;
10 | import javafx.scene.control.Button;
11 | import javafx.scene.control.ComboBox;
12 | import javafx.scene.control.Label;
13 | import javafx.scene.control.TextArea;
14 | import javafx.scene.layout.HBox;
15 | import javafx.scene.layout.VBox;
16 | import javafx.scene.text.Text;
17 | import javafx.stage.Stage;
18 | import com.dukenlidb.nlidb.archive.model.NodeInfo;
19 |
20 | public class UserView extends Application {
21 | private static final String TEST_TEXT = "Return the number of authors who published theory papers before 1980.";
22 | // "Return the number of authors who published theory papers before 1980."
23 |
24 | Stage stage; // the window
25 | Scene scene; // the com.dukenlidb.nlidb.main content in the window
26 | Controller ctrl;
27 | Button btnTranslate;
28 | Text display;
29 | ComboBox choiceBox; // use scrollable comboBox instead of choiceBox
30 | Button btnConfirmChoice;
31 | ComboBox treeChoice;
32 | Button btnTreeConfirm;
33 | HBox hb;
34 | VBox vb1, vb2;
35 |
36 |
37 | public void setDisplay(String text) {
38 | display.setText(text);
39 | }
40 |
41 | public void appendDisplay(String text) {
42 | display.setText(display.getText()+text);
43 | }
44 |
45 | public void showNodesChoice() {
46 | vb2.getChildren().addAll(choiceBox, btnConfirmChoice);
47 | }
48 |
49 | public void removeChoiceBoxButton() {
50 | vb2.getChildren().remove(choiceBox);
51 | vb2.getChildren().remove(btnConfirmChoice);
52 | }
53 |
54 | public void setChoices(ObservableList choices) {
55 | choiceBox.setItems(choices);
56 | choiceBox.setValue(choices.get(0));
57 | }
58 |
59 | public NodeInfo getChoice() {
60 | return choiceBox.getValue();
61 | }
62 |
63 | public void showTreesChoice() {
64 | vb2.getChildren().addAll(treeChoice, btnTreeConfirm);
65 | }
66 |
67 | public void removeTreesChoices() {
68 | vb2.getChildren().removeAll(treeChoice, btnTreeConfirm);
69 | }
70 |
71 | @Override
72 | public void start(Stage primaryStage) throws Exception {
73 |
74 | stage = primaryStage;
75 | stage.setTitle("Window for NLIDB");
76 |
77 | Label label1 = new Label("Welcome to Natural Language Interface to DataBase!");
78 |
79 | Label lblInput = new Label("Natural Language Input:");
80 | TextArea fieldIn = new TextArea();
81 | fieldIn.setPrefHeight(100);
82 | fieldIn.setPrefWidth(100);
83 | fieldIn.setWrapText(true);
84 | fieldIn.setText(TEST_TEXT);
85 |
86 | btnTranslate = new Button("translate");
87 |
88 | // Define action of the translate button.
89 | btnTranslate.setOnAction(e -> {
90 | ctrl.processNaturalLanguage(fieldIn.getText());
91 | });
92 |
93 | display = new Text();
94 | display.setWrappingWidth(500);
95 | display.prefHeight(300);
96 | display.setText("Default display text");
97 |
98 | // choices and button for nodes mapping
99 | choiceBox = new ComboBox();
100 | choiceBox.setVisibleRowCount(6);
101 | btnConfirmChoice = new Button("confirm choice");
102 | btnConfirmChoice.setOnAction(e -> {
103 | ctrl.chooseNode(getChoice());
104 | });
105 |
106 | // choices and button for tree selection
107 | treeChoice = new ComboBox(); // ! only show 3 choices now
108 | treeChoice.setItems(FXCollections.observableArrayList(0,1,2));
109 | treeChoice.getSelectionModel().selectedIndexProperty().addListener((ov, oldV, newV) -> {
110 | ctrl.showTree(treeChoice.getItems().get((Integer) newV));
111 | });
112 | btnTreeConfirm = new Button("confirm tree choice");
113 | btnTreeConfirm.setOnAction(e -> {
114 | ctrl.chooseTree(treeChoice.getValue());
115 | });
116 |
117 | vb1 = new VBox();
118 | vb1.setSpacing(10);
119 | vb1.getChildren().addAll(
120 | label1,
121 | lblInput,fieldIn,
122 | btnTranslate
123 | );
124 |
125 | vb2 = new VBox();
126 | vb2.setSpacing(20);
127 | vb2.getChildren().addAll(display);
128 |
129 | hb = new HBox();
130 | hb.setPadding(new Insets(15, 12, 15, 12));
131 | hb.setSpacing(10);
132 | hb.getChildren().addAll(vb1, vb2);
133 |
134 | scene = new Scene(hb, 800, 450);
135 |
136 | stage.setScene(scene);
137 | ctrl = new Controller(this);
138 | stage.show();
139 |
140 | }
141 |
142 | @Override
143 | public void stop() throws Exception {
144 | super.stop();
145 | if (ctrl != null) {
146 | ctrl.closeConnection();
147 | }
148 | Platform.exit();
149 | System.exit(0);
150 | }
151 |
152 | public static void main(String[] args) {
153 | try {
154 | Application.launch(args);
155 | } catch (Exception e) { e.printStackTrace(); }
156 | }
157 |
158 | }
159 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/SQLTranslator.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import java.util.Set;
6 |
7 | /**
8 | * See the paper by Fei Li and H. V. Jagadish for the defined grammar.
9 | * @author keping
10 | *
11 | */
12 | public class SQLTranslator {
13 | private SQLQuery query;
14 | private SchemaGraph schema;
15 | private int blockCounter = 1;
16 |
17 | public SQLTranslator(Node root, SchemaGraph schema) {
18 | this(root, schema, false);
19 | }
20 |
21 | /**
22 | * Translating a block, starting from translateGNP.
23 | * @param root
24 | * @param schema
25 | */
26 | public SQLTranslator(Node root, SchemaGraph schema, boolean block) {
27 | if (!block) {
28 | this.schema = schema;
29 | query = new SQLQuery();
30 |
31 | translateSClause(root.getChildren().get(0));
32 | if (root.getChildren().size() >= 2) {
33 | translateComplexCondition(root.getChildren().get(1));
34 | }
35 |
36 | if (schema != null) addJoinPath();
37 | } else {
38 | this.schema = schema;
39 | query = new SQLQuery();
40 | translateGNP(root);
41 | }
42 | }
43 |
44 | public SQLQuery getResult() { return query; }
45 |
46 |
47 | private static boolean isNumber(String str) {
48 | int length = str.length();
49 | if (length == 0) { return false; }
50 | int i = 0;
51 | if (str.charAt(0) == '-') {
52 | if (length == 1) { return false; }
53 | i = 1;
54 | }
55 | for (; i < length; i++) {
56 | char c = str.charAt(i);
57 | if (c < '0' || c > '9' && c != '.') { return false; }
58 | }
59 | return true;
60 | }
61 |
62 | private void translateCondition(Node node) {
63 | String attribute = "ATTRIBUTE";
64 | String compareSymbol = "=";
65 | String value = "VALUE";
66 | if (node.getInfo().getType().equals("VN")) {
67 | attribute = node.getInfo().getValue();
68 | value = node.getWord();
69 | } else if (node.getInfo().getType().equals("ON")) {
70 | compareSymbol = node.getInfo().getValue();
71 | Node VN = node.getChildren().get(0);
72 | attribute = VN.getInfo().getValue();
73 | value = VN.getWord();
74 | }
75 | if (!isNumber(value)) { value = "\""+value+"\""; }
76 | query.add("WHERE", attribute+" "+compareSymbol+" "+value);
77 | query.add("FROM", attribute.split("\\.")[0]);
78 | }
79 |
80 | private void translateNN(Node node) {
81 | translateNN(node, "");
82 | }
83 | private void translateNN(Node node, String valueFN) {
84 | if (!node.getInfo().getType().equals("NN")) { return; }
85 | if (!valueFN.equals("")) {
86 | query.add("SELECT", valueFN+"("+node.getInfo().getValue()+")");
87 | } else {
88 | query.add("SELECT", node.getInfo().getValue());
89 | }
90 | query.add("FROM", node.getInfo().getValue().split("\\.")[0]);
91 | }
92 |
93 | private void translateNP(Node node) {
94 | translateNP(node, "");
95 | }
96 | private void translateNP(Node node, String valueFN) {
97 | translateNN(node, valueFN);
98 | for (Node child : node.getChildren()) {
99 | if (child.getInfo().getType().equals("NN")) {
100 | translateNN(child);
101 | } else if (child.getInfo().getType().equals("ON") ||
102 | child.getInfo().getType().equals("VN")){
103 | translateCondition(child);
104 | }
105 | }
106 | }
107 |
108 | private void translateGNP(Node node) {
109 | if (node.getInfo().getType().equals("FN")) {
110 | if (node.getChildren().isEmpty()) { return; }
111 | translateNP(node.getChildren().get(0), node.getInfo().getValue());
112 | } else if (node.getInfo().getType().equals("NN")) {
113 | translateNP(node);
114 | }
115 | }
116 |
117 | private void translateComplexCondition(Node node) {
118 | if (!node.getInfo().getType().equals("ON")) { return; }
119 | if (node.getChildren().size() != 2) { return; }
120 | SQLTranslator transLeft = new SQLTranslator(node.getChildren().get(0), schema, true);
121 | SQLTranslator transRight= new SQLTranslator(node.getChildren().get(1), schema, true);
122 | query.addBlock(transLeft.getResult());
123 | query.addBlock(transRight.getResult());
124 | query.add("WHERE", "BLOCK"+(blockCounter++)+" "+node.getInfo().getValue()+" "+"BLOCK"+(blockCounter++));
125 | }
126 |
127 | private void translateSClause(Node node) {
128 | if (!node.getInfo().getType().equals("SN")) { return; }
129 | translateGNP(node.getChildren().get(0));
130 | }
131 |
132 | private void addJoinKeys(String table1, String table2) {
133 | Set joinKeys = schema.getJoinKeys(table1, table2);
134 | for (String joinKey : joinKeys) {
135 | query.add("WHERE", table1+"."+joinKey+" = "+table2+"."+joinKey);
136 | }
137 | }
138 |
139 | private void addJoinPath(List joinPath) {
140 | for (int i = 0; i < joinPath.size()-1; i++) {
141 | addJoinKeys(joinPath.get(i), joinPath.get(i+1));
142 | }
143 | }
144 |
145 | private void addJoinPath() {
146 | List fromTables = new ArrayList(query.getCollection("FROM"));
147 | if (fromTables.size() <= 1) { return; }
148 | for (int i = 0; i < fromTables.size()-1; i++) {
149 | for (int j = i+1; j < fromTables.size(); j++) {
150 | List joinPath = schema.getJoinPath(fromTables.get(i), fromTables.get(j));
151 | addJoinPath(joinPath);
152 | }
153 | }
154 | }
155 |
156 | }
157 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/Node.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.LinkedList;
5 | import java.util.List;
6 |
7 | /**
8 | * Interface for a parse tree node.
9 | * @author keping
10 | *
11 | */
12 | public class Node {
13 |
14 | /**
15 | * record if the node is copied over
16 | */
17 | boolean outside = false;
18 |
19 | private int index = 0;
20 | /**
21 | * Information indicating the corresponding SQL component of the Node.
22 | */
23 | NodeInfo info = null;
24 | /**
25 | * The natural language word of the Node. This is the only field of
26 | * the Node object that is immutable.
27 | */
28 | String word;
29 | /**
30 | * Part-of-speech tag for the Node.
31 | */
32 | String posTag;
33 |
34 | /**
35 | * Parent of the node can be directly modified by ParseTree.
36 | */
37 | Node parent = null; // package private
38 | /**
39 | * Children of the node can be directly modified by ParseTree.
40 | */
41 | List children = new ArrayList(); // package private
42 |
43 | //for testing purpose
44 | boolean isInvalid = false;
45 |
46 | public Node(int index, String word, String posTag){
47 | this(index, word, posTag, null);
48 | }
49 |
50 | public Node(int index, String word, String posTag, NodeInfo info) {
51 | this.index = index;
52 | this.word = word;
53 | this.posTag = posTag;
54 | this.info = info;
55 | }
56 |
57 | public Node(String word, String posTag, NodeInfo info) {
58 | this(0, word, posTag, info);
59 |
60 | }
61 |
62 | private Node clone(Node node){
63 | if (node == null) return null;
64 | Node copy = new Node(node.index, node.word, node.posTag, node.info);
65 | for (Node child : node.children){
66 | Node copyChild = clone(child);
67 | copyChild.parent = copy;
68 | copy.children.add(copyChild);
69 | }
70 | return copy;
71 | }
72 | public Node clone(){
73 | return clone(this);
74 | }
75 |
76 |
77 | public NodeInfo getInfo() { return info; }
78 | public void setInfo(NodeInfo info) { this.info = info; }
79 | public String getWord() { return word; }
80 | public void setWord(String word) {this.word = word;}
81 | public String getPosTag() { return posTag; }
82 | public List getChildren() { return children; }
83 | public void setChild(Node child) {this.children.add(child);}
84 | public Node getParent() {return parent;}
85 | public void setParent(Node parent) {this.parent = parent;}
86 | public void setOutside(boolean outside) {this.outside = outside;}
87 | public boolean getOutside() {return this.outside;}
88 |
89 | public void removeChild (Node child) {
90 |
91 | for (int i = 0; i < children.size(); i ++) {
92 |
93 | if (children.get(i).equals(child)) {
94 |
95 | children.remove(i);
96 | return;
97 | }
98 | }
99 | }
100 |
101 | public void printNodeArray () {
102 |
103 | Node [] nodes = genNodesArray();
104 |
105 | for (int i = 0; i < nodes.length; i++) {
106 | System.out.println("type: " + nodes[i].getInfo().getType() + " value: " + nodes[i].getInfo().getValue());
107 | }
108 | }
109 |
110 |
111 | /**
112 | * Generate an array of the nodes tree with this as root
113 | * using pre-order traversal;
114 | * @return
115 | */
116 | public Node[] genNodesArray() {
117 | List nodesList = new ArrayList<>();
118 | LinkedList stack = new LinkedList<>();
119 | stack.push(this);
120 | while (!stack.isEmpty()) {
121 | Node curr = stack.pop();
122 | nodesList.add(curr);
123 | List currChildren = curr.getChildren();
124 | for (int i = currChildren.size()-1; i >= 0; i--) {
125 | stack.push(currChildren.get(i));
126 | }
127 | }
128 | int N = nodesList.size();
129 | Node[] nodes = new Node[N];
130 | for (int i = 0; i < N; i++) {
131 | nodes[i] = nodesList.get(i);
132 | }
133 | return nodes;
134 | }
135 |
136 | /**
137 | * Only includes posTag, word, info, and children.
138 | * Return the hashCode of the tree represented by this node.
139 | */
140 | @Override
141 | public int hashCode() { // exclude parent.
142 | final int prime = 31;
143 | int result = 17;
144 | result = prime * result + index;
145 | result = prime * result + ((posTag == null) ? 0 : posTag.hashCode());
146 | result = prime * result + ((word == null) ? 0 : word.hashCode());
147 | result = prime * result + ((info == null) ? 0 : info.hashCode());
148 | if (children != null) {
149 | for (Node child : children) {
150 | result = prime * result + child.hashCode();
151 | }
152 | }
153 |
154 | return result;
155 | }
156 |
157 | /**
158 | * Only considers word, posTag, info, and children (recursively).
159 | * See whether two trees represented by two nodes are equal.
160 | */
161 | @Override
162 | public boolean equals(Object obj) { // exclude parent
163 | if (this == obj) { return true; }
164 | if (obj == null) { return false; }
165 | if (getClass() != obj.getClass()) { return false; }
166 | Node other = (Node) obj;
167 | if (index != other.index) { return false; }
168 | if (!word.equals(other.word)) { return false; }
169 | if (!posTag.equals(other.posTag)) { return false; }
170 | if (info != other.info) {
171 | if (info == null || other.info == null) { return false; }
172 | if (!info.equals(other.info)) { return false; }
173 | }
174 | if (children != other.children) {
175 | if (children == null || other.children == null) { return false; }
176 | if (children.size() != other.children.size()) { return false; }
177 | for (int i = 0; i < children.size(); i++) {
178 | if (!children.get(i).equals(other.children.get(i))) { return false; }
179 | }
180 | }
181 | return true;
182 | }
183 |
184 | public String toString() {
185 | String s = "("+index+")"+word;
186 | if (info != null) {
187 | s += "("+info.getType()+":"+info.getValue()+")";
188 | }
189 | return s;
190 | }
191 | }
192 |
--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
#!/usr/bin/env sh

##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################

# NOTE(review): this is the stock script emitted by Gradle's `wrapper` task;
# prefer regenerating it over hand-editing.

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn ( ) {
    echo "$*"
}

# Print a message and abort with a non-zero exit status.
die ( ) {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar

# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=$((i+1))
    done
    case $i in
        (0) set -- ;;
        (1) set -- "$args0" ;;
        (2) set -- "$args0" "$args1" ;;
        (3) set -- "$args0" "$args1" "$args2" ;;
        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save ( ) {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=$(save "$@")

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
  cd "$(dirname "$0")"
fi

exec "$JAVACMD" "$@"
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/TreeAdjustor.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.HashMap;
5 | import java.util.HashSet;
6 | import java.util.List;
7 | import java.util.PriorityQueue;
8 | import java.util.Set;
9 |
10 | public class TreeAdjustor {
11 |
12 | private static final int MAX_EDIT = 15;
13 |
14 | /**
15 | * Return the node in the tree that equals to the targetNode.
16 | * @param tree
17 | * @param targetNode
18 | * @return
19 | */
20 | private static Node find(ParseTree tree, Node targetNode) {
21 | for (Node node : tree) {
22 | if (node.equals(targetNode)) { return node; }
23 | }
24 | return null;
25 | }
26 |
27 | /**
28 | * Swap this parent node and a child node.
29 | * @param parent
30 | * @param child
31 | */
32 | private static void swap(Node parent, Node child) {
33 | // swap the attributes directly.
34 | NodeInfo childInfo = child.info;
35 | String childWord = child.word;
36 | String childPosTag = child.posTag;
37 | child.info = parent.info;
38 | child.word = parent.word;
39 | child.posTag = parent.posTag;
40 | parent.info = childInfo;
41 | parent.word = childWord;
42 | parent.posTag = childPosTag;
43 | }
44 |
45 | /**
46 | * Make the child node a rightmost sibling of the target Node.
47 | * @param target
48 | * @param child
49 | */
50 | private static void makeSibling(Node target, Node child) {
51 | List children = target.getChildren();
52 | target.children = new ArrayList();;
53 | for (Node anyChild : children) {
54 | if (anyChild != child) { target.getChildren().add(anyChild); }
55 | }
56 | target.parent.children.add(child);
57 | child.parent = target.parent;
58 | }
59 |
60 | /**
61 | * Make a sibling the rightmost child of the target.
62 | * @param target
63 | * @param sibling
64 | */
65 | private static void makeChild(Node target, Node sibling) {
66 | List siblings = target.parent.children;
67 | target.parent.children = new ArrayList();
68 | for (Node anySibling : siblings) {
69 | if (anySibling != sibling) {
70 | target.parent.children.add(anySibling);
71 | }
72 | }
73 | target.children.add(sibling);
74 | sibling.parent = target;
75 | }
76 |
77 | /**
78 | *
Return a list of adjusted trees after one adjustment to the input tree
79 | * at the target Node.
80 | *
Four possible adjustments can be made to that node:
81 | *
82 | *
Swap this node with its child. (all possible positions)
We want to translate: "Return all titles of theory papers before 1970."
9 | * into (in for inproceedings):
10 | * SELECT in.title FROM in
11 | * WHERE in.area = 'Theory' AND in.year < 1970;
12 | *
13 | *
The direct parsing result of this natural language input is:
Suppose we have already successfully gone through the process of
28 | * nodes mapping and structural adjustment. Then we should arrive at a ParseTree
29 | * like this: (in for inproceedings)
Still need the adjustor to swap the position of "1970" and "before".
142 | */
	/**
	 * Manual smoke test: parse one sentence, attach NodeInfo by hand,
	 * strip the nodes marked meaningless, and print the resulting SQL.
	 * Prints intermediate trees to stdout at each stage.
	 */
	public static void removeMeaninglessNodesTest() {
		String input = "Return all titles of theory papers before 1970.";
		NLParser parser = new NLParser();
		ParseTree tree = new ParseTree(input, parser);
		System.out.println("ParseTree: ");
		System.out.println(tree);

		// Set NodeInfo
		// Hand-assigned mapping (skipping index 0, presumably the root —
		// TODO confirm genNodesArray() layout).
		Node[] nodes = tree.genNodesArray();
		nodes[1].info = new NodeInfo("SN", "SELECT");
		nodes[2].info = new NodeInfo("UNKNOWN", "meaningless");
		nodes[3].info = new NodeInfo("NN", "in.title");
		nodes[4].info = new NodeInfo("UNKNOWN", "meaningless");
		nodes[5].info = new NodeInfo("VN", "in.area");
		nodes[6].info = new NodeInfo("UNKNOWN", "meaningless");
		nodes[7].info = new NodeInfo("ON", "<");
		nodes[8].info = new NodeInfo("VN", "in.year");
		nodes[9].info = new NodeInfo("UNKNOWN", "meaningless");

		System.out.println("After setting nodeinfo:");
		System.out.println(tree);

		tree.removeMeaninglessNodes();

		System.out.println("After removing meaningless nodes");
		System.out.println(tree);

		SQLQuery query = tree.translateToSQL();

		System.out.println(query);

	}
175 |
	/**
	 * Entry point for running the manual translation demos from the
	 * command line. Uncomment the alternative call to run the
	 * meaningless-node removal demo instead.
	 */
	public static void main(String[] args) {
		testTranslation1();
		//removeMeaninglessNodesTest();
	}
180 |
181 | }
182 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/app/Controller.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.app;
2 |
3 | import java.sql.Connection;
4 | import java.sql.DriverManager;
5 | import java.sql.SQLException;
6 | import java.util.List;
7 |
8 | import javafx.collections.FXCollections;
9 | import com.dukenlidb.nlidb.archive.model.NLParser;
10 | import com.dukenlidb.nlidb.archive.model.Node;
11 | import com.dukenlidb.nlidb.archive.model.NodeInfo;
12 | import com.dukenlidb.nlidb.archive.model.NodeMapper;
13 | import com.dukenlidb.nlidb.archive.model.ParseTree;
14 | import com.dukenlidb.nlidb.archive.model.ParseTree.ParseTreeIterator;
15 | import com.dukenlidb.nlidb.archive.model.SQLQuery;
16 | import com.dukenlidb.nlidb.archive.model.SchemaGraph;
17 | import com.dukenlidb.nlidb.archive.ui.UserView;
18 |
19 |
20 | /**
21 | * The controller between com.dukenlidb.nlidb.model and view.
22 | * @author keping
23 | */
24 | public class Controller {
25 | private Connection connection = null;
26 | private SchemaGraph schema;
27 | private NLParser parser;
28 | private NodeMapper nodeMapper;
29 | private ParseTree parseTree;
30 | private UserView view;
31 | /**
32 | * Iterator for nodes mapping.
33 | */
34 | private ParseTreeIterator iter;
35 | /**
36 | * Attribute for nodes mapping, to indicate the current Node.
37 | */
38 | private Node node;
39 | private boolean mappingNodes = false;
40 | private boolean selectingTree = false;
41 | private boolean processing = false;
42 | private List treeChoices;
43 | private SQLQuery query;
44 |
45 | /**
46 | * Initialize the Controller.
47 | */
48 | public Controller(UserView view) {
49 | this.view = view;
50 | startConnection();
51 |
52 | try { nodeMapper = new NodeMapper();
53 | } catch (Exception e) { e.printStackTrace(); }
54 | parser = new NLParser(); // initialize parser, takes some time
55 |
56 | System.out.println("Controller initialized.");
57 | }
58 |
59 | /**
60 | * ONLY FOR TESTING. An empty constructor.
61 | */
62 | public Controller() {
63 |
64 | }
65 |
66 | /**
67 | * Start connection with the database and read schema graph
68 | */
69 | public void startConnection() {
70 |
71 | try { Class.forName("org.postgresql.Driver"); }
72 | catch (ClassNotFoundException e1) { }
73 |
74 | System.out.println("PostgreSQL JDBC Driver Registered!");
75 |
76 | try {
77 | connection = DriverManager.getConnection("jdbc:postgresql://127.0.0.1:5432/dblp", "dblpuser", "dblpuser");
78 | } catch (SQLException e) {
79 | e.printStackTrace();
80 | }
81 | System.out.println("Connection successful!");
82 |
83 | try {
84 | schema = new SchemaGraph(connection);
85 | view.setDisplay("Database Schema:\n\n"+schema.toString());
86 | } catch (SQLException e) {
87 | e.printStackTrace();
88 | }
89 |
90 | }
91 |
92 | /**
93 | * Close connection with the database.
94 | */
95 | public void closeConnection() {
96 | try {
97 | if (connection != null) { connection.close(); }
98 | } catch (SQLException e) {
99 | e.printStackTrace();
100 | }
101 | System.out.println("Connection closed.");
102 | }
103 |
104 | // ---- Methods for nodes mapping ---- //
105 | /**
106 | * Helper method for nodes mapping, displaying the currently mapping Node
107 | * and the choices on the view.
108 | * @param choices
109 | */
110 | private void setChoicesOnView(List choices) {
111 | view.setDisplay("Mapping nodes: \n"+parseTree.getSentence()+"\n");
112 | view.appendDisplay("Currently on: "+node);
113 | view.setChoices(FXCollections.observableArrayList(choices));
114 | }
115 |
116 | /**
117 | * Terminates the mapping Nodes process by setting the boolean mappingNodes false;
118 | */
119 | private void finishNodesMapping() {
120 | view.setDisplay("Nodes mapped.\n"+parseTree.getSentence());
121 | mappingNodes = false;
122 | view.removeChoiceBoxButton();
123 | processAfterNodesMapping();
124 | }
125 |
126 | /**
127 | * Start the nodes mapping process. A boolean will be set to indicate that
128 | * the application is in the process of mapping Nodes. Cannot call startMappingNodes
129 | * again during mapping Nodes. After this is called, the view shows the choices
130 | * of NodeInfos for a node, waiting for the user to choose one.
131 | */
132 | public void startMappingNodes() {
133 | if (mappingNodes) { return; }
134 | view.showNodesChoice();
135 |
136 | mappingNodes = true;
137 | iter = parseTree.iterator();
138 | if (!iter.hasNext()) {
139 | finishNodesMapping();
140 | return;
141 | }
142 |
143 | node = iter.next();
144 | List choices = nodeMapper.getNodeInfoChoices(node, schema);
145 | if (choices.size() == 1) { chooseNode(choices.get(0)); }
146 | else { setChoicesOnView(choices); }
147 | // After this wait for the button to call chooseNode
148 | }
149 |
150 | /**
151 | * Choose NodeInfo for the current Node. This method is called when the user
152 | * clicked the confirmChoice button, or automatically called when the choices
153 | * of NodeInfo contains only one element.
154 | * @param info {@link NodeInfo}
155 | */
156 | public void chooseNode(NodeInfo info) {
157 | if (!mappingNodes) { return; }
158 | // System.out.println("Now the tree is:");
159 | // System.out.println(parseTree);
160 | node.setInfo(info);
161 | if (!iter.hasNext()) {
162 | finishNodesMapping();
163 | return;
164 | }
165 | node = iter.next();
166 | List choices = nodeMapper.getNodeInfoChoices(node, schema);
167 | if (choices.size() == 1) { chooseNode(choices.get(0)); }
168 | else { setChoicesOnView(choices); }
169 | // After this wait for the button to call chooseNode
170 | }
171 | // ----------------------------------- //
172 |
173 |
174 | // ---- Methods for trees selection ---- //
175 | public void startTreeSelection() {
176 | if (selectingTree) { return; }
177 | view.showTreesChoice();
178 | selectingTree = true;
179 | treeChoices = parseTree.getAdjustedTrees();
180 | }
181 |
182 | public void showTree(int index) {
183 | view.setDisplay(treeChoices.get(index).toString());
184 | }
185 |
186 | public void chooseTree(int index) {
187 | parseTree = treeChoices.get(index);
188 | finishTreeSelection();
189 | }
190 |
191 | public void finishTreeSelection() {
192 | selectingTree = false;
193 | view.removeTreesChoices();
194 | processAfterTreeSelection();
195 | }
196 | // ------------------------------------- //
197 |
198 | public void processAfterTreeSelection() {
199 | System.out.println("The tree before implicit nodes insertion: ");
200 | System.out.println(parseTree);
201 | parseTree.insertImplicitNodes();
202 | System.out.println("Going to do translation for tree: ");
203 | System.out.println(parseTree);
204 | query = parseTree.translateToSQL(schema);
205 | view.setDisplay(query.toString());
206 | processing = false;
207 | }
208 |
209 | public void processAfterNodesMapping() {
210 | System.out.println("Going to remove meaningless nodes for tree: ");
211 | System.out.println(parseTree);
212 | parseTree.removeMeaninglessNodes();
213 | parseTree.mergeLNQN();
214 | startTreeSelection();
215 | }
216 |
217 | /**
218 | * Process natural language and return an sql translateNL.
219 | * @param nl
220 | * @return
221 | */
222 | public void processNaturalLanguage(String input) {
223 | if (processing) { view.appendDisplay("\nCurrently processing a sentence!\n"); }
224 | processing = true;
225 | parseTree = new ParseTree(input, parser);
226 | startMappingNodes();
227 | }
228 |
229 | }
230 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/WordNet.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.io.File;
4 | import java.net.URL;
5 | import java.util.ArrayList;
6 | import java.util.HashSet;
7 | import java.util.List;
8 | import java.util.Set;
9 |
10 | import edu.mit.jwi.IRAMDictionary;
11 | import edu.mit.jwi.RAMDictionary;
12 | import edu.mit.jwi.data.ILoadPolicy;
13 | import edu.mit.jwi.item.IIndexWord;
14 | import edu.mit.jwi.item.ISynset;
15 | import edu.mit.jwi.item.ISynsetID;
16 | import edu.mit.jwi.item.IWordID;
17 | import edu.mit.jwi.item.POS;
18 | import edu.mit.jwi.item.Pointer;
19 | import edu.mit.jwi.morph.WordnetStemmer;
20 |
/**
 * Thin wrapper around the MIT JWI WordNet dictionary providing a
 * Wu-Palmer-style noun similarity measure based on the hypernym hierarchy.
 */
public class WordNet {
	String sep = File.separator;
	String wordNetDir = "lib" + sep + "WordNet-3.0" + sep + "dict";
	URL url;
	IRAMDictionary dict;
	WordnetStemmer stemmer;

	/**
	 * Load the WordNet dictionary fully into memory (slow) and build a stemmer.
	 * @throws Exception if the dictionary files cannot be opened/loaded
	 */
	public WordNet() throws Exception {
		url = new URL("file", null, wordNetDir);
		dict = new RAMDictionary(url, ILoadPolicy.NO_LOAD);
		dict.open();
		System.out.println("Loading wordNet...");
		dict.load(true); // load dictionary into memory
		System.out.println("WordNet loaded.");

		stemmer = new WordnetStemmer(dict);
	}

	/**
	 * Resolve each noun stem to its WordNet word IDs; stems unknown to the
	 * dictionary are skipped.
	 * (Fixed: the dictionary lookup is now done once per stem — it was
	 * previously performed twice, once for the null check and once for the
	 * word IDs.)
	 */
	private List collectNounWordIDs(List stems) {
		List wordIDs = new ArrayList<>();
		for (String stem : stems) {
			IIndexWord indexWord = dict.getIndexWord(stem, POS.NOUN);
			if (indexWord != null) {
				wordIDs.addAll(indexWord.getWordIDs());
			}
		}
		return wordIDs;
	}

	/** Map word IDs to their synsets, preserving order. */
	private List toSynsets(List wordIDs) {
		List synsets = new ArrayList<>();
		for (IWordID wID : wordIDs) { synsets.add(dict.getWord(wID).getSynset()); }
		return synsets;
	}

	/**
	 * Find the Wu-Palmer similarity of two nouns: 2*N3 / (N1 + N2 + 2*N3),
	 * where N1/N2 are the hypernym distances from each word to their closest
	 * common ancestor synset and N3 is that ancestor's depth.
	 * @param word1 first noun (special characters are stripped)
	 * @param word2 second noun
	 * @return similarity in [0, 1]; 0.0 when either word is unknown or empty
	 */
	public double similarity(String word1, String word2) {
		// remove all special characters from words
		if (word1.equals("") || word2.equals("")) { return 0.0; }
		word1 = word1.replaceAll("[^a-zA-Z0-9]", "");
		word2 = word2.replaceAll("[^a-zA-Z0-9]", "");
		if (word1.equals("") || word2.equals("")) { return 0.0; }
		// Special symbols in a word cause the stemmer to throw, hence the
		// sanitization above.
		List stems1 = stemmer.findStems(word1, POS.NOUN);
		List stems2 = stemmer.findStems(word2, POS.NOUN);

		if (stems1.isEmpty() || stems2.isEmpty()) {
			// One word cannot be identified in WordNet.
			return 0.0;
		}

		// visitedX.get(k) holds the synsets reachable from wordX in exactly
		// k hypernym steps (level-by-level BFS up the hierarchy).
		ArrayList> visited1, visited2;
		visited1 = new ArrayList<>();
		visited2 = new ArrayList<>();

		List wordIDs1 = collectNounWordIDs(stems1);
		if (wordIDs1.isEmpty()) { return 0.0; }
		List synsets1 = toSynsets(wordIDs1);
		visited1.add(new HashSet (synsets1));

		List wordIDs2 = collectNounWordIDs(stems2);
		if (wordIDs2.isEmpty()) { return 0.0; }
		List synsets2 = toSynsets(wordIDs2);
		visited2.add(new HashSet (synsets2));

		boolean commonFound = false;
		ISynset commonSynset = null;
		boolean endSearch1 = false;
		boolean endSearch2 = false;

		int commonSynsetPos1 = -1;
		int commonSynsetPos2 = -1;

		// Alternately expand both frontiers until the levels intersect or
		// both searches hit the top of the hierarchy.
		while (!commonFound && !(endSearch1 && endSearch2)) {
			int sz1 = visited1.size();
			int sz2 = visited2.size();
			if (!commonFound && !endSearch1) { // check the newest of 1 against all of 2
				for (int i = 0; i < sz2; i++) {
					if (intersection(visited1.get(sz1-1), visited2.get(i)) != null) {
						commonSynsetPos1 = sz1-1;
						commonSynsetPos2 = i;
						commonSynset = intersection(visited1.get(sz1-1), visited2.get(i));
						commonFound = true;
						break;
					}
				}
			}
			if (!commonFound && !endSearch2) { // check the newest of 2 against all of 1
				for (int i = 0; i < sz1; i++) {
					if (intersection(visited1.get(i), visited2.get(sz2-1)) != null) {
						commonSynsetPos1 = i;
						commonSynsetPos2 = sz2-1;
						commonSynset = intersection(visited1.get(i), visited2.get(sz2-1));
						commonFound = true;
						break;
					}
				}
			}
			if (!commonFound) {
				if (!endSearch1) {
					Set hyperSet1 = getHyperSet(visited1.get(sz1-1));
					if (hyperSet1.isEmpty()) { endSearch1 = true; }
					else { visited1.add(hyperSet1); }
				}
				if (!endSearch2) {
					Set hyperSet2 = getHyperSet(visited2.get(sz2-1));
					if (hyperSet2.isEmpty()) { endSearch2 = true; }
					else { visited2.add(hyperSet2); }
				}
			}
		}

		if (commonSynset == null) { return 0.0; }

		// N1/N2: levels climbed from each word; N3: depth of the common ancestor.
		int N1 = commonSynsetPos1;
		int N2 = commonSynsetPos2;
		int N3 = findDepth(commonSynset);

		return 2*N3 / (double) (N1+N2+2*N3);
	}

	/**
	 * Depth of a synset = number of hypernym levels between it and a
	 * top-level synset (one with no hypernyms), found by upward BFS.
	 */
	private int findDepth(ISynset synset) {
		if (synset.getRelatedSynsets(Pointer.HYPERNYM).isEmpty()) { return 0; }
		List> list = new ArrayList<>();
		Set set = new HashSet<>();
		set.add(synset);
		list.add(set);
		boolean topReached = false;
		int depth = -1;
		while (!topReached) {
			Set nextSet = new HashSet<>();
			for (ISynset syn : list.get(list.size()-1)) {
				List hyperIDs = syn.getRelatedSynsets(Pointer.HYPERNYM);
				if (!hyperIDs.isEmpty()) {
					for (ISynsetID hyperID : hyperIDs) { nextSet.add(dict.getSynset(hyperID)); }
				} else {
					// A synset with no hypernym ends the climb at this level.
					topReached = true;
					depth = list.size()-1;
					break;
				}
			}
			list.add(nextSet);
		}
		return depth;
	}

	/** Union of the direct hypernym synsets of every synset in {@code set}. */
	private Set getHyperSet(Set set) {
		Set hyperSet = new HashSet<>();
		for (ISynset syn : set) {
			List hyperIDs = syn.getRelatedSynsets(Pointer.HYPERNYM);
			if (!hyperIDs.isEmpty()) {
				for (ISynsetID hyperID : hyperIDs) { hyperSet.add(dict.getSynset(hyperID)); }
			}
		}
		return hyperSet;
	}

	/** Return one synset present in both sets, or null when disjoint. */
	private ISynset intersection(Set set1, Set set2) {
		for (ISynset syn2 : set2) {
			if (set1.contains(syn2)) { return syn2; }
		}
		return null;
	}

	/**
	 * Testing method
	 * @param args
	 * @throws Exception
	 */
	public static void main(String[] args) throws Exception {
		WordNet net = new WordNet();
		String word1 = "scopes";
		String word2 = "book";
		System.out.printf("WUP similarity between %s and %s is: %f\n", word1, word2, net.similarity(word1, word2));
	}

}
226 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/SyntacticEvaluator.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.List;
4 |
5 | public class SyntacticEvaluator {
6 |
7 | int numOfInvalid;
8 |
9 | public SyntacticEvaluator() {
10 | numOfInvalid = 0;
11 | }
12 |
13 | /**
14 | * a root is invalid if:
15 | * it has no child;
16 | * it has only one child and this child is not SN;
17 | * it has more than one child and other than the first child is not ON.
18 | * @param node
19 | * @return
20 | */
21 | private static int checkROOT(Node node){
22 | int numOfInvalid = 0;
23 | List children = node.getChildren();
24 | int sizeOfChildren = children.size();
25 |
26 | if (sizeOfChildren == 0){
27 | numOfInvalid++;
28 | node.isInvalid = true;
29 | }
30 | else if (sizeOfChildren == 1 && !children.get(0).getInfo().getType().equals("SN")){
31 | numOfInvalid++;
32 | node.isInvalid = true;
33 | }
34 | else if (sizeOfChildren > 1){
35 | if (!children.get(0).getInfo().getType().equals("SN")){
36 | numOfInvalid++;
37 | node.isInvalid = true;
38 | }
39 | else {
40 | for (int j = 1; j < sizeOfChildren; j++){
41 | if (!children.get(j).getInfo().getType().equals("ON")){
42 | numOfInvalid++;
43 | node.isInvalid = true;
44 | }
45 | }
46 | }
47 | }
48 | return numOfInvalid;
49 | }
50 |
51 | /**
52 | * a SN is not valid if:
53 | * it has more than 1 child;
54 | * it has 1 child but this child is not GNP (FN or NN).
55 | * @param node
56 | * @return
57 | */
58 | private static int checkSN(Node node){
59 | int numOfInvalid = 0;
60 | List children = node.getChildren();
61 | int sizeOfChildren = children.size();
62 |
63 | //SN can only have one child from FN or NN
64 | if (sizeOfChildren != 1){
65 | numOfInvalid++;
66 | node.isInvalid = true;
67 | }
68 | else{
69 | String childType = children.get(0).getInfo().getType();
70 | if (!(childType.equals("NN") || childType.equals("FN"))){
71 | numOfInvalid++;
72 | node.isInvalid = true;
73 | }
74 | }
75 |
76 | return numOfInvalid;
77 | }
78 |
79 | /**
80 | * a ON is invalid if:
81 | * (1) in ComplexCondition (its parent is ROOT):
82 | * its number of children is not 2 (left & right subtrees);
83 | * it has 2 children, but first one is not GNP, or second one is not GNP/VN/FN.
84 | * (2) in Condition (its parent is NN):
85 | * its number of children is not 1;
86 | * it has 1 child, but the child is not VN.
87 | * @param node
88 | * @return
89 | */
90 | private static int checkON(Node node){
91 | int numOfInvalid = 0;
92 | String parentType = node.getParent().getInfo().getType();
93 | List children = node.getChildren();
94 | int sizeOfChildren = children.size();
95 |
96 | if (parentType.equals("ROOT")){
97 | if (sizeOfChildren != 2){
98 | numOfInvalid++;
99 | node.isInvalid = true;
100 | }
101 | else{
102 | for (int j = 0; j children = node.getChildren();
146 | int sizeOfChildren = children.size();
147 |
148 | //NP=NN+NN*Condition. Second NN has no child.
149 | if (parentType.equals("NN")){
150 | if (sizeOfChildren != 0){ //this rule is different from figure 7 (a), but I think this makes sense
151 | numOfInvalid++;
152 | node.isInvalid = true;
153 | }
154 | }
155 | //SN+GNP, or ON+GNP, or FN+GNP. and GNP=NP=NN+NN*Condition. First NN can have any number of children from NN,ON,VN.
156 | else if (parentType.equals("SN") || parentType.equals("FN") || parentType.equals("ON")){
157 | if (sizeOfChildren != 0){
158 | for (int j = 0; j < sizeOfChildren; j++){
159 | String childType = children.get(j).getInfo().getType();
160 | if (!(childType.equals("NN") || childType.equals("VN") || childType.equals("ON"))){
161 | numOfInvalid++;
162 | node.isInvalid = true;
163 | break;
164 | }
165 | }
166 | }
167 | }
168 |
169 | return numOfInvalid;
170 | }
171 |
172 | /**
173 | * a VN is invalid if:
174 | * it has children.
175 | * @param node
176 | * @return
177 | */
178 | private static int checkVN(Node node){
179 | int numOfInvalid = 0;
180 | //String parentType = node.getParent().getInfo().getType();
181 | List children = node.getChildren();
182 | int sizeOfChildren = children.size();
183 |
184 | if (sizeOfChildren != 0){ //VN cannot have children
185 | numOfInvalid++;
186 | node.isInvalid = true;
187 | }
188 | /*
189 | else if (!(parentType.equals("ON") || parentType.equals("NN"))){ //VN can only be child of ON and NN
190 | numOfInvalid++;
191 | node.isInvalid = true;
192 | }
193 | */
194 | return numOfInvalid;
195 | }
196 |
197 | /**
198 | * a FN is valid if:
199 | * ON+FN, or ON+GNP, or SN+GNP, or FN+GNP. and GNP=FN+GNP,
200 | * FN can be child of ON, without children or only 1 child of NN or FN,
201 | * FN can be child of SN, with only 1 child of NN or FN,
202 | * FN can be child of FN, with only 1 child of NN or FN.
203 | * @param node
204 | * @return
205 | */
206 | private static int checkFN(Node node){
207 | int numOfInvalid = 0;
208 | String parentType = node.getParent().getInfo().getType();
209 | List children = node.getChildren();
210 | int sizeOfChildren = children.size();
211 |
212 | if (sizeOfChildren == 0){
213 | if (!parentType.equals("ON")){
214 | numOfInvalid++;
215 | node.isInvalid = true;
216 | }
217 | }
218 | else if (sizeOfChildren == 1){
219 | String childType = children.get(0).getInfo().getType();
220 | if (!(parentType.equals("ON") || parentType.equals("SN") /*|| parentType.equals("FN")*/)){
221 | numOfInvalid++;
222 | node.isInvalid = true;
223 | }
224 | else if (!(childType.equals("NN") /*|| childType.equals("FN")*/)){
225 | numOfInvalid++;
226 | node.isInvalid = true;
227 | }
228 | }
229 | else{
230 | numOfInvalid++;
231 | node.isInvalid = true;
232 | }
233 |
234 | return numOfInvalid;
235 | }
236 |
237 | /**
238 | * Number of invalid tree nodes according to the grammar:
239 | * Q -> (SClause)(ComplexCindition)*
240 | * SClause -> SELECT + GNP
241 | * ComplexCondition -> ON + (LeftSubTree*RightSubTree)
242 | * LeftSubTree -> GNP
243 | * RightSubTree -> GNP | VN | FN
244 | * GNP -> (FN + GNP) | NP
245 | * NP -> NN + (NN)*(Condition)*
246 | * Condition -> VN | (ON + VN)
247 | *
248 | * +: parent-child relationship
249 | * *: sibling relationship
250 | * |: or
251 | *
252 | * Basic rule: Check invalidity only considering its children
253 | * @param T
254 | * @return
255 | */
256 | public static int numberOfInvalidNodes (ParseTree T){
257 | int numOfInvalid = 0; //number of invalid tree nodes
258 | for (Node curNode : T) {
259 | String curType = curNode.getInfo().getType();
260 | if (curType.equals("ROOT")){ //ROOT
261 | numOfInvalid = numOfInvalid + checkROOT(curNode);
262 | }
263 | if (curType.equals("SN")){ // select node
264 | numOfInvalid = numOfInvalid + checkSN(curNode);
265 | }
266 | else if (curType.equals("ON")){ //operator node
267 | numOfInvalid = numOfInvalid + checkON(curNode);
268 | }
269 | else if (curType.equals("NN")){ //name node
270 | numOfInvalid = numOfInvalid + checkNN(curNode);
271 | }
272 | else if (curType.equals("VN")){ //value node
273 | numOfInvalid = numOfInvalid + checkVN(curNode);
274 | }
275 | else if (curType.equals("FN")){ //function nodes
276 | numOfInvalid = numOfInvalid + checkFN(curNode);
277 | }
278 | }
279 | return numOfInvalid;
280 | }
281 |
282 | }
283 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/SchemaGraph.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.sql.Connection;
4 | import java.sql.DatabaseMetaData;
5 | import java.sql.DriverManager;
6 | import java.sql.ResultSet;
7 | import java.sql.SQLException;
8 | import java.sql.Statement;
9 | import java.util.ArrayList;
10 | import java.util.HashMap;
11 | import java.util.HashSet;
12 | import java.util.LinkedList;
13 | import java.util.List;
14 | import java.util.Map;
15 | import java.util.Set;
16 |
17 |
/**
 * An undirected "schema graph" over the tables of a relational database,
 * built from JDBC metadata. For every table it records the column types, a
 * random sample of values per column, and the primary key. Two tables are
 * connected when the primary key of one is contained in the other table's
 * columns (and the two primary keys differ); shortest join paths between
 * tables are found with BFS over this connectivity.
 */
public class SchemaGraph {

	/** table name -> (column name -> column type) */
	private Map<String, Map<String, String>> tables;

	/** table name -> (column name -> sampled column values) */
	private Map<String, Map<String, Set<String>>> tableRows;

	/**
	 * table name -> primary key (set of column names).
	 * Two tables are connected only if the primary key of table1 is a
	 * column of table2, but NOT the primary key of table2. The graph has
	 * no direction.
	 */
	private Map<String, Set<String>> keys;

	/** table name -> names of the tables directly joinable with it */
	private Map<String, Set<String>> connectivity;

	/**
	 * Constructs a schema graph from database metadata: reads every table
	 * and its columns, samples up to 2000 random values per column, then
	 * reads primary keys and computes table connectivity.
	 * All statements and result sets are closed via try-with-resources
	 * (the original leaked them on exception paths).
	 * @param c an open connection; it is NOT closed by this constructor
	 * @throws SQLException if metadata access or a sampling query fails
	 */
	public SchemaGraph(Connection c) throws SQLException {
		System.out.println("Retrieving schema graph...");
		DatabaseMetaData meta = c.getMetaData();
		tables = new HashMap<>();
		tableRows = new HashMap<>();
		String[] types = {"TABLE"};
		try (ResultSet rsTable = meta.getTables(null, null, "%", types);
				Statement stmt = c.createStatement()) {
			while (rsTable.next()) {
				String tableName = rsTable.getString("TABLE_NAME");
				Map<String, String> table = new HashMap<>();
				Map<String, Set<String>> tableRow = new HashMap<>();
				tables.put(tableName, table);
				tableRows.put(tableName, tableRow);

				try (ResultSet rsColumn = meta.getColumns(null, null, tableName, null)) {
					while (rsColumn.next()) {
						// Retrieve column info for each table.
						String columnName = rsColumn.getString("COLUMN_NAME");
						String columnType = rsColumn.getString("TYPE_NAME");
						table.put(columnName, columnType);
						// Draw a random sample of up to 2000 values per column.
						// Identifiers come from the database's own metadata; they
						// are concatenated because '?' cannot bind identifiers.
						// NOTE(review): ORDER BY RANDOM() is PostgreSQL/SQLite
						// syntax — confirm if other backends must be supported.
						String query = "SELECT " + columnName + " FROM " + tableName
								+ " ORDER BY RANDOM() LIMIT 2000;";
						Set<String> columnValues = new HashSet<>();
						tableRow.put(columnName, columnValues);
						try (ResultSet rows = stmt.executeQuery(query)) {
							while (rows.next()) {
								String columnValue = rows.getString(1);
								// Skip SQL NULLs in the sample.
								if (!rows.wasNull()) {
									columnValues.add(columnValue);
								}
							}
						}
					}
				}
			}
		}
		readPrimaryKeys(meta);
		findConnectivity();
		System.out.println("Schema graph retrieved.");
	}

	/**
	 * Reads the primary key columns of every known table into {@link #keys}.
	 * @param meta the connection's metadata handle
	 * @throws SQLException if metadata access fails
	 */
	private void readPrimaryKeys(DatabaseMetaData meta) throws SQLException {
		keys = new HashMap<>();
		for (String tableName : tables.keySet()) {
			Set<String> primaryKey = new HashSet<>();
			keys.put(tableName, primaryKey);
			try (ResultSet rsPrimaryKey = meta.getPrimaryKeys(null, null, tableName)) {
				while (rsPrimaryKey.next()) {
					primaryKey.add(rsPrimaryKey.getString("COLUMN_NAME"));
				}
			}
		}
	}

	/**
	 * Marks two tables as connected (in both directions) whenever they share
	 * join keys according to {@link #getJoinKeys(String, String)}.
	 */
	private void findConnectivity() {
		connectivity = new HashMap<>();
		for (String tableName : tables.keySet()) {
			connectivity.put(tableName, new HashSet<>());
		}
		for (String table1 : tables.keySet()) {
			for (String table2 : tables.keySet()) {
				if (table1.equals(table2)) { continue; }
				if (!getJoinKeys(table1, table2).isEmpty()) {
					connectivity.get(table1).add(table2);
					connectivity.get(table2).add(table1);
				}
			}
		}
	}

	/**
	 * Returns the columns on which the two tables can be joined directly:
	 * the primary key of one table when all of its columns also exist in the
	 * other table. Tables with equal primary keys are treated as not
	 * directly joinable.
	 * @param table1 a table name known to this graph
	 * @param table2 a table name known to this graph
	 * @return the join key columns, or an empty set if no direct join exists
	 */
	public Set<String> getJoinKeys(String table1, String table2) {
		Set<String> table1Keys = keys.get(table1);
		Set<String> table2Keys = keys.get(table2);
		if (table1Keys.equals(table2Keys)) { return new HashSet<>(); }
		boolean keys1ContainedIn2 = true;
		for (String table1Key : table1Keys) {
			if (!tables.get(table2).containsKey(table1Key)) {
				keys1ContainedIn2 = false;
				break;
			}
		}
		if (keys1ContainedIn2) { return new HashSet<>(table1Keys); }

		boolean keys2ContainedIn1 = true;
		for (String table2Key : table2Keys) {
			if (!tables.get(table1).containsKey(table2Key)) {
				keys2ContainedIn1 = false;
				break;
			}
		}
		if (keys2ContainedIn1) { return new HashSet<>(table2Keys); }

		return new HashSet<>();
	}

	/**
	 * Returns the shortest join path between two tables as a list of table
	 * names, e.g. [table1, table3, table2], found with BFS over the
	 * connectivity graph. The join keys for each hop can be obtained with
	 * {@link #getJoinKeys(String, String)}.
	 * @param table1 the start table
	 * @param table2 the destination table (assumed different from table1)
	 * @return the path from table1 to table2, or an empty list if either
	 *         table is unknown or no path exists
	 */
	public List<String> getJoinPath(String table1, String table2) {
		if (!tables.containsKey(table1) || !tables.containsKey(table2)) {
			return new ArrayList<>();
		}
		// Assume table1 and table2 are different.
		// Find the shortest path using BFS.
		Map<String, Boolean> visited = new HashMap<>();
		for (String tableName : tables.keySet()) {
			visited.put(tableName, false);
		}
		Map<String, String> prev = new HashMap<>(); // child -> parent in the BFS tree
		LinkedList<String> queue = new LinkedList<>();
		queue.addLast(table1);
		visited.put(table1, true);
		boolean found = false;
		while (!queue.isEmpty() && !found) {
			String tableCurr = queue.removeFirst();
			for (String tableNext : connectivity.get(tableCurr)) {
				if (!visited.get(tableNext)) {
					visited.put(tableNext, true);
					queue.addLast(tableNext);
					prev.put(tableNext, tableCurr);
				}
				if (tableNext.equals(table2)) { found = true; }
			}
		}

		// Walk back from table2 to table1 along the BFS parent pointers.
		LinkedList<String> path = new LinkedList<>();
		if (visited.get(table2)) {
			String tableEnd = table2;
			path.push(tableEnd);
			while (prev.containsKey(tableEnd)) {
				tableEnd = prev.get(tableEnd);
				path.push(tableEnd);
			}
		}
		return path;
	}

	/** @return the names of all tables in the schema */
	public Set<String> getTableNames() {
		return tables.keySet();
	}

	/** @return the column names of the given table */
	public Set<String> getColumns(String tableName) {
		return tables.get(tableName).keySet();
	}

	/** @return the sampled values of the given column */
	public Set<String> getValues(String tableName, String columnName){
		return tableRows.get(tableName).get(columnName);
	}

	/** Lists every table with its columns and types, one table per block. */
	@Override
	public String toString() {
		// StringBuilder instead of repeated String concatenation in a loop;
		// the produced text is identical to the original implementation.
		StringBuilder sb = new StringBuilder();
		for (String tableName : tables.keySet()) {
			sb.append("table: ").append(tableName).append("\n");
			sb.append("{");
			Map<String, String> columns = tables.get(tableName);
			for (String colName : columns.keySet()) {
				sb.append(colName).append(": ").append(columns.get(colName)).append("\t");
			}
			sb.append("}\n\n");
		}
		return sb.toString();
	}

	/** Ad-hoc manual test against a local "dblp" PostgreSQL database. */
	public static void main(String[] args) throws Exception {
		Connection connection = DriverManager.getConnection("jdbc:postgresql://127.0.0.1:5432/dblp", "dblpuser", "dblpuser");
		SchemaGraph schema = new SchemaGraph(connection);
		System.out.println("The join path between article and authorship:");
		System.out.println(schema.getJoinPath("article", "authorship"));
		System.out.println("The join path between authorship and article:");
		System.out.println(schema.getJoinPath("authorship", "article"));
		System.out.println("The join path between inproceedings and authorship:");
		System.out.println(schema.getJoinPath("inproceedings", "authorship"));
		System.out.println("The join path between article and inproceedings:");
		System.out.println(schema.getJoinPath("article", "inproceedings"));
		System.out.println("----------------------------------------------");
		System.out.println("The join keys between article and authorship:");
		System.out.println(schema.getJoinKeys("article", "authorship"));
		System.out.println("The join keys between article and inproceedings:");
		System.out.println(schema.getJoinKeys("article", "inproceedings"));
		System.out.println("The join keys between inproceedings and authorship:");
		System.out.println(schema.getJoinKeys("inproceedings", "authorship"));
	}
}
238 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/TreeAdjustorTest.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.util.Collections;
4 | import java.util.List;
5 |
/**
 * Manual (main-method driven) tests for parse tree adjustment. Each test
 * hand-builds a small parse tree by wiring Node objects together — mirroring
 * figures from the NaLIR paper — and prints the result of the routine under
 * test to stdout for visual inspection; nothing is asserted automatically.
 */
public class TreeAdjustorTest {
/**
 * Exercises SyntacticEvaluator.numberOfInvalidNodes on the tree of
 * Figure 3 (a); the printed count should be 3 and node 6 should be valid.
 */
public static void numberOfInvalidNodesTest(){
//construct a tree in the paper,
//current test case is Figure 3 (a), output should be 3 (node 6 should not be invalid)
ParseTree T = new ParseTree();
Node[] nodes = new Node[9];

// Each node gets Node(id, word, "--") plus a NodeInfo(type, value).
nodes[0] = new Node(0, "ROOT", "--");
nodes[0].info = new NodeInfo("ROOT","ROOT");
nodes[1] = new Node(1, "return", "--");
nodes[1].info = new NodeInfo("SN","SELECT");
nodes[2] = new Node(2, "author", "--");
nodes[2].info = new NodeInfo("NN", "Author");
nodes[3] = new Node(3, "paper", "--");
nodes[3].info = new NodeInfo("NN", ">"); // NOTE(review): ">" as an NN value looks like a slip — confirm against Figure 3 (a)
nodes[4] = new Node(4, "more", "--");
nodes[4].info = new NodeInfo("ON", "Title"); // NOTE(review): "Title" as an ON value — confirm against the figure
nodes[5] = new Node(5, "Bob", "--");
nodes[5].info = new NodeInfo("VN", "Author");
nodes[6] = new Node(6, "VLDB", "--");
nodes[6].info = new NodeInfo("VN", "Journal");
nodes[7] = new Node(7, "after", "--");
nodes[7].info = new NodeInfo("ON", ">");
nodes[8] = new Node(8, "2000", "--");
nodes[8].info = new NodeInfo("VN", "Year");

// Wire the tree: 0 -> 1 -> 2 -> {3, 5, 7}; 3 -> 4; 5 -> 6; 7 -> 8.
// Both sides of each edge are set (children list and parent pointer).
T.root = nodes[0];
nodes[0].children.add(nodes[1]);
nodes[1].parent = nodes[0];
nodes[1].children.add(nodes[2]);
nodes[2].parent = nodes[1];
nodes[2].children.add(nodes[3]);
nodes[3].parent = nodes[2];
nodes[2].children.add(nodes[5]);
nodes[5].parent = nodes[2];
nodes[2].children.add(nodes[7]);
nodes[7].parent = nodes[2];
nodes[3].children.add(nodes[4]);
nodes[4].parent = nodes[3];
nodes[5].children.add(nodes[6]);
nodes[6].parent = nodes[5];
nodes[7].children.add(nodes[8]);
nodes[8].parent = nodes[7];

System.out.println("===========test for Running SyntacticEvaluator.numberOfInvalidNodes===========");
System.out.println("Input tree: "+T.toString());
System.out.println("Number of Invalid nodes: "+SyntacticEvaluator.numberOfInvalidNodes(T)+"\n");
System.out.println("Invalid nodes: ");
// Skip nodes[0]: the artificial ROOT is never reported as invalid.
for (int i = 1; i < nodes.length; i++){
if (nodes[i].isInvalid)
System.out.println(nodes[i]);
}
}

/**
 * Exercises ParseTree.mergeLNQN on a tree containing a quantifier node
 * (QN, node 4 "each") and prints the tree before and after the merge.
 */
public static void mergeLNQNTest() {
ParseTree T = new ParseTree();
Node[] nodes = new Node[9];

nodes[0] = new Node(0, "ROOT", "--");
nodes[0].info = new NodeInfo("ROOT","ROOT");
nodes[1] = new Node(1, "return", "--");
nodes[1].info = new NodeInfo("SN","SELECT");
nodes[2] = new Node(2, "conference", "--");
nodes[2].info = new NodeInfo("NN", "Author");
nodes[3] = new Node(3, "area", "--");
nodes[3].info = new NodeInfo("NN", "Title");
nodes[4] = new Node(4, "each", "--");
nodes[4].info = new NodeInfo("QN", ">");
nodes[5] = new Node(5, "papers", "--");
nodes[5].info = new NodeInfo("NN", "Author");
nodes[6] = new Node(6, "citations", "--");
nodes[6].info = new NodeInfo("NN", "Journal");
nodes[7] = new Node(7, "most", "--");
nodes[7].info = new NodeInfo("FN", ">");
nodes[8] = new Node(8, "total", "--");
nodes[8].info = new NodeInfo("FN", "Year");

// Wire the tree: 0 -> 1 -> 2 -> {3, 5}; 3 -> 4; 5 -> 6; 6 -> {7, 8}.
T.root = nodes[0];
nodes[0].children.add(nodes[1]);
nodes[1].parent = nodes[0];
nodes[1].children.add(nodes[2]);
nodes[2].parent = nodes[1];
nodes[2].children.add(nodes[3]);
nodes[3].parent = nodes[2];
nodes[2].children.add(nodes[5]);
nodes[5].parent = nodes[2];
nodes[3].children.add(nodes[4]);
nodes[4].parent = nodes[3];
nodes[5].children.add(nodes[6]);
nodes[6].parent = nodes[5];
nodes[6].children.add(nodes[7]);
nodes[7].parent = nodes[6];
nodes[6].children.add(nodes[8]);
nodes[8].parent = nodes[6];

System.out.println("===========test for Running mergeLNQN===========");
System.out.println("Input tree: "+T.toString());
ParseTree tree = T.mergeLNQN();
System.out.println("Output tree: "+tree.toString());
}

/**
 * Exercises TreeAdjustor.adjust on the same tree shape as mergeLNQNTest
 * and prints every adjusted tree it produces.
 */
public static void adjustTest(){
ParseTree T = new ParseTree();
Node[] nodes = new Node[9];

nodes[0] = new Node(0, "ROOT", "--");
nodes[0].info = new NodeInfo("ROOT","ROOT");
nodes[1] = new Node(1, "return", "--");
nodes[1].info = new NodeInfo("SN","SELECT");
nodes[2] = new Node(2, "conference", "--");
nodes[2].info = new NodeInfo("NN", "Author");
nodes[3] = new Node(3, "area", "--");
nodes[3].info = new NodeInfo("NN", "Title");
nodes[4] = new Node(4, "each", "--");
nodes[4].info = new NodeInfo("QN", ">");
nodes[5] = new Node(5, "papers", "--");
nodes[5].info = new NodeInfo("NN", "Author");
nodes[6] = new Node(6, "citations", "--");
nodes[6].info = new NodeInfo("NN", "Journal");
nodes[7] = new Node(7, "most", "--");
nodes[7].info = new NodeInfo("FN", ">");
nodes[8] = new Node(8, "total", "--");
nodes[8].info = new NodeInfo("FN", "Year");

// Same wiring as mergeLNQNTest: 0 -> 1 -> 2 -> {3, 5}; 3 -> 4; 5 -> 6; 6 -> {7, 8}.
T.root = nodes[0];
nodes[0].children.add(nodes[1]);
nodes[1].parent = nodes[0];
nodes[1].children.add(nodes[2]);
nodes[2].parent = nodes[1];
nodes[2].children.add(nodes[3]);
nodes[3].parent = nodes[2];
nodes[2].children.add(nodes[5]);
nodes[5].parent = nodes[2];
nodes[3].children.add(nodes[4]);
nodes[4].parent = nodes[3];
nodes[5].children.add(nodes[6]);
nodes[6].parent = nodes[5];
nodes[6].children.add(nodes[7]);
nodes[7].parent = nodes[6];
nodes[6].children.add(nodes[8]);
nodes[8].parent = nodes[6];

System.out.println("===========test for Running adjust() in TreeAdjustor===========");
System.out.println("Input tree: "+T.toString());
List treeList = TreeAdjustor.adjust(T);
System.out.println("Output size: "+treeList.size());
System.out.println("Output trees:");
for (int j = 0; j < treeList.size(); j++){
System.out.println("Tree "+j+" :");
System.out.println(treeList.get(j));
}
}

/**
 * Exercises TreeAdjustor.getAdjustedTrees on a QN-free tree and prints the
 * highest-scoring adjusted trees.
 */
public static void getAdjustedTreesTest(){
ParseTree T = new ParseTree();
Node[] nodes = new Node[8];

nodes[0] = new Node(0, "ROOT", "--");
nodes[0].info = new NodeInfo("ROOT","ROOT");
nodes[1] = new Node(1, "return", "--");
nodes[1].info = new NodeInfo("SN","SELECT");
nodes[2] = new Node(2, "conference", "--");
nodes[2].info = new NodeInfo("NN", "Author");
nodes[3] = new Node(3, "area", "--");
nodes[3].info = new NodeInfo("NN", "Title");
nodes[4] = new Node(4, "papers", "--");
nodes[4].info = new NodeInfo("NN", "Author");
nodes[5] = new Node(5, "citations", "--");
nodes[5].info = new NodeInfo("NN", "Journal");
nodes[6] = new Node(6, "most", "--");
nodes[6].info = new NodeInfo("FN", ">");
nodes[7] = new Node(7, "total", "--");
nodes[7].info = new NodeInfo("FN", "Year");

// Wire the tree: 0 -> 1 -> 2 -> {3, 4}; 4 -> 5; 5 -> {6, 7}.
T.root = nodes[0];
nodes[0].children.add(nodes[1]);
nodes[1].parent = nodes[0];
nodes[1].children.add(nodes[2]);
nodes[2].parent = nodes[1];
nodes[2].children.add(nodes[3]);
nodes[3].parent = nodes[2];
nodes[2].children.add(nodes[4]);
nodes[4].parent = nodes[2];
nodes[4].children.add(nodes[5]);
nodes[5].parent = nodes[4];
nodes[5].children.add(nodes[6]);
nodes[6].parent = nodes[5];
nodes[5].children.add(nodes[7]);
nodes[7].parent = nodes[5];

System.out.println("===========test for Running getAdjustedTrees() in TreeAdjustor===========");
System.out.println("The original tree:");
System.out.println(T);
System.out.println("Number of possible trees for choice:");
List result = TreeAdjustor.getAdjustedTrees(T);
System.out.println(result.size());
// Sort descending by score. NOTE(review): the int-subtraction comparator
// can overflow for extreme scores — confirm scores stay small.
Collections.sort(result, (t1, t2) -> (- t1.getScore() + t2.getScore()));
System.out.println("The three trees with highest scores look like:");
// NOTE(review): message says "three" but the loop prints 5 — confirm
// the intended count (and that result has at least 5 elements).
for (int i = 0; i < 5; i++) {
System.out.println(result.get(i));
}
}

/**
 * Exercises ParseTree.addON on the same tree shape as getAdjustedTreesTest
 * and prints the original tree again afterwards to show it is unmodified.
 */
public static void testAddON (){
ParseTree T = new ParseTree();
Node[] nodes = new Node[8];

nodes[0] = new Node(0, "ROOT", "--");
nodes[0].info = new NodeInfo("ROOT","ROOT");
nodes[1] = new Node(1, "return", "--");
nodes[1].info = new NodeInfo("SN","SELECT");
nodes[2] = new Node(2, "conference", "--");
nodes[2].info = new NodeInfo("NN", "Author");
nodes[3] = new Node(3, "area", "--");
nodes[3].info = new NodeInfo("NN", "Title");
nodes[4] = new Node(4, "papers", "--");
nodes[4].info = new NodeInfo("NN", "Author");
nodes[5] = new Node(5, "citations", "--");
nodes[5].info = new NodeInfo("NN", "Journal");
nodes[6] = new Node(6, "most", "--");
nodes[6].info = new NodeInfo("FN", ">");
nodes[7] = new Node(7, "total", "--");
nodes[7].info = new NodeInfo("FN", "Year");

// Same wiring as getAdjustedTreesTest: 0 -> 1 -> 2 -> {3, 4}; 4 -> 5; 5 -> {6, 7}.
T.root = nodes[0];
nodes[0].children.add(nodes[1]);
nodes[1].parent = nodes[0];
nodes[1].children.add(nodes[2]);
nodes[2].parent = nodes[1];
nodes[2].children.add(nodes[3]);
nodes[3].parent = nodes[2];
nodes[2].children.add(nodes[4]);
nodes[4].parent = nodes[2];
nodes[4].children.add(nodes[5]);
nodes[5].parent = nodes[4];
nodes[5].children.add(nodes[6]);
nodes[6].parent = nodes[5];
nodes[5].children.add(nodes[7]);
nodes[7].parent = nodes[5];

System.out.println("===========test for Running addON() in ParseTree===========");
System.out.println("The original tree:");
System.out.println(T);
ParseTree tree = T.addON();
System.out.println("After adding ON:");
System.out.println(tree);
System.out.println("The original tree:");
System.out.println(T);
}

/**
 * Entry point: individual tests are toggled by (un)commenting the calls.
 */
public static void main(String[] args) {
// numberOfInvalidNodesTest();
// mergeLNQNTest();
// adjustTest();
getAdjustedTreesTest();
// testAddON();
}

}
265 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/doc/report/midterm/midterm.tex:
--------------------------------------------------------------------------------
1 | \documentclass[twocolumn]{article}
2 |
3 | % Feel free to add more packages
4 | \usepackage{float, amsmath, amssymb, mathtools}
5 | \usepackage{graphicx, caption, color}
6 | \usepackage{tabularx, fullpage}
7 | %\usepackage{kotex}
8 | %\usepackage{multicol}
9 | \setlength{\columnsep}{1cm}
10 | \usepackage{comment, cite, wrapfig}
11 | \usepackage[utf8]{inputenc}
12 | \usepackage[hidelinks]{hyperref}
13 | \usepackage{courier}
14 | %\usepackage{geometry}
15 | \hypersetup{breaklinks=true}
16 | \urlstyle{same}
17 |
18 | \newcommand{\red}[1]{{\bf \color{red}#1}}
19 | \newcommand{\blue}[1]{{\bf \color{blue}#1}}
20 | \newcommand{\cut}[1]{}
21 |
22 |
23 | \begin{document}
24 |
25 | \title{Natural Language Interface for Relational Database\\
26 | \small{Midterm Report}}
27 |
28 | %Authors in alphabetical order of last names
29 | \author{Yilin Gao \\
30 | \small \texttt{yilin.gao@duke.edu} \and
31 | Keping Wang \\
32 | \small \texttt{kw238@duke.edu} \and
33 | Chengkang Xu \\
34 | \small \texttt{cx33@duke.edu} }
35 |
36 | \date{\today}
37 | \maketitle
38 |
39 | %%%================================================================%%%
40 | \section{Introduction}\label{sec:introduction}
41 |
42 | Writing SQL queries can be difficult, especially when it involves complex logic. As more and more non-expert users are accessing relational databases, it is very important to simplify their process of writing SQL queries. This project is going to build a Natural Language Interface for relational DataBases (NLIDB), closely following Li and Jagadish (2014)\cite{li2014}. NLIDB will be a tool for everyone to query data easily from relational databases.
43 |
Translating natural language into an SQL query isn't an easy job, not only because of the ambiguity of natural language, but also because users may make mistakes in writing natural language input, such as misspelling. We want the users to feel at ease using our interface, not afraid of being misinterpreted by the NLIDB, even if they cannot remember the exact database column names. So we follow Li and Jagadish (2014)\cite{li2014} and use an interactive interface to let users make choices in several ambiguous phases of the translation.
45 |
The main components for translating natural language to an SQL query are as follows:
47 |
48 | \begin{enumerate}
49 | \item Parse the natural language input into a parse tree using dependency syntax parser.
50 | \item Map the nodes in the parse tree to SQL keywords, table names, column names, and values. Here users may choose the desired mapping from ranked options.
51 | \item Adjust the structure of the parse tree to make it follow the structure of an SQL query. Here users may choose the desired structure from ranked options.
52 | \item Translate the parse tree to an SQL query.
53 | \end{enumerate}
54 |
55 | Up until this midterm report, we have completed steps 1 - 2 above. We have built an interactive graphical user interface (GUI), and established connection with a database to experiment with the two steps. Now the user can already choose the desired node mappings from the choices offered by our application.
56 |
Before the final report, we will finish steps 3 - 4 and tune the model with some hand-written natural language and SQL query pairs.
58 |
59 | %%%================================================================%%%
60 | \section{Related Work}
61 |
62 | Early day NLIDB systems were usually based on small scale database, which requires a small set of supported queries. Their parsing mechanism could only support ad-hoc methods and rules. Thus, early work would produce ambiguity if the database is scalable and natural language queries are "open-domain". Moreover, without the help of machine learning, early NLIDB systems cannot update their parsing methods as they accumulate more data.\cite{QATutorial}
63 |
Our approach involves machine learning in parsing the natural language input into a parse tree. Then we adjust the structure of the parse tree to obey the SQL syntax. Our approach can handle natural language input with more complicated structures than the simple keyword matching method.
65 |
66 | NLIDB here is a concrete application of the natural language QA systems.\cite{QATutorial} Currently, the mainstream approach for QA is the semantic parsing of questions. It can map natural language questions to logic forms or structured queries, and produce accurate answers when the query is complete and clear. However, the accuracy of answers will decrease if the input language is ambiguous, or if the logic relationship of the query is complicated. Due to our lack of training data, our NLIDB system cannot adopt the popular RNN (LSTM) for a direct and efficient translation. Still we are trying to allow more input ambiguity and structural complexity by letting the users choose the mappings and structures interactively.
67 |
68 | %%%================================================================%%%
69 | \section{Problem Definition}
70 |
71 | For this NLIDB, we have to first develop a GUI, and then design the \texttt{ParseTree} class. Then we need to develop parse tree node mapper, parse tree structure adjuster, and SQL query translator.
72 |
There are three main problems that we face. The first one is the choice of data structures in \texttt{ParseTree}. The second is what algorithms to use for each phase of the translation. The last problem is to specify the rules for different phases, such as what word should be mapped to the ``SELECT'' key word, and what rules a legal parse tree should follow before being translated to an SQL query.
74 |
75 | %%%================================================================%%%
76 | \section{Algorithms}
77 |
78 | In the natural language parsing phase, we use the feature based pos-tagger\cite{toutanova2003feature} and the neural network dependency parser\cite{chen2014fast} from the Stanford NLP package.
79 |
In the node mapping phase, other than mapping words with hard-coded rules, we compare words with table and column names in the database, which requires a word similarity score. The similarity score is the maximum of two subscores. The first is lexical similarity (similarity in spelling), which is the Jaccard coefficient here. The second is semantic similarity, for which we use WUP similarity\cite{wu1994verbs}. To compute WUP similarity, we have to do a breadth-first search to find the lowest common ancestor of two words in WordNet. The calculation of word similarity will be explained in detail in the next section.
81 |
82 | More algorithms will be needed for parse tree adjustment and SQL query generation in the future.
83 |
84 | %%%================================================================%%%
85 | \section{System Design}
86 |
87 | \begin{figure*}[ht]
88 | \centering
89 | \includegraphics[width=0.8\linewidth]{figures/nlidb_system_diagram.pdf}
90 | \caption{System Diagram}
91 | \end{figure*}
92 |
Our current system is implemented in Java, using Maven as the project management tool. The source code is divided into three parts: model, controller, and view. The model part takes care of realizing the major functions of the natural language database interface, like parsing natural language, mapping nodes, adjusting the node tree structure, and translating the tree into an SQL query. The controller wraps many models as attribute variables, and it takes charge of the interaction between the database and the view (GUI). And the view part uses JavaFX to design a GUI.
94 |
95 | Figure 1 is a diagram of our system. The boxes with solid frame lines are the ones we've already written, and the boxes with dashed frame lines are to be completed in the future.
96 |
97 | Below we’ll introduce the design ideas on two steps that we’ve completed: parsing natural language into a parse tree, and mapping the nodes of the parse tree to SQL components.
98 |
99 | \subsection{Natural Language Parser}
100 | We write the NLParser class to parse natural language from the user input in GUI to a dependency parse tree. The NLParser is just a wrapper of the Standford NLP pos-tagger and dependency syntax parser. A natural language sentence is first tagged with part-of-speech labels, and then parsed with dependency parser to a ParseTree.
101 |
102 | A ParseTree consists of an array of Nodes. Each Node has information about the natural language word and its corresponding SQL component. A Node also contains parent and children links pointing to other Nodes in the ParseTree.
103 |
104 | \subsection{Node Mapper}
105 | Then we map each of the Node into an SQL component. We iterate over the tree and map each Node according to a certain Node Type in Figure 2, according to predefined rules. There are 7 node types in total, and 5 of them, SN, ON, FN, QN, and LN have hard-coded mapping rules. For example, map word “return” or “Return” to an “SN” node with value “SELECT”. A word will first be searched against these five Node Types. If there is no match, the search will go on to the remaining two types, NN and VN.
106 |
107 | \begin{figure}[ht]
108 | \centering
109 | \includegraphics[width=0.9\linewidth]{figures/nodes_mapping_rules.png}
110 | \caption[caption for nodes mapping rules]{Nodes Mapping Rules\protect\footnotemark}
111 | \end{figure}
112 | \footnotetext{Taken from \cite{li2014}.}
113 |
The remaining two types, Name Node and Value Node, are decided by searching over the database for matching names or values. The matching of a word to names or values is decided by the word similarity score of two words. The word similarity score here is the maximum of semantic similarity and lexical similarity.
115 |
116 | Semantic similarity is the WUP similarity\cite{wu1994verbs} function using WordNet. WordNet is a net of synonym sets (synsets) connected with semantic and lexical pointers. Two most important semantic pointers are hypernym and hyponym, which connect the synsets to the tree that we are interested here, as Figure 3. In Figure 3, the WUP similarity between $C1$ and $C2$ is:
117 |
118 | $$ Sim_{WUP} = \frac{2*N3}{N1+N2+2*N3} $$
119 |
120 | \begin{figure}[ht]
121 | \centering
122 | \includegraphics[width=0.7\linewidth]{figures/wordnet_tree.png}
123 | \caption[caption for wordnet tree]{WUP word similarity.\protect\footnotemark }
124 | \end{figure}
125 | \footnotetext{Taken from \cite{wu1994verbs}.}
126 |
127 | One thing to note about WordNet is that each word can be in multiple synsets, and each synset can have multiple parents, so we use breadth-first-search to find the lowest of all possible common parents of two words.
128 |
129 |
130 | For lexical similarity between two words, we use the Jaccard coefficient:
131 |
132 | $$ J(A, B) = \frac{|A \cap B|}{|A \cup B|}$$
133 |
134 | where $A$ and $B$ are the set of characters of the two words respectively. The Jaccard coefficient may not be as good for measuring the lexical similarity of two words (as edit distance), but it is currently still used because it is a measure in range (0,1), which makes it easily compared with the WUP semantic similarity.
135 |
136 | To search over the database, we first visit the database, retrieve its schema, and store the Schema Graph as an attribute variable in the Controller class, so that each node mapping task don’t have to go through the slow database query. The Schema contains the table names, the column names of each column, and some sample distinct values of each column, such that they can be searched over to map Name Node or Value Node.
137 |
138 | Once we have the word similarity scores of one word to names and values in database, we rank different mapping choices by their similarity score, and return the highest several choices to the GUI for the user to choose. Here we add another node type for the user to choose from, that is “UNKNOWN”, which means that node doesn’t correspond to any meaningful SQL component. These meaningless nodes will be removed in later steps.
139 |
140 | Figure 4 is an example of a parse tree with nodes mapped to SQL components. The left part is a parse tree, and the right part is the mappings of all its nodes.
141 |
142 | \begin{figure}[ht]
143 | \centering
144 | \includegraphics[width=0.9\linewidth]{figures/nodes_mapping_example.png}
145 | \caption[caption for nodes mapping example]{Node Mapping Example.\protect\footnotemark }
146 | \end{figure}
147 | \footnotetext{Taken from \cite{li2014}.}
148 |
149 | \subsection{Implicit Node}
The main idea of inserting implicit nodes into the parse tree is to make sure that two nodes which are being compared have corresponding schemas in the database. Assuming invalid nodes are removed from the tree properly, there should be a tree with at most three branches. The leftmost subtree should contain the select node (SN) and a name node (NN). If a name node in the left subtree does not have an ancestor name node, then it is the core node of the left subtree. If the type of the core node in the left subtree is different from that of the right subtree, the real core node in the right subtree is deemed hidden, i.e. implicit. The implicit core node may cause an unreasonable comparison between two variables of different types due to the change of semantic meaning. An example of an implicit node: ``return all authors who wrote more than 100 papers''. In our previous processing of the parse tree, the right subtree would contain only the number 100, which is a value node (VN). In order to make the tree semantically meaningful, nodes in the left subtree are copied over to the right subtree.
151 |
After inserting name nodes based on the core node comparison, the next step is to check the constraints on both core nodes. For example: if the left core node has the constraints of year greater than 2007 and area of ``Database'', the right core node should also have the same constraints. If the right core node does not conform to these constraints, then the constraint nodes should be copied from the left subtree to the right subtree.
153 |
Our implementation of the implicit node insertion starts from the root of the tree. It checks if any node below the select node (SN) is missing in the middle subtree. If there is, it copies it over to the middle subtree. Then it repeats the same procedure for the middle and rightmost subtrees. After the name node is copied over, it starts from the middle subtree to check if there is any constraint missing in the rightmost subtree. If there is, those constraints are copied over to the right subtree. Finally, if the root of a subtree is an ON (operator node), and the first node connected to the root in the subtree is a name node, there may be a function node missing. Our implementation tries to insert a function node in between to make the subtree semantically meaningful.
155 |
156 | %%%================================================================%%%
157 | \section{Experiments}
158 | The JavaFX application runs on JVM, and we’ve tested it on an Ubuntu 16.04 machine. We are using JDBC to connect to the PostgreSQL database of dblp, which we used in homework 1.
159 |
160 | Our program has already finished part of final target.
161 |
162 | \begin{figure}[ht]
163 | \centering
164 | \includegraphics[width=0.8\linewidth]{figures/program_structure.png}
165 | \caption{Program Structure}
166 | \end{figure}
167 |
168 | Figure 5 is a detailed structure on programs that we have already finished or at least conceived.
169 |
170 | We have programmed a GUI in \texttt{UserView.java} and a connection between database and GUI in \texttt{Controller.java}. To realize natural language query, our first step in implementing the translation process is to parse the natural language into SQL keywords using a predefined natural language parser called Stanford NLP. The parsing process is written in \texttt{NLParser.java} and \texttt{ParserTree.java}. After we get the parser tree, we map each tree node (word in initial natural language input) to certain component of SQL and database. The mapping is written in \texttt{NodeMapper.java}.
171 |
172 | \begin{figure*}[ht]
173 | \centering
174 | \includegraphics[width=0.7\linewidth]{figures/gui_nodes_mapping.png}
175 | \caption{GUI during Node Mapping}
176 | \end{figure*}
177 |
178 | Figure 6 is a screenshot of our application during the nodes mapping stage. The upper left part is where the user input comes. The bottom left part is supposed to be the translated SQL query (which hasn’t been completed). The upper left part shows the current information on nodes mapping. The choice box showing “NN: inproceedings.title” contains a drop down list of node types and values for the user to choose from. Once the user confirms the choice by pressing the “confirm choice” button, the app will go on to map the next word. The mapping choices will only be shown to the user if the word doesn’t match with the five predefined node types.
179 |
180 | As for the node mapper, currently we’ve only defined very limited number of explicit rules for nodes mapping. There are only a few predefined keywords, such as return, equals, all, etc (thus limited SQL query functions as well). We plan to tune the app after we’ve completed writing the whole process of translation. The nodes mapping for name nodes and value nodes doesn’t work perfectly well, maybe in the future we will try some more sensible measures of word similarity. But it is ok for now, since the users can almost always find the right name node or value node from the multiple choices.
181 |
182 | As for basic GUI functions, we may need to design a much fancier GUI as the last step of our program.
183 |
184 | %%%================================================================%%%
185 | \section{Contributions of Project Members}
186 |
187 | \begin{itemize}
188 | \item {\bf Yilin Gao:} GUI implementation, controller design, report writing.
189 | \item {\bf Keping Wang:} database connection, schema retrieval, Stanford NLP parser usage, parse tree design, word similarity score, report writing.
190 | \item {\bf Chengkang Xu:} node mapping, meaningless nodes removal, inserting implicit nodes, report writing.
191 | \end{itemize}
192 |
193 |
194 | \Urlmuskip=0mu plus 1mu\relax
195 | \bibliographystyle{abbrv}
196 | \bibliography{nlidb}
197 |
198 | \end{document}
199 |
--------------------------------------------------------------------------------
/src/main/java/com/dukenlidb/nlidb/archive/model/ParseTree.java:
--------------------------------------------------------------------------------
1 | package com.dukenlidb.nlidb.archive.model;
2 |
3 | import java.io.StringReader;
4 | import java.util.ArrayList;
5 | import java.util.Collections;
6 | import java.util.Iterator;
7 | import java.util.LinkedList;
8 | import java.util.List;
9 |
10 | import edu.stanford.nlp.ling.HasWord;
11 | import edu.stanford.nlp.ling.TaggedWord;
12 | import edu.stanford.nlp.process.DocumentPreprocessor;
13 | import edu.stanford.nlp.trees.GrammaticalStructure;
14 | import edu.stanford.nlp.trees.TypedDependency;
15 |
16 | public class ParseTree implements IParseTree {
17 |
18 | /**
19 | * Order of parse tree reformulation (used in getAdjustedTrees())
20 | */
21 | int edit;
22 | // We no longer use an array to store the nodes!
23 | /**
24 | * Root Node. Supposed to be "ROOT".
25 | */
26 | Node root;
27 |
28 | /**
29 | * Empty constructor, only for testing.
30 | */
31 | public ParseTree() { }
32 |
33 | /**
34 | * Construct a parse tree using the stanford NLP parser. Only one sentence.
35 | * Here we are omitting the information of dependency labels (tags).
36 | * @param text input text.
37 | */
38 | public ParseTree(String text, NLParser parser) {
39 | // pre-processing the input text
40 | DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
41 | List sentence = null;
42 | for (List sentenceHasWord : tokenizer) {
43 | sentence = sentenceHasWord;
44 | break;
45 | }
46 | // part-of-speech tagging
47 | List tagged = parser.tagger.tagSentence(sentence);
48 | // dependency syntax parsing
49 | GrammaticalStructure gs = parser.parser.predict(tagged);
50 |
51 | // Reading the parsed sentence into ParseTree
52 | int N = sentence.size()+1;
53 | Node[] nodes = new Node[N];
54 | root = new Node(0, "ROOT", "ROOT");
55 | nodes[0] = root;
56 | for (int i = 0; i < N-1; i++) {
57 | nodes[i+1] = new Node(i+1,
58 | sentence.get(i).word(), tagged.get(i).tag());
59 | }
60 | for (TypedDependency typedDep : gs.allTypedDependencies()) {
61 | int from = typedDep.gov().index();
62 | int to = typedDep.dep().index();
63 | // String label = typedDep.reln().getShortName(); // omitting the label
64 | nodes[to].parent = nodes[from];
65 | nodes[from].children.add(nodes[to]);
66 | }
67 | }
68 |
69 | public ParseTree(Node node) {
70 | root = node.clone();
71 | }
72 | public ParseTree(ParseTree other) {
73 | this(other.root);
74 | }
75 |
76 | @Override
77 | public int size() {
78 | return root.genNodesArray().length;
79 | }
80 |
81 | @Override
82 | public int getEdit() {
83 | return edit;
84 | }
85 |
86 | @Override
87 | public void setEdit(int edit){
88 | this.edit = edit;
89 | }
90 |
91 | /**
92 | * Helper method for {@link #removeMeaninglessNodes()}.
93 | * (1) If curr node is meaning less, link its children to its parent.
94 | * (2) Move on to remove the meaningless nodes of its children.
95 | */
96 | private void removeMeaninglessNodes(Node curr) {
97 | if (curr == null) { return; }
98 | List currChildren = new ArrayList<>(curr.getChildren());
99 | for (Node child : currChildren) {
100 | removeMeaninglessNodes(child);
101 | }
102 | if (curr != root && curr.getInfo().getType().equals("UNKNOWN")) {
103 | curr.parent.getChildren().remove(curr);
104 | for (Node child : curr.getChildren()) {
105 | curr.parent.getChildren().add(child);
106 | child.parent = curr.parent;
107 | }
108 | }
109 |
110 | }
111 |
112 | /**
113 | * Remove a node from tree if its NodeInfo is ("UNKNOWN", "meaningless").
114 | * To remove the meaningless node, link the children of this node
115 | * to its parent.
116 | */
117 | @Override
118 | public void removeMeaninglessNodes() {
119 | if (root.getChildren().get(0).getInfo() == null) {
120 | System.out.println("ERR! Node info net yet mapped!");
121 | }
122 | // Remove meaningless nodes.
123 | removeMeaninglessNodes(root);
124 | }
125 |
126 | @Override
127 |
128 | public void insertImplicitNodes() {
129 |
130 | List childrenOfRoot = root.getChildren();
131 |
132 | // no condition
133 | if (childrenOfRoot.size() <= 1) {
134 |
135 |
136 | return;
137 | }
138 |
139 | //phase 1, add nodes under select to left subtree
140 |
141 | System.out.println("Phase 1, add nodes under select node to left subtree");
142 |
143 | int IndexOfSN = 0;
144 | for (int i = 0; i < childrenOfRoot.size(); i ++) {
145 |
146 | if (childrenOfRoot.get(i).getInfo().getType().equals("SN")) {
147 |
148 | IndexOfSN = i;
149 | break;
150 | }
151 | }
152 |
153 | //start from the name node
154 |
155 | Node SN = childrenOfRoot.get(IndexOfSN);
156 | List SN_children = SN.getChildren();
157 |
158 | int IndexOfSN_NN = 0;
159 |
160 |
161 | for (int i = 0; i < SN_children.size(); i ++) {
162 |
163 | if (SN_children.get(i).getInfo().getType().equals("NN")) {
164 |
165 | IndexOfSN_NN = i;
166 | break;
167 | }
168 | }
169 |
170 | //add them to left subtree of all branches
171 |
172 | Node copy;
173 | int indexOfAppendedNode;
174 | Node SN_NN = SN_children.get(IndexOfSN_NN);
175 |
176 | for (int i = 0; i < childrenOfRoot.size(); i ++) {
177 |
178 | if (i != IndexOfSN) {
179 |
180 | Node [] nodes_SN_NN = childrenOfRoot.get(i).genNodesArray();
181 | indexOfAppendedNode = nameNodeToBeAppended(nodes_SN_NN);
182 |
183 | if (indexOfAppendedNode != -1) {
184 |
185 | copy = SN_NN.clone();
186 | copy.setOutside(true);
187 |
188 | nodes_SN_NN[indexOfAppendedNode].setChild(copy);
189 | copy.setParent(nodes_SN_NN[indexOfAppendedNode]);
190 | }
191 | }
192 | }
193 |
194 | System.out.println(toString() + '\n');
195 |
196 |
197 | //phase 2, compare left core node with right core node
198 |
199 | System.out.println("Phase 2, core node insertion");
200 |
201 | int indexOfRightCoreNode = -1;
202 | int indexOfLeftCoreNode = -1;
203 |
204 | for (int i = 0; i < childrenOfRoot.size(); i ++) {
205 |
206 | if (i != IndexOfSN) {
207 |
208 | Node [] nodes = childrenOfRoot.get(i).genNodesArray();
209 | int startOfRightBranch = endOfLeftBranch(nodes) + 1;
210 | int sizeOfRightTree = nodes[startOfRightBranch].getChildren().size() + 1;
211 |
212 | //if right tree only contains numbers, skip it
213 |
214 | if (sizeOfRightTree != 1 || !isNumeric(nodes[startOfRightBranch].getWord())) {
215 |
216 | indexOfLeftCoreNode = coreNode(nodes, true);
217 | indexOfRightCoreNode = coreNode(nodes, false);
218 |
219 | //if left core node exists
220 |
221 | if (indexOfLeftCoreNode != -1) {
222 |
223 | boolean doInsert = false;
224 |
225 | //if right subtree neither have core node nor it only contains number
226 | if (indexOfRightCoreNode == -1) {
227 |
228 | //copy core node only
229 |
230 | doInsert = true;
231 | }
232 |
233 | //if right core node & left core node are different schema
234 |
235 | else if (!nodes[indexOfRightCoreNode].getInfo().
236 | ExactSameSchema(nodes[indexOfLeftCoreNode].getInfo())) {
237 |
238 | //copy core node only
239 |
240 | doInsert = true;
241 | }
242 |
243 | if (doInsert) {
244 |
245 | copy = nodes[indexOfLeftCoreNode].clone();
246 | copy.children = new ArrayList();
247 | copy.setOutside(true);
248 |
249 |
250 | boolean insertAroundFN = false;
251 |
252 | int indexOfNewRightCN = IndexToInsertCN(nodes);
253 |
254 | if (indexOfNewRightCN == -1) {
255 |
256 | for (int j = nodes.length - 1; j > endOfLeftBranch(nodes); j --) {
257 |
258 | if (nodes[j].getInfo().getType().equals("FN")) {
259 |
260 | indexOfNewRightCN = j + 1;
261 | insertAroundFN = true;
262 | break;
263 | }
264 | }
265 | }
266 |
267 | if (insertAroundFN) {
268 |
269 | //THIS ONLY HANDLES FN NODE HAS NO CHILD OR ONE NAME NODE CHILD
270 |
271 | List FN_children = nodes[indexOfNewRightCN - 1].getChildren();
272 |
273 | for (int j = 0; j < FN_children.size(); j ++) {
274 |
275 | copy.setChild(FN_children.get(j));
276 | FN_children.get(j).setParent(copy);
277 | }
278 |
279 | copy.setParent(nodes[indexOfNewRightCN - 1]);
280 | nodes[indexOfNewRightCN - 1].children = new ArrayList();
281 | nodes[indexOfNewRightCN - 1].setChild(copy);
282 | }
283 |
284 | else {
285 |
286 | //if right subtree only contains VN, adjust index
287 |
288 | if (indexOfNewRightCN == -1) {
289 |
290 | indexOfNewRightCN = endOfLeftBranch(nodes) + 1;
291 | }
292 |
293 | copy.setChild(nodes[indexOfNewRightCN]);
294 | copy.setParent(nodes[indexOfNewRightCN].getParent());
295 | nodes[indexOfNewRightCN].getParent().removeChild(nodes[indexOfNewRightCN]);
296 | nodes[indexOfNewRightCN].getParent().setChild(copy);
297 | nodes[indexOfNewRightCN].setParent(copy);
298 |
299 | }
300 | }
301 |
302 | System.out.println(toString());
303 |
304 | //phase 3, map each NV under left core node to right core node
305 |
306 | System.out.println("Phase 3, transfer constrain nodes from left to right");
307 |
308 | List NV_children_left = nodes[indexOfLeftCoreNode].getChildren();
309 |
310 | for (int j = 0; j < NV_children_left.size(); j ++) {
311 |
312 | Node [] nodes_new = childrenOfRoot.get(i).genNodesArray();
313 | indexOfRightCoreNode = coreNode(nodes_new, false);
314 | List NV_children_right = nodes_new[indexOfRightCoreNode].getChildren();
315 |
316 | boolean found_NV = false;
317 |
318 | Node curr_left = NV_children_left.get(j);
319 | String curr_left_type = curr_left.getInfo().getType();
320 |
321 | for (int k = 0; k < NV_children_right.size(); k ++) {
322 |
323 | //compare
324 |
325 | Node curr_right = NV_children_right.get(k);
326 |
327 | //strictly compare, exact match ON
328 |
329 | if (curr_left_type.equals("ON")) {
330 |
331 | if (curr_left.equals(curr_right)) {
332 |
333 | found_NV = true;
334 | break;
335 | }
336 | }
337 |
338 | else {
339 |
340 | if (curr_left.getInfo().sameSchema(curr_right.getInfo())) {
341 |
342 | found_NV = true;
343 | break;
344 | }
345 | }
346 | }
347 |
348 | if (!found_NV) {
349 |
350 | //insert
351 |
352 | copy = curr_left.clone();
353 | nodes_new[indexOfRightCoreNode].setChild(copy);
354 | copy.setOutside(true);
355 | copy.setParent(nodes_new[indexOfRightCoreNode]);
356 | }
357 | }
358 |
359 | System.out.println(toString());
360 |
361 | //phase 4, insert function node
362 |
363 | System.out.println("Phase 4, insert missing function node");
364 |
365 | Node [] nodes_final_temp = childrenOfRoot.get(i).genNodesArray();
366 |
367 | int indexOfLeftFN_Tail = -1;
368 |
369 | for (int j = indexOfLeftCoreNode; j > 0; j --) {
370 |
371 | if (nodes_final_temp[j].getInfo().getType().equals("FN")) {
372 |
373 | indexOfLeftFN_Tail = j;
374 | break;
375 | }
376 | }
377 |
378 | if (indexOfLeftFN_Tail != -1) {
379 |
380 | //ASSUMPTION: if FN exists, it always before core node
381 |
382 | for (int k = 1; k < indexOfLeftFN_Tail + 1; k ++) {
383 |
384 | Node [] nodes_final = childrenOfRoot.get(i).genNodesArray();
385 | indexOfRightCoreNode = coreNode(nodes_final, false);
386 |
387 | boolean found_FN = false;
388 |
389 | for (int j = endOfLeftBranch(nodes_final) + 1; j < indexOfRightCoreNode; j ++) {
390 |
391 | if (nodes_final[j].getInfo().ExactSameSchema(nodes_final[k].getInfo())) {
392 |
393 | found_FN = true;
394 | }
395 | }
396 |
397 | if(!found_FN) {
398 | copy = nodes_final[k].clone();
399 | copy.setOutside(true);
400 | copy.children = new ArrayList();
401 |
402 | nodes[0].removeChild(nodes_final[endOfLeftBranch(nodes_final) + 1]);
403 | nodes[0].setChild(copy);
404 |
405 | copy.setParent(nodes[0]);
406 | copy.setChild(nodes[endOfLeftBranch(nodes_final) + 1]);
407 | nodes[endOfLeftBranch(nodes_final) + 1].setParent(copy);
408 | }
409 | }
410 | }
411 | System.out.println(toString());
412 | }
413 | }
414 | }
415 | }
416 | }
417 |
418 | /**
419 | * find the index in the right tree to append core node
420 | */
421 |
422 | public int IndexToInsertCN (Node [] nodes) {
423 |
424 |
425 | for (int i = endOfLeftBranch(nodes) + 1; i < nodes.length; i ++) {
426 |
427 | if (nodes[i].getInfo().getType().equals("NN")) {
428 |
429 | return i;
430 | }
431 | }
432 |
433 | return -1;
434 | }
435 |
436 | /**
437 | * Appending the name node under SELECT to the last name node in leftsubtree
438 | */
439 |
440 | public int nameNodeToBeAppended (Node [] nodes) {
441 |
442 | for (int i = endOfLeftBranch(nodes); i > 0; i --) {
443 |
444 | if (nodes[i].getInfo().getType().equals("NN")) {
445 |
446 | return i;
447 | }
448 | }
449 |
450 | return -1;
451 | }
452 |
453 | /**
454 | * find the index of the last node in the left subtree
455 | */
456 |
457 | public int endOfLeftBranch (Node [] nodes) {
458 |
459 | for (int i = 2; i < nodes.length; i ++) {
460 |
461 | if(nodes[i].getParent().equals(nodes[0])) {
462 |
463 | return i - 1;
464 | }
465 |
466 | }
467 |
468 | return -1;
469 | }
470 |
471 | /**
472 | * check if right branch contains only number
473 | */
474 | public boolean isNumeric(String str) {
475 | try {
476 | double d = Double.parseDouble(str);
477 | }
478 | catch(NumberFormatException e) {
479 | return false;
480 | }
481 | return true;
482 | }
483 |
484 | /**
485 | * find index of core node
486 | */
487 |
488 | public int coreNode (Node [] nodes, boolean left) {
489 |
490 | int startIndex = 1;
491 | int endIndex = endOfLeftBranch(nodes);
492 |
493 | if (!left) {
494 |
495 | startIndex = endOfLeftBranch(nodes) + 1;
496 | endIndex = nodes.length - 1;
497 | }
498 |
499 | for (int i = startIndex; i <= endIndex; i ++) {
500 |
501 | if (nodes[i].getInfo().getType().equals("NN")) {
502 |
503 | return i;
504 | }
505 | }
506 |
507 | return -1;
508 | }
509 |
510 |
511 | @Override
512 | public ParseTree mergeLNQN(){
513 | Node[] nodes = this.root.genNodesArray();
514 | for (int i=0; i getAdjustedTrees() {
550 | List result = TreeAdjustor.getAdjustedTrees(this);
551 | Collections.sort(result, (t1, t2) -> {
552 | if (t1.getScore() != t2.getScore()) {
553 | return - t1.getScore() + t2.getScore();
554 | } else {
555 | return t1.getEdit() - t2.getEdit();
556 | }
557 | });
558 | return result.subList(0, 4);
559 | }
560 |
561 | /**
562 | * Only for testing.
563 | * @return
564 | */
565 | @Deprecated
566 | public SQLQuery translateToSQL() {
567 | return translateToSQL(null);
568 | }
569 |
570 | @Override
571 | public SQLQuery translateToSQL(SchemaGraph schema) {
572 | SQLTranslator translator = new SQLTranslator(root, schema);
573 | return translator.getResult();
574 | }
575 |
576 | @Override
577 | public int hashCode() {
578 | final int prime = 31;
579 | int result = 17;
580 | result = prime * result + ((root == null) ? 0 : root.hashCode());
581 | return result;
582 | }
583 |
584 | @Override
585 | public boolean equals(Object obj) {
586 | if (this == obj)
587 | return true;
588 | if (obj == null)
589 | return false;
590 | if (getClass() != obj.getClass())
591 | return false;
592 | ParseTree other = (ParseTree) obj;
593 | if (root == null) {
594 | if (other.root != null)
595 | return false;
596 | } else if (!root.equals(other.root))
597 | return false;
598 | return true;
599 | }
600 |
	/**
	 * Return an array of nodes in the tree, shallow copy.
	 * Only the array itself is new; the Node objects are shared with this
	 * tree, so mutating a returned node mutates the tree.
	 * @return the nodes of this tree, in the order produced by the root's
	 *         genNodesArray()
	 */
	public Node[] genNodesArray() {
		return root.genNodesArray();
	}
608 |
609 | /**
610 | * Pre-order iterator
611 | * @author keping
612 | */
613 | public class ParseTreeIterator implements Iterator {
614 | LinkedList stack = new LinkedList<>();
615 | ParseTreeIterator() {
616 | stack.push(root);
617 | }
618 | @Override
619 | public boolean hasNext() {
620 | return !stack.isEmpty();
621 | }
622 | @Override
623 | public Node next() {
624 | Node curr = stack.pop();
625 | List children = curr.getChildren();
626 | for (int i = children.size()-1; i >= 0; i--) {
627 | stack.push(children.get(i));
628 | }
629 | return curr;
630 | }
631 | }
632 |
	/**
	 * The default iterator in ParseTree returns the Nodes
	 * using pre-order of the tree (parent before children, children
	 * left to right) — see {@link ParseTreeIterator}.
	 */
	@Override
	public ParseTreeIterator iterator() { return new ParseTreeIterator(); }
639 |
640 | /**
641 | * Get the natural language sentence corresponding to this
642 | * parse tree.
643 | * @return sentence
644 | */
645 | public String getSentence() {
646 | StringBuilder sb = new StringBuilder();
647 | boolean first = true;
648 | for (Node node : this) {
649 | if (first) {
650 | sb.append(node.getWord());
651 | first = false;
652 | } else {
653 | sb.append(" ").append(node.getWord());
654 | }
655 | }
656 | return sb.toString();
657 | }
658 |
659 | /**
660 | * toString like "curr -> [child1, child2, ...]"
661 | * @param curr
662 | * @return
663 | */
664 | private String nodeToString(Node curr) {
665 | if (curr == null) { return ""; }
666 | String s = curr.toString() + " -> ";
667 | s += curr.getChildren().toString() + "\n";
668 | for (Node child : curr.getChildren()) {
669 | s += nodeToString(child);
670 | }
671 | return s;
672 | }
673 |
674 | @Override
675 | public String toString() {
676 | StringBuilder sb = new StringBuilder();
677 | sb.append("Sentence: ").append(getSentence()).append("\n");
678 | sb.append(nodeToString(root));
679 | return sb.toString();
680 | }
681 |
	/**
	 * Score of a tree measures the syntactic legality of the tree.
	 * It is the negated count of invalid nodes reported by
	 * {@link SyntacticEvaluator#numberOfInvalidNodes}: a fully valid
	 * tree scores 0, and each invalid node lowers the score by one.
	 * @return zero minus the number of syntactically invalid nodes
	 */
	public int getScore(){
		return - SyntacticEvaluator.numberOfInvalidNodes(this);
	}
690 |
691 | }
692 |
--------------------------------------------------------------------------------