├── src ├── main │ ├── resources │ │ ├── public │ │ │ └── .gitkeep │ │ ├── application-dev.properties │ │ └── application.properties │ └── java │ │ └── com │ │ └── dukenlidb │ │ └── nlidb │ │ ├── model │ │ ├── request │ │ │ ├── ExecuteSQLRequest.java │ │ │ ├── TranslateNLRequest.java │ │ │ └── ConnectDBRequest.java │ │ ├── response │ │ │ ├── MessageResponse.java │ │ │ ├── QueryResponse.java │ │ │ ├── TranslateResponse.java │ │ │ ├── ConnectResponse.java │ │ │ └── StatusMessageResponse.java │ │ ├── UserSession.java │ │ └── DBConnectionConfig.java │ │ ├── main │ │ └── Application.java │ │ ├── service │ │ ├── DBConnectionService.java │ │ ├── CookieService.java │ │ ├── RedisService.java │ │ └── SQLExecutionService.java │ │ ├── archive │ │ ├── model │ │ │ ├── NLParser.java │ │ │ ├── WordSimilarity.java │ │ │ ├── QueryTree.java │ │ │ ├── ParserDemo.java │ │ │ ├── ImplicitNodeTest.java │ │ │ ├── IParseTree.java │ │ │ ├── SQLQuery.java │ │ │ ├── NodeInfo.java │ │ │ ├── NodeMapper.java │ │ │ ├── SQLTranslator.java │ │ │ ├── Node.java │ │ │ ├── TreeAdjustor.java │ │ │ ├── ParseTreeTest.java │ │ │ ├── WordNet.java │ │ │ ├── SyntacticEvaluator.java │ │ │ ├── SchemaGraph.java │ │ │ ├── TreeAdjustorTest.java │ │ │ └── ParseTree.java │ │ ├── ui │ │ │ └── UserView.java │ │ └── app │ │ │ └── Controller.java │ │ └── controller │ │ └── Controller.java └── test │ └── java │ └── com │ └── dukenlidb │ └── nlidb │ └── model │ └── UserSessionTest.java ├── doc ├── ref │ ├── 3.png │ ├── 7.png │ ├── node type.png │ └── queries.png ├── report │ ├── final │ │ ├── final.pdf │ │ ├── figures │ │ │ ├── translation.png │ │ │ ├── wordnet_tree.png │ │ │ ├── tree_adjustor2.png │ │ │ ├── tree_adjustor3.png │ │ │ ├── gui_nodes_mapping.png │ │ │ ├── gui_translation.png │ │ │ ├── program_structure.png │ │ │ ├── gui_tree_adjustor1.png │ │ │ ├── nodes_mapping_rules.png │ │ │ ├── nlidb_system_diagram.png │ │ │ └── nodes_mapping_example.png │ │ ├── compile.py │ │ └── nlidb.bib │ ├── midterm │ │ ├── 
midterm.pdf │ │ ├── template.pdf │ │ ├── figures │ │ │ ├── wordnet_tree.png │ │ │ ├── gui_nodes_mapping.png │ │ │ ├── program_structure.png │ │ │ ├── nlidb_system_diagram.pdf │ │ │ ├── nodes_mapping_rules.png │ │ │ └── nodes_mapping_example.png │ │ ├── compile.py │ │ ├── README.md │ │ ├── nlidb.bib │ │ └── midterm.tex │ └── .gitignore ├── edu.mit.jwi_2.4.0_manual.pdf ├── Verb Semantics and Lexical Selection.pdf └── Constructing an Interactive Natural Language Interface for Relational Databases.pdf ├── .dockerignore ├── client ├── public │ ├── favicon.ico │ ├── manifest.json │ └── index.html ├── src │ ├── utils │ │ ├── new-id.js │ │ └── registerServiceWorker.js │ ├── index.js │ ├── app │ │ ├── reducer.js │ │ ├── actions.js │ │ ├── index.js │ │ ├── components │ │ │ └── search-bar.js │ │ ├── sagas.js │ │ └── app.js │ ├── styles │ │ └── button.js │ ├── store.js │ ├── requests.js │ └── common │ │ └── form.js ├── .gitignore ├── .eslintrc.json └── package.json ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── .gitignore ├── Dockerfile.test ├── Dockerfile ├── Jenkinsfile ├── gradlew.bat ├── README.md ├── gradlew └── LICENSE /src/main/resources/public/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/ref/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/ref/3.png -------------------------------------------------------------------------------- /doc/ref/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/ref/7.png -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | build/ 2 | client/build/ 3 | 
client/node_modules/ 4 | out/ 5 | .gradle/ 6 | -------------------------------------------------------------------------------- /doc/ref/node type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/ref/node type.png -------------------------------------------------------------------------------- /doc/ref/queries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/ref/queries.png -------------------------------------------------------------------------------- /client/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/client/public/favicon.ico -------------------------------------------------------------------------------- /doc/report/final/final.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/final.pdf -------------------------------------------------------------------------------- /doc/report/midterm/midterm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/midterm/midterm.pdf -------------------------------------------------------------------------------- /doc/report/midterm/template.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/midterm/template.pdf -------------------------------------------------------------------------------- /doc/edu.mit.jwi_2.4.0_manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/edu.mit.jwi_2.4.0_manual.pdf 
-------------------------------------------------------------------------------- /doc/report/.gitignore: -------------------------------------------------------------------------------- 1 | auto/ 2 | midterm/README.md 3 | !*/*.pdf 4 | !*/*.py 5 | !*/*.bib 6 | !*/*.tex 7 | !*/*.md 8 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /src/main/resources/application-dev.properties: -------------------------------------------------------------------------------- 1 | server.address=localhost 2 | server.port=8080 3 | 4 | redis.host=localhost 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .DS_Store 3 | 4 | *.iml 5 | .idea/ 6 | 7 | # build files 8 | target/ 9 | build/ 10 | out/ 11 | .gradle/ -------------------------------------------------------------------------------- /doc/report/final/figures/translation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/translation.png -------------------------------------------------------------------------------- /doc/report/final/figures/wordnet_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/wordnet_tree.png -------------------------------------------------------------------------------- /doc/report/final/figures/tree_adjustor2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/tree_adjustor2.png -------------------------------------------------------------------------------- /doc/report/final/figures/tree_adjustor3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/tree_adjustor3.png -------------------------------------------------------------------------------- /doc/report/midterm/figures/wordnet_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/midterm/figures/wordnet_tree.png -------------------------------------------------------------------------------- /doc/Verb Semantics and Lexical Selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/Verb Semantics and Lexical Selection.pdf -------------------------------------------------------------------------------- /doc/report/final/figures/gui_nodes_mapping.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/gui_nodes_mapping.png -------------------------------------------------------------------------------- /doc/report/final/figures/gui_translation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/gui_translation.png -------------------------------------------------------------------------------- /doc/report/final/figures/program_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/program_structure.png 
-------------------------------------------------------------------------------- /doc/report/final/figures/gui_tree_adjustor1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/gui_tree_adjustor1.png -------------------------------------------------------------------------------- /doc/report/final/figures/nodes_mapping_rules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/nodes_mapping_rules.png -------------------------------------------------------------------------------- /doc/report/midterm/figures/gui_nodes_mapping.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/midterm/figures/gui_nodes_mapping.png -------------------------------------------------------------------------------- /doc/report/midterm/figures/program_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/midterm/figures/program_structure.png -------------------------------------------------------------------------------- /doc/report/final/figures/nlidb_system_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/nlidb_system_diagram.png -------------------------------------------------------------------------------- /doc/report/final/figures/nodes_mapping_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/final/figures/nodes_mapping_example.png -------------------------------------------------------------------------------- 
/doc/report/midterm/figures/nlidb_system_diagram.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/midterm/figures/nlidb_system_diagram.pdf -------------------------------------------------------------------------------- /doc/report/midterm/figures/nodes_mapping_rules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/midterm/figures/nodes_mapping_rules.png -------------------------------------------------------------------------------- /client/src/utils/new-id.js: -------------------------------------------------------------------------------- 1 | let lastId = 0; 2 | 3 | export default function (prefix = 'id') { 4 | lastId += 1; 5 | return `${prefix}${lastId}`; 6 | } 7 | -------------------------------------------------------------------------------- /doc/report/midterm/figures/nodes_mapping_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/report/midterm/figures/nodes_mapping_example.png -------------------------------------------------------------------------------- /src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | server.address=0.0.0.0 2 | server.port=80 3 | 4 | redis.host=cache-nlidb.gr7mqq.0001.use1.cache.amazonaws.com 5 | redis.port=6379 6 | -------------------------------------------------------------------------------- /doc/Constructing an Interactive Natural Language Interface for Relational Databases.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DukeNLIDB/NLIDB/HEAD/doc/Constructing an Interactive Natural Language Interface for Relational Databases.pdf 
-------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/model/request/ExecuteSQLRequest.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model.request; 2 | 3 | import lombok.Value; 4 | 5 | @Value 6 | public class ExecuteSQLRequest { 7 | 8 | String query; 9 | 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/model/request/TranslateNLRequest.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model.request; 2 | 3 | import lombok.Value; 4 | 5 | @Value 6 | public class TranslateNLRequest { 7 | 8 | String input; 9 | 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/model/response/MessageResponse.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model.response; 2 | 3 | import lombok.Value; 4 | 5 | @Value 6 | public class MessageResponse { 7 | 8 | String message; 9 | 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/model/response/QueryResponse.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model.response; 2 | 3 | import lombok.Value; 4 | 5 | @Value 6 | public class QueryResponse { 7 | 8 | String queryResult; 9 | 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/model/response/TranslateResponse.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model.response; 2 | 3 | import lombok.Value; 4 | 5 | @Value 6 | public class TranslateResponse { 7 | 8 | String 
translateResult; 9 | 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/model/response/ConnectResponse.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model.response; 2 | 3 | import lombok.Value; 4 | 5 | @Value 6 | public class ConnectResponse { 7 | 8 | boolean success; 9 | String databaseUrl; 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/model/response/StatusMessageResponse.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model.response; 2 | 3 | import lombok.Value; 4 | 5 | @Value 6 | public class StatusMessageResponse { 7 | 8 | boolean success; 9 | String message; 10 | 11 | } 12 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Sun Oct 08 21:21:59 EDT 2017 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-3.5-rc-2-all.zip 7 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/model/request/ConnectDBRequest.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model.request; 2 | 3 | import lombok.Value; 4 | 5 | @Value 6 | public class ConnectDBRequest { 7 | 8 | String host; 9 | String port; 10 | String database; 11 | String username; 12 | String password; 13 | 14 | } 15 | -------------------------------------------------------------------------------- /doc/report/final/compile.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import subprocess, sys 4 | 5 | commands = [ 6 | ['pdflatex', sys.argv[1] + '.tex'], 7 | ['bibtex', sys.argv[1] + '.aux'], 8 | ['pdflatex', sys.argv[1] + '.tex'], 9 | ['pdflatex', sys.argv[1] + '.tex'] 10 | ] 11 | 12 | for c in commands: 13 | subprocess.call(c) 14 | -------------------------------------------------------------------------------- /doc/report/midterm/compile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import subprocess, sys 4 | 5 | commands = [ 6 | ['pdflatex', sys.argv[1] + '.tex'], 7 | ['bibtex', sys.argv[1] + '.aux'], 8 | ['pdflatex', sys.argv[1] + '.tex'], 9 | ['pdflatex', sys.argv[1] + '.tex'] 10 | ] 11 | 12 | for c in commands: 13 | subprocess.call(c) 14 | -------------------------------------------------------------------------------- /client/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "192x192", 8 | "type": "image/png" 9 | } 10 | ], 11 | "start_url": "./index.html", 12 | "display": "standalone", 13 | "theme_color": "#000000", 14 | "background_color": "#ffffff" 15 | } 16 | -------------------------------------------------------------------------------- /client/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | 6 | # testing 7 | /coverage 8 | 9 | # production 10 | /build 11 | 12 | # misc 13 | .DS_Store 14 | .env.local 15 | .env.development.local 16 | .env.test.local 17 | .env.production.local 18 | 19 | npm-debug.log* 20 | yarn-debug.log* 21 | yarn-error.log* 22 | -------------------------------------------------------------------------------- /client/src/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import { Provider } from 'react-redux'; 4 | import store from './store'; 5 | import App from './app'; 6 | import registerServiceWorker from './utils/registerServiceWorker'; 7 | 8 | ReactDOM.render( 9 | ( 10 | 11 | 12 | 13 | ), 14 | document.getElementById('root'), 15 | ); 16 | registerServiceWorker(); 17 | -------------------------------------------------------------------------------- /doc/report/midterm/README.md: -------------------------------------------------------------------------------- 1 | ### How to render .pdf from .tex file: 2 | 3 | 1. Make sure you've installed latex and python. 4 | 2. Run in cmd/terminal: `python compile.py $(filename)`. $(filename) is the name of the .tex file you want to compile without .suffix. For example, here it is `python compile.py midterm`. 5 | 3. Windows and Unix file systems have different line breaks. I suggest editing the .tex file on a Unix system. 
6 | 7 | #### To add more reference, add entry in `nlidb.bib` 8 | -------------------------------------------------------------------------------- /client/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "airbnb", 3 | "env": { 4 | "browser": true, 5 | "es6": true, 6 | "jest": true 7 | }, 8 | "rules": { 9 | "react/jsx-filename-extension": [ 10 | 1, { "extensions": [".js", ".jsx"] } 11 | ], 12 | "react/prop-types": [ 13 | 0, { } 14 | ], 15 | "react/no-array-index-key": [ 16 | 0, { } 17 | ], 18 | "padded-blocks": [ 19 | "error", 20 | { "blocks": "never" } 21 | ] 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/main/Application.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.main; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | import org.springframework.context.annotation.ComponentScan; 6 | 7 | @SpringBootApplication 8 | @ComponentScan("com.dukenlidb.nlidb") 9 | public class Application { 10 | 11 | public static void main(String[] args) { 12 | SpringApplication.run(Application.class, args); 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /client/src/app/reducer.js: -------------------------------------------------------------------------------- 1 | import { fromJS } from 'immutable'; 2 | import * as actions from './actions'; 3 | 4 | const initialState = fromJS({ 5 | connected: false, 6 | connectErrorMsg: null, 7 | databaseUrl: null, 8 | translateResult: null, 9 | queryResult: null, 10 | }); 11 | 12 | const reducers = (state = initialState, action) => { 13 | 14 | switch (action.type) { 15 | case actions.SET_APP_STATE: 16 | return state.merge(action.payload); 17 | default: 18 | return state; 19 | } 20 | }; 
21 | 22 | export default reducers; 23 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/service/DBConnectionService.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.service; 2 | 3 | import org.springframework.stereotype.Service; 4 | import com.dukenlidb.nlidb.model.DBConnectionConfig; 5 | 6 | import java.sql.Connection; 7 | import java.sql.DriverManager; 8 | import java.sql.SQLException; 9 | 10 | @Service 11 | public class DBConnectionService { 12 | 13 | public Connection getConnection(DBConnectionConfig config) throws SQLException { 14 | return DriverManager.getConnection(config.getUrl(), config.getProperties()); 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /client/src/styles/button.js: -------------------------------------------------------------------------------- 1 | import { css } from 'styled-components'; 2 | 3 | const buttonStyle = css` 4 | background-color: #fff; 5 | border: none; 6 | cursor: pointer; 7 | padding: 6px 10px; 8 | text-align: center; 9 | display: inline-block; 10 | font-size: 14px; 11 | outline: none; 12 | box-shadow: 0px 0.5px 2px 1.5px #aaa; 13 | border: 1px solid #aaa; 14 | :active { 15 | box-shadow: none; 16 | border: 1px solid #777; 17 | } 18 | cursor: ${props => (props.disabled ? 'default' : 'pointer')}; 19 | ${props => (props.disabled ? 
'opacity: 0.65;' : '')} 20 | `; 21 | 22 | export default buttonStyle; 23 | -------------------------------------------------------------------------------- /client/src/store.js: -------------------------------------------------------------------------------- 1 | import { createStore, combineReducers, applyMiddleware, compose } from 'redux'; 2 | import createSagaMiddleware from 'redux-saga'; 3 | import appReducer from './app/reducer'; 4 | import appSagas from './app/sagas'; 5 | 6 | const reducers = combineReducers({ 7 | app: appReducer, 8 | }); 9 | 10 | const sagaMiddleware = createSagaMiddleware(); 11 | 12 | /* eslint-disable no-underscore-dangle */ 13 | const composeEnhancers = window.__REDUX_DEVTOOLS_EXTENSION_COMPOSE__ || compose; 14 | /* eslint-enable */ 15 | 16 | const store = createStore(reducers, composeEnhancers( 17 | applyMiddleware(sagaMiddleware), 18 | )); 19 | 20 | sagaMiddleware.run(appSagas); 21 | 22 | export default store; 23 | -------------------------------------------------------------------------------- /Dockerfile.test: -------------------------------------------------------------------------------- 1 | FROM node:8.6-alpine 2 | 3 | # directory automatically created 4 | WORKDIR /usr/nlidb 5 | 6 | # install dependencies packages first to utilize docker layer caching 7 | # COPY dest path could be relative to WORKDIR, or absolute. 
dest dir must end with / 8 | COPY client/package.json client/ 9 | RUN cd client && npm install 10 | 11 | # copy everything to filesystem of container 12 | COPY client client/ 13 | 14 | # build react bundle 15 | RUN cd client && npm run build 16 | 17 | 18 | FROM frolvlad/alpine-oraclejdk8:full 19 | WORKDIR /usr/nlidb 20 | COPY src src 21 | COPY gradle gradle 22 | COPY gradlew ./ 23 | COPY build.gradle ./ 24 | COPY --from=0 /usr/nlidb/client/build/ src/main/resources/public/ 25 | 26 | CMD ["./gradlew", "test"] 27 | -------------------------------------------------------------------------------- /client/src/requests.js: -------------------------------------------------------------------------------- 1 | const request = (url, payload) => 2 | fetch(url, { 3 | method: 'POST', 4 | headers: { 'Content-Type': 'application/json' }, 5 | credentials: 'include', 6 | body: JSON.stringify(payload), 7 | }); 8 | 9 | export const connectUserSession = () => 10 | request('/api/connect/user/'); 11 | 12 | export const disconnect = () => 13 | request('/api/disconnect'); 14 | 15 | export const connectToDB = ({ host, port, database, username, password }) => 16 | request('/api/connect/db', { host, port, database, username, password }); 17 | 18 | export const translateNL = ({ input }) => 19 | request('/api/translate/nl', { input }); 20 | 21 | export const executeSQL = ({ query }) => 22 | request('/api/execute/sql', { query }); 23 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/NLParser.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import edu.stanford.nlp.parser.nndep.DependencyParser; 4 | import edu.stanford.nlp.tagger.maxent.MaxentTagger; 5 | 6 | /** 7 | * Natural language parser, a wrapper of the Stanford NLP parser. 
8 | * @author keping 9 | * 10 | */ 11 | public class NLParser { 12 | MaxentTagger tagger; 13 | DependencyParser parser; 14 | 15 | public NLParser() { 16 | String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; 17 | String modelPath = DependencyParser.DEFAULT_MODEL; 18 | tagger = new MaxentTagger(taggerPath); 19 | parser = DependencyParser.loadFromModelFile(modelPath); 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:8.6-alpine 2 | 3 | # directory automatically created 4 | WORKDIR /usr/nlidb 5 | 6 | # install dependencies packages first to utilize docker layer caching 7 | # COPY dest path could be relative to WORKDIR, or absolute. dest dir must end with / 8 | COPY client/package.json client/ 9 | RUN cd client && npm install 10 | 11 | # copy everything to filesystem of container 12 | COPY client client/ 13 | 14 | # build react bundle 15 | RUN cd client && npm run build 16 | 17 | 18 | FROM frolvlad/alpine-oraclejdk8:full 19 | WORKDIR /usr/nlidb 20 | COPY src src 21 | COPY gradle gradle 22 | COPY gradlew ./ 23 | COPY build.gradle ./ 24 | COPY --from=0 /usr/nlidb/client/build/ src/main/resources/public/ 25 | 26 | EXPOSE 80 27 | CMD ["./gradlew", "-Dspring.profiles.active=prod", "bootRun"] 28 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/model/UserSession.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model; 2 | 3 | 4 | import com.fasterxml.jackson.annotation.JsonUnwrapped; 5 | import com.fasterxml.jackson.core.JsonProcessingException; 6 | import com.fasterxml.jackson.databind.ObjectMapper; 7 | import lombok.AllArgsConstructor; 8 | import lombok.Data; 9 | 10 | import java.io.IOException; 11 | 12 | @Data 13 | 
@AllArgsConstructor 14 | public class UserSession { 15 | 16 | @JsonUnwrapped 17 | private DBConnectionConfig dbConnectionConfig; 18 | 19 | public String serialize() throws JsonProcessingException { 20 | return new ObjectMapper().writeValueAsString(this); 21 | } 22 | 23 | public static UserSession deserialize(String str) throws IOException { 24 | return new ObjectMapper().readValue(str, UserSession.class); 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /client/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nlidb-client", 3 | "version": "0.1.0", 4 | "private": true, 5 | "proxy": "http://localhost:8080", 6 | "dependencies": { 7 | "immutable": "^3.8.2", 8 | "react": "^16.0.0", 9 | "react-dom": "^16.0.0", 10 | "react-redux": "^5.0.6", 11 | "react-scripts": "1.0.14", 12 | "redux": "^3.7.2", 13 | "redux-saga": "^0.15.6", 14 | "styled-components": "^2.2.1" 15 | }, 16 | "scripts": { 17 | "start": "react-scripts start", 18 | "build": "react-scripts build", 19 | "test": "react-scripts test --env=jsdom", 20 | "eject": "react-scripts eject" 21 | }, 22 | "devDependencies": { 23 | "eslint": "^4.8.0", 24 | "eslint-config-airbnb": "^15.1.0", 25 | "eslint-plugin-import": "^2.7.0", 26 | "eslint-plugin-jsx-a11y": "^5.1.1", 27 | "eslint-plugin-react": "^7.4.0" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/model/DBConnectionConfig.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model; 2 | 3 | 4 | import com.fasterxml.jackson.annotation.JsonIgnore; 5 | import lombok.Builder; 6 | import lombok.Value; 7 | 8 | import java.util.Properties; 9 | 10 | @Builder 11 | @Value 12 | public class DBConnectionConfig { 13 | 14 | String host; 15 | String port; 16 | String database; 17 | String username; 18 | String password; 
19 | 20 | @JsonIgnore 21 | public String getUrl() { 22 | return "jdbc:postgresql://" 23 | + host + ":" + port 24 | + "/" + database; 25 | } 26 | 27 | @JsonIgnore 28 | public Properties getProperties() { 29 | Properties props = new Properties(); 30 | props.setProperty("user", username); 31 | props.setProperty("password", password); 32 | return props; 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/test/java/com/dukenlidb/nlidb/model/UserSessionTest.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.model; 2 | 3 | import org.junit.Before; 4 | import org.junit.Test; 5 | 6 | import static org.junit.Assert.assertEquals; 7 | 8 | public class UserSessionTest { 9 | 10 | private UserSession session; 11 | 12 | @Before 13 | public void init() { 14 | DBConnectionConfig config = DBConnectionConfig 15 | .builder() 16 | .host("hostname") 17 | .port("portnum") 18 | .database("db") 19 | .username("username1") 20 | .password("passwd") 21 | .build(); 22 | session = new UserSession(config); 23 | } 24 | 25 | @Test 26 | public void serialize() throws Exception { 27 | session.serialize(); 28 | } 29 | 30 | @Test 31 | public void deserialize() throws Exception { 32 | assertEquals(session, UserSession.deserialize(session.serialize())); 33 | } 34 | 35 | } -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/service/CookieService.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.service; 2 | 3 | import org.springframework.stereotype.Service; 4 | 5 | import javax.servlet.http.Cookie; 6 | import javax.servlet.http.HttpServletResponse; 7 | 8 | @Service 9 | public class CookieService { 10 | 11 | public static final String COOKIE_NAME = "nlidbUser"; 12 | public static final String USER_NONE = "none"; 13 | 14 | public void 
setUserIdCookie(HttpServletResponse res, String userId) { 15 | Cookie cookie = new Cookie(COOKIE_NAME, userId); 16 | cookie.setHttpOnly(true); 17 | cookie.setMaxAge(3600 * 24); 18 | cookie.setPath("/"); 19 | res.addCookie(cookie); 20 | } 21 | 22 | public void expireUserIdCookie(HttpServletResponse res, String userId) { 23 | Cookie cookie = new Cookie(COOKIE_NAME, userId); 24 | cookie.setHttpOnly(true); 25 | cookie.setMaxAge(0); 26 | cookie.setPath("/"); 27 | res.addCookie(cookie); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /client/src/app/actions.js: -------------------------------------------------------------------------------- 1 | 2 | export const CONNECT_USER_SESSION = 'app/connect-user-session'; 3 | export const connectUserSession = () => ({ 4 | type: CONNECT_USER_SESSION, 5 | }); 6 | 7 | export const DISCONNECT = 'app/disconnect'; 8 | export const disconnect = () => ({ 9 | type: DISCONNECT, 10 | }); 11 | 12 | export const CONNECT_TO_DB = 'app/connect-to-db'; 13 | export const connectToDB = payload => ({ 14 | type: CONNECT_TO_DB, payload, 15 | }); 16 | 17 | export const CONNECT_TO_DEMODB = 'app/connect-to-demodb'; 18 | export const connectToDemoDB = () => ({ 19 | type: CONNECT_TO_DEMODB, 20 | }); 21 | 22 | export const TRANSLATE_NL = 'app/translateNL-nl'; 23 | export const translateNL = payload => ({ 24 | type: TRANSLATE_NL, payload, 25 | }); 26 | 27 | export const EXECUTE_SQL = 'app/execute-sql'; 28 | export const executeSQL = payload => ({ 29 | type: EXECUTE_SQL, payload, 30 | }); 31 | 32 | export const SET_APP_STATE = 'app/set-app-state'; 33 | export const setAppState = payload => ({ 34 | type: SET_APP_STATE, payload, 35 | }); 36 | -------------------------------------------------------------------------------- /client/src/app/index.js: -------------------------------------------------------------------------------- 1 | import { connect } from 'react-redux'; 2 | import * as actions from 
'./actions'; 3 | import App from './app'; 4 | 5 | const mapStateToProps = state => ({ 6 | connected: state.app.get('connected'), 7 | connectErrorMsg: state.app.get('connectErrorMsg'), 8 | databaseUrl: state.app.get('databaseUrl'), 9 | translateResult: state.app.get('translateResult'), 10 | queryResult: state.app.get('queryResult'), 11 | }); 12 | 13 | const mapDispatchToProps = dispatch => ({ 14 | connectUserSession: () => { 15 | dispatch(actions.connectUserSession()); 16 | }, 17 | disconnect: () => { 18 | dispatch(actions.disconnect()); 19 | }, 20 | connectToDB: (payload) => { 21 | dispatch(actions.connectToDB(payload)); 22 | }, 23 | connectToDemoDB: () => { 24 | dispatch(actions.connectToDemoDB()); 25 | }, 26 | translateNL: (payload) => { 27 | dispatch(actions.translateNL(payload)); 28 | }, 29 | executeSQL: (payload) => { 30 | dispatch(actions.executeSQL(payload)); 31 | }, 32 | }); 33 | 34 | export default connect(mapStateToProps, mapDispatchToProps)(App); 35 | -------------------------------------------------------------------------------- /client/src/app/components/search-bar.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import styled from 'styled-components'; 3 | import buttonStyle from '../../styles/button'; 4 | 5 | const Wrapper = styled.form` 6 | margin: 20px auto; 7 | padding: 20px; 8 | display: flex; 9 | justify-content: center; 10 | align-items: center; 11 | `; 12 | 13 | const LabelText = styled.div` 14 | display: inline-block; 15 | line-height: 31xpx; 16 | `; 17 | 18 | const Input = styled.input` 19 | height: 25px; 20 | width: 600px; 21 | margin: 0 5px; 22 | font-size: 16px; 23 | line-height: 25px; 24 | `; 25 | 26 | const SubmitButton = styled.input` 27 | ${() => buttonStyle} 28 | `; 29 | 30 | class SearchBar extends Component { 31 | constructor(props) { 32 | super(props); 33 | this.state = { 34 | input: '', 35 | }; 36 | } 37 | 38 | handleChange(event) { 39 | 
this.setState({ input: event.target.value }); 40 | } 41 | 42 | handleSubmit(event) { 43 | const { state: { input }, props: { submit } } = this; 44 | event.preventDefault(); 45 | submit(input); 46 | } 47 | 48 | render() { 49 | const { title, buttonTitle } = this.props; 50 | 51 | return ( 52 | this.handleSubmit(e)}> 53 | {title} 54 | this.handleChange(event)} /> 55 | 56 | 57 | ); 58 | } 59 | } 60 | 61 | export default SearchBar; 62 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/service/RedisService.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.service; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.dukenlidb.nlidb.model.UserSession; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.beans.factory.annotation.Value; 7 | import org.springframework.stereotype.Service; 8 | import redis.clients.jedis.Jedis; 9 | 10 | import java.io.IOException; 11 | 12 | @Service 13 | public class RedisService { 14 | 15 | private Jedis jedis; 16 | 17 | @Autowired 18 | public RedisService( 19 | @Value("${redis.host}") String host, 20 | @Value("${redis.port}") int port 21 | ) { 22 | jedis = new Jedis(host, port); 23 | } 24 | 25 | public boolean hasUser(String userId) { 26 | return jedis.exists(userId); 27 | } 28 | 29 | public void removeUser(String userId) { 30 | jedis.del(userId); 31 | } 32 | 33 | public void refreshUser(String userId) { 34 | jedis.expire(userId, 3600 * 24); 35 | } 36 | 37 | public UserSession getUserSession(String userId) 38 | throws IOException { 39 | String sessionStr = jedis.get(userId); 40 | return UserSession.deserialize(sessionStr); 41 | } 42 | 43 | public void setUserSession(String userId, UserSession session) 44 | throws JsonProcessingException { 45 | jedis.set(userId, session.serialize()); 46 | jedis.expire(userId, 3600 * 24); 47 | } 48 | 49 | 
} 50 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | pipeline { 2 | agent any 3 | stages { 4 | stage('Test') { 5 | steps { 6 | sh '''#!/bin/bash 7 | docker build -t nlidb/test --file=Dockerfile.test ${WORKSPACE} 8 | docker run nlidb/test 9 | docker rm $(docker ps -aq --filter status=exited) 10 | docker rmi $(docker images -aq --filter dangling=true) 11 | ''' 12 | } 13 | } 14 | stage('Deploy') { 15 | when { 16 | branch 'master' 17 | } 18 | steps { 19 | sh '''#!/bin/bash 20 | docker build -t nlidb/main --file=Dockerfile ${WORKSPACE} 21 | docker save -o /tmp/nlidb-main.img nlidb/main 22 | echo scping the image file... 23 | scp -o "StrictHostKeyChecking no" -i $HOME/.ssh/aws-keping94-us-east1.pem /tmp/nlidb-main.img centos@34.231.141.223:/home/centos/ 24 | echo stopping and removing the previously running nlidb container 25 | ssh -o "StrictHostKeyChecking no" -i $HOME/.ssh/aws-keping94-us-east1.pem centos@34.231.141.223 'docker stop $(docker ps -aq --filter ancestor=nlidb/main)' 26 | ssh -o "StrictHostKeyChecking no" -i $HOME/.ssh/aws-keping94-us-east1.pem centos@34.231.141.223 'docker rm $(docker ps -aq --filter ancestor=nlidb/main)' 27 | ssh -o "StrictHostKeyChecking no" -i $HOME/.ssh/aws-keping94-us-east1.pem centos@34.231.141.223 'docker rmi $(docker images -aq --filter dangling=true)' 28 | echo loading the new image and start the container 29 | ssh -o "StrictHostKeyChecking no" -i $HOME/.ssh/aws-keping94-us-east1.pem centos@34.231.141.223 'docker load -i nlidb-main.img; docker run -d -p 8080:80 nlidb/main;' 30 | ''' 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /client/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 11 | 12 | 13 | 22 | React App 23 | 24 | 25 | 28 |
29 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/WordSimilarity.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.HashSet; 4 | import java.util.Set; 5 | 6 | /** 7 | * A class with only static methods to help calculate similarity between two words. 8 | * @author keping 9 | * 10 | */ 11 | public final class WordSimilarity { 12 | private WordSimilarity() { } 13 | 14 | /** 15 | * WordNet WUP similarity. 16 | * @param word1 17 | * @param word2 18 | * @return 19 | */ 20 | private static double semanticalSimilarity(String word1, String word2, WordNet wordNet) { 21 | return wordNet.similarity(word1, word2); 22 | } 23 | 24 | /** 25 | * Jaccord Coefficient 26 | * @param word1 27 | * @param word2 28 | * @return 29 | */ 30 | private static double lexicalSimilarity(String word1, String word2) { 31 | Set charSet1 = new HashSet<>(); 32 | Set charSet2 = new HashSet<>(); 33 | Set commonSet= new HashSet<>(); 34 | for (char c : word1.toCharArray()) { charSet1.add(c); } 35 | for (char c : word2.toCharArray()) { charSet2.add(c); } 36 | for (char c : charSet1) { 37 | if (charSet2.contains(c)) { commonSet.add(c); } 38 | } 39 | double jaccord = commonSet.size() / (double) (charSet1.size() + 40 | charSet2.size() + commonSet.size()); 41 | return Math.sqrt(jaccord); 42 | } 43 | 44 | /** 45 | * The similarity score between two words. This score is a combination of 46 | * semantic similarity and lexical similarity. 
47 | * @param word1 48 | * @param word2 49 | * @return similarity score between word1 and word2 50 | */ 51 | public static double getSimilarity(String word1, String word2, WordNet wordNet) { 52 | return Math.max(semanticalSimilarity(word1, word2, wordNet), 53 | lexicalSimilarity(word1, word2)); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/service/SQLExecutionService.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.service; 2 | 3 | import com.dukenlidb.nlidb.model.DBConnectionConfig; 4 | import org.postgresql.util.PSQLException; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Service; 7 | 8 | import java.sql.*; 9 | 10 | @Service 11 | public class SQLExecutionService { 12 | 13 | private DBConnectionService dbConnectionService; 14 | 15 | @Autowired 16 | public SQLExecutionService(DBConnectionService dbConnectionService) { 17 | this.dbConnectionService = dbConnectionService; 18 | } 19 | 20 | public String executeSQL(DBConnectionConfig config, String query) 21 | throws SQLException { 22 | try { 23 | Connection conn = dbConnectionService.getConnection(config); 24 | Statement stmt = conn.createStatement(); 25 | ResultSet rs = stmt.executeQuery(query); 26 | ResultSetMetaData rsmd = rs.getMetaData(); 27 | int numCols = rsmd.getColumnCount(); 28 | StringBuilder sb = new StringBuilder(); 29 | 30 | // SQL column index start from 1 31 | for (int col = 1; col <= numCols; col++) { 32 | sb.append(rsmd.getColumnName(col)).append("\t"); 33 | } 34 | sb.append("\n"); 35 | 36 | while (rs.next()) { 37 | for (int col = 1; col <= numCols; col++) { 38 | sb.append(rs.getString(col)).append("\t"); 39 | } 40 | sb.append("\n"); 41 | } 42 | 43 | rs.close(); 44 | stmt.close(); 45 | conn.close(); 46 | 47 | return sb.toString(); 48 | } catch (PSQLException e) { 49 | return 
e.getMessage(); 50 | } 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/QueryTree.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.PriorityQueue; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.ArrayList; 7 | 8 | public class QueryTree { 9 | List results; 10 | 11 | public QueryTree() {} 12 | 13 | public QueryTree (ParseTree T){ 14 | results = new ArrayList(); 15 | PriorityQueue Q = new PriorityQueue(); 16 | Q.add(T); 17 | HashMap H = new HashMap(); 18 | H.put(hashing(T), T); 19 | T.setEdit(0); 20 | 21 | while (Q.size() > 0){ 22 | ParseTree oriTree = Q.poll(); 23 | List treeList = adjuster(oriTree); 24 | double treeScore = evaluate(oriTree); 25 | 26 | for (int i = 0; i < treeList.size(); i++){ 27 | ParseTree currentTree = treeList.get(i); 28 | int hashValue = hashing(currentTree); 29 | if (oriTree.getEdit()<10 && !H.containsKey(hashValue)){ 30 | H.put(hashValue, currentTree); 31 | currentTree.setEdit(oriTree.getEdit()+1); 32 | if (evaluate(currentTree) >= treeScore){ 33 | Q.add(currentTree); 34 | results.add(currentTree); 35 | } 36 | } 37 | } 38 | } 39 | } 40 | 41 | public List adjuster (ParseTree T){ 42 | List treeList = new ArrayList(); 43 | 44 | //TODO: generate all possible parse trees in one subtree move operation 45 | 46 | return treeList; 47 | } 48 | 49 | public double evaluate (ParseTree T){ 50 | double score = 0; 51 | 52 | //TODO: generate the evaluation criteria 53 | return score; 54 | } 55 | 56 | public int hashing (ParseTree T){ 57 | int hashValue = 0; 58 | 59 | //TODO: how to get a reasonable hash value for each parse tree (with different node orders) 60 | return hashValue; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /doc/report/final/nlidb.bib: 
-------------------------------------------------------------------------------- 1 | @book{cowbook, 2 | author = {Ramakrishnan, Raghu and Gehrke, Johannes}, 3 | title = {Database Management Systems}, 4 | year = {2003}, 5 | isbn = {0072465638, 9780072465631}, 6 | edition = {3}, 7 | publisher = {McGraw-Hill, Inc.}, 8 | address = {New York, NY, USA} 9 | } 10 | 11 | @article{li2014, 12 | title={Constructing an interactive natural language interface for relational databases}, 13 | author={Li, Fei and Jagadish, HV}, 14 | journal={Proceedings of the VLDB Endowment}, 15 | volume={8}, 16 | number={1}, 17 | pages={73--84}, 18 | year={2014}, 19 | publisher={VLDB Endowment} 20 | } 21 | 22 | @misc{QATutorial, 23 | title = {Question and Answer Tutorial}, 24 | howpublished = {\url{https://github.com/scottyih/Slides/blob/master/QA\%20Tutorial.pdf}}, 25 | note = {Accessed: 25-09-2016} 26 | } 27 | 28 | @inproceedings{wu1994verbs, 29 | title={Verbs semantics and lexical selection}, 30 | author={Wu, Zhibiao and Palmer, Martha}, 31 | booktitle={Proceedings of the 32nd annual meeting on Association for Computational Linguistics}, 32 | pages={133--138}, 33 | year={1994}, 34 | organization={Association for Computational Linguistics} 35 | } 36 | 37 | @inproceedings{chen2014fast, 38 | title={A Fast and Accurate Dependency Parser using Neural Networks.}, 39 | author={Chen, Danqi and Manning, Christopher D}, 40 | booktitle={EMNLP}, 41 | pages={740--750}, 42 | year={2014} 43 | } 44 | 45 | @inproceedings{toutanova2003feature, 46 | title={Feature-rich part-of-speech tagging with a cyclic dependency network}, 47 | author={Toutanova, Kristina and Klein, Dan and Manning, Christopher D and Singer, Yoram}, 48 | booktitle={Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology-Volume 1}, 49 | pages={173--180}, 50 | year={2003}, 51 | organization={Association for Computational Linguistics} 52 | } 
-------------------------------------------------------------------------------- /doc/report/midterm/nlidb.bib: -------------------------------------------------------------------------------- 1 | @book{cowbook, 2 | author = {Ramakrishnan, Raghu and Gehrke, Johannes}, 3 | title = {Database Management Systems}, 4 | year = {2003}, 5 | isbn = {0072465638, 9780072465631}, 6 | edition = {3}, 7 | publisher = {McGraw-Hill, Inc.}, 8 | address = {New York, NY, USA} 9 | } 10 | 11 | @article{li2014, 12 | title={Constructing an interactive natural language interface for relational databases}, 13 | author={Li, Fei and Jagadish, HV}, 14 | journal={Proceedings of the VLDB Endowment}, 15 | volume={8}, 16 | number={1}, 17 | pages={73--84}, 18 | year={2014}, 19 | publisher={VLDB Endowment} 20 | } 21 | 22 | @misc{QATutorial, 23 | title = {Question and Answer Tutorial}, 24 | howpublished = {\url{https://github.com/scottyih/Slides/blob/master/QA\%20Tutorial.pdf}}, 25 | note = {Accessed: 25-09-2016} 26 | } 27 | 28 | @inproceedings{wu1994verbs, 29 | title={Verbs semantics and lexical selection}, 30 | author={Wu, Zhibiao and Palmer, Martha}, 31 | booktitle={Proceedings of the 32nd annual meeting on Association for Computational Linguistics}, 32 | pages={133--138}, 33 | year={1994}, 34 | organization={Association for Computational Linguistics} 35 | } 36 | 37 | @inproceedings{chen2014fast, 38 | title={A Fast and Accurate Dependency Parser using Neural Networks.}, 39 | author={Chen, Danqi and Manning, Christopher D}, 40 | booktitle={EMNLP}, 41 | pages={740--750}, 42 | year={2014} 43 | } 44 | 45 | @inproceedings{toutanova2003feature, 46 | title={Feature-rich part-of-speech tagging with a cyclic dependency network}, 47 | author={Toutanova, Kristina and Klein, Dan and Manning, Christopher D and Singer, Yoram}, 48 | booktitle={Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology-Volume 1}, 49 | 
pages={173--180}, 50 | year={2003}, 51 | organization={Association for Computational Linguistics} 52 | } -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/ParserDemo.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.io.StringReader; 4 | import java.util.List; 5 | 6 | import edu.stanford.nlp.ling.HasWord; 7 | import edu.stanford.nlp.ling.TaggedWord; 8 | import edu.stanford.nlp.parser.nndep.DependencyParser; 9 | import edu.stanford.nlp.process.DocumentPreprocessor; 10 | import edu.stanford.nlp.tagger.maxent.MaxentTagger; 11 | import edu.stanford.nlp.trees.GrammaticalStructure; 12 | import edu.stanford.nlp.util.logging.Redwood; 13 | 14 | public class ParserDemo { 15 | 16 | /** A logger for this class */ 17 | private static Redwood.RedwoodChannels log = Redwood.channels(ParserDemo.class); 18 | 19 | public static void main(String[] args) { 20 | String modelPath = DependencyParser.DEFAULT_MODEL; 21 | String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; 22 | 23 | for (int argIndex = 0; argIndex < args.length;) { 24 | switch (args[argIndex]) { 25 | case "-tagger": 26 | taggerPath = args[argIndex + 1]; 27 | argIndex += 2; 28 | break; 29 | case "-com.dukenlidb.nlidb.model": 30 | modelPath = args[argIndex + 1]; 31 | argIndex += 2; 32 | break; 33 | default: 34 | throw new RuntimeException("Unknown argument " + args[argIndex]); 35 | } 36 | } 37 | 38 | String text = "Return authors who have more papers than Bob in VLDB after 2000"; 39 | 40 | MaxentTagger tagger = new MaxentTagger(taggerPath); 41 | DependencyParser parser = DependencyParser.loadFromModelFile(modelPath); 42 | 43 | DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text)); 44 | for (List sentence : tokenizer) { 45 | List tagged = 
tagger.tagSentence(sentence); 46 | GrammaticalStructure gs = parser.predict(tagged); 47 | 48 | // Print typed dependencies 49 | log.info(gs); 50 | } 51 | 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/ImplicitNodeTest.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | public class ImplicitNodeTest { 4 | 5 | public void test() { 6 | 7 | ParseTree tree = new ParseTree(); 8 | Node[] nodes = new Node[20]; 9 | 10 | nodes[0] = new Node(0, "ROOT", "ROOT"); 11 | nodes[0].info = new NodeInfo("ROOT","ROOT"); 12 | 13 | nodes[1] = new Node(1, "return", "--"); // posTag not useful 14 | nodes[1].info = new NodeInfo("SN", "SELECT"); 15 | 16 | nodes[2] = new Node(2, "author", "--"); 17 | nodes[2].info = new NodeInfo("NN", "authorship.author"); 18 | 19 | nodes[3] = new Node(3, "more", "--"); 20 | nodes[3].info = new NodeInfo("ON", ">"); 21 | 22 | nodes[4] = new Node(4, "paper", "--"); 23 | nodes[4].info = new NodeInfo("NN", "in.pubkey"); 24 | 25 | nodes[5] = new Node(5, "VLDB", "--"); 26 | nodes[5].info = new NodeInfo("VN", "in.area"); 27 | 28 | nodes[6] = new Node(6, "after", "--"); 29 | nodes[6].info = new NodeInfo("ON", ">"); 30 | 31 | nodes[7] = new Node(7, "2000", "--"); 32 | nodes[7].info = new NodeInfo("VN", "in.year"); 33 | 34 | nodes[8] = new Node(8, "Bob", "--"); 35 | nodes[8].info = new NodeInfo("VN", "authorship.author"); 36 | 37 | tree.root = nodes[0]; 38 | tree.root.setChild(nodes[1]); 39 | tree.root.setChild(nodes[3]); 40 | nodes[1].setParent(nodes[0]); 41 | nodes[3].setParent(nodes[0]); 42 | 43 | nodes[1].setChild(nodes[2]); 44 | nodes[2].setParent(nodes[1]); 45 | 46 | nodes[3].setChild(nodes[4]); 47 | nodes[3].setChild(nodes[8]); 48 | nodes[4].setParent(nodes[3]); 49 | nodes[8].setParent(nodes[3]); 50 | 51 | nodes[4].setChild(nodes[5]); 52 | nodes[4].setChild(nodes[6]); 
53 | nodes[5].setParent(nodes[4]); 54 | nodes[6].setParent(nodes[4]); 55 | 56 | nodes[6].setChild(nodes[7]); 57 | nodes[7].setParent(nodes[6]); 58 | 59 | System.out.println("Before"); 60 | System.out.println(tree.toString()); 61 | 62 | tree.insertImplicitNodes(); 63 | 64 | System.out.println("After"); 65 | System.out.println(tree.toString()); 66 | 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/IParseTree.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.List; 4 | 5 | /** 6 | * An interface for a parse tree. 7 | * @author keping 8 | * 9 | */ 10 | public interface IParseTree extends Iterable { 11 | 12 | /** 13 | * Get size of the ParseTree, including the root Node. 14 | * @return number of nodes 15 | */ 16 | public int size(); 17 | 18 | /** 19 | * Return the number of edit of the ParseTree. 20 | */ 21 | public int getEdit(); 22 | 23 | /** 24 | * Set the number of edit of the ParseTree. 25 | */ 26 | public void setEdit(int edit); 27 | 28 | /** 29 | * Restructure the parse tree by removing meaningless nodes. 30 | * The Node object should contain information indicating whether 31 | * it is meaningful. It is meaningful if it corresponds to an SQL component. 32 | */ 33 | public void removeMeaninglessNodes(); 34 | 35 | /** 36 | * Restructure the parse tree by merging Logic Nodes and Quantifier Nodes with their parents. 37 | */ 38 | public ParseTree mergeLNQN(); 39 | 40 | /** 41 | * Insert implicit nodes, mostly about the symmetry for comparison. 42 | */ 43 | public void insertImplicitNodes(); 44 | 45 | /** 46 | * Get a list of structurally adjusted parse trees. 47 | * @return a list of adjusted trees 48 | */ 49 | public List getAdjustedTrees(); 50 | 51 | /** 52 | * Translate the parse tree into an SQL translateNL. 
53 | * @return 54 | */ 55 | public SQLQuery translateToSQL(SchemaGraph schema); 56 | 57 | /** 58 | * Convert the tree to a String for easier debugging. 59 | * @return string representation 60 | */ 61 | public String toString(); 62 | 63 | /** 64 | * Check equality for two IParseTree objects, for searching 65 | * them in a HashMap. 66 | * @param other 67 | * @return true if they are equal 68 | */ 69 | public boolean equals(Object obj); 70 | 71 | /** 72 | * Get the hashCode for the parse tree. So that trees can be 73 | * stored in a HashMap and equal trees can be seen as one. 74 | * @return hashCode for the object 75 | */ 76 | public int hashCode(); 77 | } 78 | -------------------------------------------------------------------------------- /client/src/common/form.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import styled from 'styled-components'; 3 | import buttonStyle from '../styles/button'; 4 | import newId from '../utils/new-id'; 5 | 6 | const Wrapper = styled.form` 7 | margin: 20px; 8 | width: 230px; 9 | `; 10 | 11 | const FormItem = styled.div` 12 | display: flex; 13 | justify-content: flex-end; 14 | margin-top: 20px; 15 | `; 16 | const SubmitFormItem = FormItem.extend` 17 | justify-content: center; 18 | margin-top: 30px; 19 | `; 20 | 21 | const LabelText = styled.label` 22 | padding-right: 5px; 23 | `; 24 | 25 | const SubmitInput = styled.input` 26 | ${() => buttonStyle} 27 | `; 28 | 29 | const Message = styled.div` 30 | text-align: center; 31 | `; 32 | 33 | class Form extends Component { 34 | 35 | constructor(props) { 36 | super(props); 37 | const { fields } = this.props; 38 | this.state = fields.reduce((obj, field) => ( 39 | Object.assign(obj, { [field.name]: field.initialValue || '' }) 40 | ), {}); 41 | } 42 | 43 | handleSubmit(event) { 44 | event.preventDefault(); 45 | this.props.button.submit(this.state); 46 | } 47 | 48 | render() { 49 | const { fields, message, 
button } = this.props; 50 | const ids = fields.map(() => newId()); 51 | return ( 52 | this.handleSubmit(e)}> 53 | {message ? {message} : null} 54 | {fields.map((field, idx) => ( 55 | 56 | 69 | 70 | ))} 71 | 72 | 73 | 74 | 75 | ); 76 | } 77 | 78 | } 79 | 80 | export default Form; 81 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 
45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /client/src/app/sagas.js: -------------------------------------------------------------------------------- 1 | import { call, put, takeLatest } from 'redux-saga/effects'; 2 | import * as requests from '../requests'; 3 | import * as actions from './actions'; 4 | 5 | function* handleConnectUserSession() { 6 | const res = yield call(requests.connectUserSession); 7 | const { success, databaseUrl } = yield res.json(); 8 | if (success) { 9 | yield put(actions.setAppState({ 10 | connected: true, 11 | databaseUrl, 12 | })); 13 | } 14 | } 15 | 16 | function* handleDisconnect() { 17 | const res = yield call(requests.disconnect); 18 | if (res.status === 200) { 19 | yield put(actions.setAppState({ 20 | connected: false, 21 | connectErrorMsg: null, 22 | databaseUrl: null, 23 | translateResult: null, 24 | 
queryResult: null, 25 | })); 26 | } 27 | } 28 | 29 | function* handleConnectToDB(action) { 30 | const res = yield call(requests.connectToDB, action.payload); 31 | const body = yield res.json(); 32 | if (res.status === 200) { 33 | yield put(actions.setAppState({ 34 | connected: true, 35 | connectErrorMsg: null, 36 | databaseUrl: body.databaseUrl, 37 | })); 38 | } else { 39 | yield put(actions.setAppState({ 40 | connectErrorMsg: body.message, 41 | })); 42 | } 43 | } 44 | 45 | function* handleConnectToDemoDB() { 46 | const res = yield call(requests.connectToDB, { 47 | host: 'db-nlidb.cjna4ta2n7it.us-east-1.rds.amazonaws.com', 48 | port: 5432, 49 | database: 'demodb', 50 | username: 'keping', 51 | password: 'kepingwang' 52 | }); 53 | const body = yield res.json(); 54 | if (res.status === 200) { 55 | yield put(actions.setAppState({ 56 | connected: true, 57 | connectErrorMsg: null, 58 | databaseUrl: body.databaseUrl, 59 | })); 60 | } else { 61 | yield put(actions.setAppState({ 62 | connectErrorMsg: body.message, 63 | })); 64 | } 65 | } 66 | 67 | function* handleTranslateNL(action) { 68 | const res = yield call(requests.translateNL, action.payload); 69 | const body = yield res.json(); 70 | yield put(actions.setAppState({ 71 | translateResult: body.translateResult, 72 | })); 73 | } 74 | 75 | function* handleExecuteSQL(action) { 76 | const res = yield call(requests.executeSQL, action.payload); 77 | const body = yield res.json(); 78 | yield put(actions.setAppState({ 79 | queryResult: body.queryResult, 80 | })); 81 | } 82 | 83 | function* sagas() { 84 | yield takeLatest(actions.CONNECT_USER_SESSION, handleConnectUserSession); 85 | yield takeLatest(actions.DISCONNECT, handleDisconnect); 86 | yield takeLatest(actions.CONNECT_TO_DB, handleConnectToDB); 87 | yield takeLatest(actions.CONNECT_TO_DEMODB, handleConnectToDemoDB); 88 | yield takeLatest(actions.TRANSLATE_NL, handleTranslateNL); 89 | yield takeLatest(actions.EXECUTE_SQL, handleExecuteSQL); 90 | } 91 | 92 | export default 
sagas; 93 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Natural Language Interface to DataBases (NLIDB) 2 | 3 | ### [The final report is here](./doc/report/final/final.pdf). 4 | 5 | ### How it works. 6 | 1. Parse the input and map nodes to SQL components and database attributes. 7 | ![nodes_mapping_example](./doc/report/final/figures/nodes_mapping_example.png) 8 | ![gui_nodes_mapping](./doc/report/final/figures/gui_nodes_mapping.png) 9 | 10 | 2. Adjust the structure of the parse tree to make it syntactically valid. 11 | ![gui_tree_adjustor](./doc/report/final/figures/gui_tree_adjustor1.png) 12 | 13 | 3. Translate the parse tree to an SQL query. 14 | ![gui_translation](./doc/report/final/figures/gui_translation.png) 15 | 16 | ****** 17 | 18 | Changed something to set up CI. 19 | 20 | ### Grammar rules of syntactically valid parse trees: 21 | 22 | 1. Q -> (SClause)(ComplexCondition)\* 23 | 2. SClause -> SELECT + GNP 24 | 3. ComplexCondition -> ON + (leftSubtree\*rightSubtree) 25 | 4. leftSubtree -> GNP 26 | 5. rightSubtree -> GNP | VN | MIN | MAX 27 | 6. GNP -> (FN + GNP) | NP 28 | 7. NP -> NN + (NN)\*(condition)\* 29 | 8. condition -> VN | (ON + VN) 30 | 31 | Note: 32 | All terminal nodes are defined in the paper. 33 | \+ represents a parent-child relationship. 34 | \* represents a sibling relationship. 35 | One Query (Q) must have one SClause and zero or more ComplexConditions. 36 | A ComplexCondition must have one ON, with a leftSubtree and a rightSubtree. 37 | An NP is: one NN (since an SQL query has to select at least one attribute), whose children 38 | are multiple NNs and Conditions. (All other selected attributes and conditions are stacked 39 | here to form a wide "NP" tree.) 40 | 41 | ***** 42 | 43 | ### For developers: 44 | 45 | This is a project managed using maven. 
Just in case, if you don't know about maven, check out this wonderful [tutorial](https://www.udemy.com/apachemaven/), which you have to pay for though... 46 | 47 | Right now it uses the dblp database on local machine. To connect to the database, make sure you have database "dblp" on your localhost with port 5432, accessible to user "dblpuser" with password "dblpuser". Or modify the `startConnection()` method in class `Controller` to connect to database. 48 | 49 | To get hands-on with the development, import it into eclipse, but first make sure you've installed the following eclipse plugins: 50 | 51 | 1. m2eclipse (for using maven in eclipse) 52 | 2. e(fx)clipse (for using javafx smoothly in eclipse) 53 | 54 | To use WordNet inside the project (I'm using MIT JWI as the interface, which is already included in maven `pom.xml`): 55 | 56 | 1. Create a folder "lib" in the project base directory. 57 | 2. Download [WordNet](https://wordnet.princeton.edu/wordnet/download/) into that "lib" directory just created. 58 | 3. Extract the downloaded WordNet. 59 | 4. Finally just make sure "$(basedir)/lib/WordNet-3.0/dict/" exists. (Or you have to modify the path inside class `com.dukenlidb.nlidb.model.WordNet`.) 60 | 61 | The entry point of the application is the `com.dukenlidb.nlidb.main()` method in `UserView` class. 
62 | -------------------------------------------------------------------------------- /client/src/app/app.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import styled from 'styled-components'; 3 | import Form from '../common/form'; 4 | import SearchBar from './components/search-bar'; 5 | import buttonStyle from '../styles/button'; 6 | 7 | const Wrapper = styled.div` 8 | max-width: 1200px; 9 | margin: auto; 10 | `; 11 | 12 | const Title = styled.h1` 13 | text-align: center; 14 | `; 15 | 16 | const ResultText = styled.div` 17 | margin: 0 auto; 18 | width: 800px; 19 | `; 20 | 21 | const Text = styled.div` 22 | text-align: center; 23 | margin: 0 5px; 24 | `; 25 | 26 | const Button = styled.button` 27 | ${() => buttonStyle} 28 | `; 29 | 30 | const StatusBar = styled.div` 31 | display: flex; 32 | justify-content: center; 33 | align-items: center; 34 | `; 35 | 36 | const VerticalCenterDiv = styled.div` 37 | display: flex; 38 | flex-direction: column; 39 | align-items: center; 40 | `; 41 | 42 | class App extends Component { 43 | 44 | componentDidMount() { 45 | const { connectUserSession } = this.props; 46 | connectUserSession(); 47 | } 48 | 49 | render() { 50 | const { 51 | connected, connectErrorMsg, databaseUrl, translateResult, queryResult, 52 | disconnect, connectToDB, connectToDemoDB, translateNL, executeSQL, 53 | } = this.props; 54 | 55 | return ( 56 | 57 | Natural Language Interface to DataBases 58 | { 59 | connected 60 | ? ( 61 |
62 | 63 | connected to {databaseUrl} 64 | 65 | 66 | translateNL({ input })} 69 | buttonTitle={'translate'} 70 | /> 71 | {translateResult} 72 | executeSQL({ query: input })} 75 | buttonTitle={'execute'} 76 | /> 77 | 78 |
{queryResult}
79 |
80 |
81 | ) 82 | : ( 83 | 84 | 85 |
100 | 101 | ) 102 | } 103 | 104 | ); 105 | } 106 | } 107 | 108 | export default App; 109 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/SQLQuery.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.List; 8 | import java.util.Map; 9 | 10 | /** 11 | * Just a wrapper for a String of sql translateNL. 12 | * @author keping 13 | */ 14 | public class SQLQuery { 15 | private List blocks; 16 | private Map> map; 17 | 18 | SQLQuery() { 19 | map = new HashMap<>(); 20 | map.put("SELECT", new ArrayList()); 21 | map.put("FROM", new HashSet()); 22 | map.put("WHERE", new HashSet()); 23 | blocks = new ArrayList(); 24 | } 25 | 26 | @Deprecated 27 | public SQLQuery(String s) { 28 | 29 | } 30 | 31 | /** 32 | * Get the String translateNL insides the SQLQuery. 33 | * @return 34 | */ 35 | String get() { return toString(); } 36 | 37 | public void addBlock(SQLQuery query) { 38 | blocks.add(query); 39 | add("FROM", "BLOCK"+blocks.size()); 40 | } 41 | 42 | Collection getCollection(String keyWord) { return map.get(keyWord); } 43 | 44 | /** 45 | * Add (key, value) to the SQL Query. 46 | * For example, (SELECT, article.title) or (FROM, article). 47 | * @param key 48 | * @param val 49 | */ 50 | void add(String key, String value) { 51 | map.get(key).add(value); 52 | } 53 | 54 | 55 | /** 56 | * Serve for the toString() method. 
57 | * @param SELECT (or FROM) 58 | * @return one line of arguments of that translateNL (SELECT, FROM) 59 | */ 60 | private StringBuilder toSBLine(Collection SELECT) { 61 | StringBuilder sb = new StringBuilder(); 62 | for (String val : SELECT) { 63 | if (sb.length() == 0) { 64 | sb.append(val); 65 | } else { 66 | sb.append(", ").append(val); 67 | } 68 | } 69 | return sb; 70 | } 71 | 72 | /** 73 | * Similar to {@link #toSBLine(Collection)}, but that incorporates 74 | * the information of "AND" and "OR". 75 | * @param WHERE 76 | * @return 77 | */ 78 | private StringBuilder toSBLineCondition(Collection WHERE) { 79 | StringBuilder sb = new StringBuilder(); 80 | for (String val : WHERE) { 81 | if (sb.length() == 0) { 82 | sb.append(val); 83 | } else { 84 | // currently only allow for "AND" 85 | // TODO: add "OR" 86 | sb.append(" AND ").append(val); 87 | } 88 | } 89 | return sb; 90 | } 91 | 92 | @Override 93 | public String toString() { 94 | if (map.get("SELECT").isEmpty() || map.get("FROM").isEmpty()) { 95 | return "Illegal Query"; 96 | } 97 | StringBuilder sb = new StringBuilder(); 98 | for (int i = 0; i < blocks.size(); i++) { 99 | sb.append("BLOCK"+(i+1)+":").append("\n"); 100 | sb.append(blocks.get(i).toString()).append("\n"); 101 | sb.append("\n"); 102 | } 103 | sb.append("SELECT ").append(toSBLine(map.get("SELECT"))).append("\n"); 104 | sb.append("FROM ").append(toSBLine(map.get("FROM"))).append("\n"); 105 | if (!map.get("WHERE").isEmpty()) { 106 | sb.append("WHERE ").append(toSBLineCondition(map.get("WHERE"))).append("\n"); 107 | } 108 | sb.append(";\n"); 109 | return sb.toString(); 110 | } 111 | 112 | 113 | } 114 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/NodeInfo.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.Comparator; 4 | 5 | /** 6 | * Immutable class 
indicating the SQL component for a Node. 7 | * @author keping 8 | * 9 | */ 10 | public class NodeInfo { 11 | // TODO: all fields should be private in final version. 12 | private String type; 13 | private String value; 14 | /** 15 | * Similarity score of the Node to the column/table name in schema. 16 | */ 17 | private double score = 1.0; 18 | 19 | public NodeInfo(String type, String value) { 20 | this.type = type; 21 | this.value = value; 22 | } 23 | public NodeInfo(String type, String value, double score) { 24 | this(type, value); 25 | this.score = score; 26 | } 27 | public NodeInfo(NodeInfo ni){ 28 | this.type = ni.type; 29 | this.value = ni.value; 30 | this.score = ni.score; 31 | } 32 | @Override 33 | public String toString() { 34 | return type+": "+value; 35 | } 36 | public String getType() { return type; } 37 | public String getValue() { 38 | return value; 39 | } 40 | 41 | public double getScore(){ 42 | return score; 43 | } 44 | 45 | public static class ReverseScoreComparator implements Comparator { 46 | @Override 47 | public int compare(NodeInfo a, NodeInfo b) { 48 | if (a.score < b.score) { return 1; } 49 | else if (a.score > b.score) { return -1; } 50 | else { return 0; } 51 | } 52 | } 53 | 54 | @Override 55 | public int hashCode() { 56 | final int prime = 31; 57 | int result = 1; 58 | result = prime * result + ((type == null) ? 0 : type.hashCode()); 59 | result = prime * result + ((value == null) ? 
0 : value.hashCode()); 60 | return result; 61 | } 62 | @Override 63 | public boolean equals(Object obj) { 64 | if (this == obj) 65 | return true; 66 | if (obj == null) 67 | return false; 68 | if (getClass() != obj.getClass()) 69 | return false; 70 | NodeInfo other = (NodeInfo) obj; 71 | if (type == null) { 72 | if (other.type != null) 73 | return false; 74 | } else if (!type.equals(other.type)) 75 | return false; 76 | if (value == null) { 77 | if (other.value != null) 78 | return false; 79 | } else if (!value.equals(other.value)) 80 | return false; 81 | return true; 82 | } 83 | 84 | public boolean ExactSameSchema (NodeInfo other) { 85 | 86 | if (type == null || other.getType() == null || value == null || other.getValue() == null) { 87 | return false; 88 | } 89 | 90 | if (type.equals(other.getType()) && value.equals(other.getValue())) { 91 | 92 | return true; 93 | } 94 | 95 | return false; 96 | } 97 | 98 | public boolean sameSchema (NodeInfo other) { 99 | 100 | if (type == null || other.getType() == null || value == null || other.getValue() == null) { 101 | return false; 102 | } 103 | 104 | int indexOfDot_Other = other.getValue().indexOf('.'); 105 | 106 | int indexOfDot = value.indexOf('.'); 107 | 108 | if (indexOfDot_Other == -1) { 109 | 110 | indexOfDot_Other = other.getValue().length(); 111 | } 112 | 113 | if (indexOfDot == -1) { 114 | 115 | indexOfDot = value.length(); 116 | } 117 | 118 | if (other.getValue().substring(0, indexOfDot_Other - 1) 119 | .equals(value.substring(0, indexOfDot - 1))) { 120 | 121 | return true; 122 | } 123 | 124 | 125 | return false; 126 | } 127 | 128 | } 129 | -------------------------------------------------------------------------------- /client/src/utils/registerServiceWorker.js: -------------------------------------------------------------------------------- 1 | // In production, we register a com.dukenlidb.nlidb.service worker to serve assets from local cache. 
2 | 3 | // This lets the app load faster on subsequent visits in production, and gives 4 | // it offline capabilities. However, it also means that developers (and users) 5 | // will only see deployed updates on the "N+1" visit to a page, since previously 6 | // cached resources are updated in the background. 7 | 8 | // To learn more about the benefits of this com.dukenlidb.nlidb.model, read https://goo.gl/KwvDNy. 9 | // This link also includes instructions on opting out of this behavior. 10 | 11 | const isLocalhost = Boolean( 12 | window.location.hostname === 'localhost' || 13 | // [::1] is the IPv6 localhost address. 14 | window.location.hostname === '[::1]' || 15 | // 127.0.0.1/8 is considered localhost for IPv4. 16 | window.location.hostname.match( 17 | /^127(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$/ 18 | ) 19 | ); 20 | 21 | export default function register() { 22 | if (process.env.NODE_ENV === 'production' && 'serviceWorker' in navigator) { 23 | // The URL constructor is available in all browsers that support SW. 24 | const publicUrl = new URL(process.env.PUBLIC_URL, window.location); 25 | if (publicUrl.origin !== window.location.origin) { 26 | // Our com.dukenlidb.nlidb.service worker won't work if PUBLIC_URL is on a different origin 27 | // from what our page is served on. This might happen if a CDN is used to 28 | // serve assets; see https://github.com/facebookincubator/create-react-app/issues/2374 29 | return; 30 | } 31 | 32 | window.addEventListener('load', () => { 33 | const swUrl = `${process.env.PUBLIC_URL}/service-worker.js`; 34 | 35 | if (!isLocalhost) { 36 | // Is not local host. Just register com.dukenlidb.nlidb.service worker 37 | registerValidSW(swUrl); 38 | } else { 39 | // This is running on localhost. Lets check if a com.dukenlidb.nlidb.service worker still exists or not. 
40 | checkValidServiceWorker(swUrl); 41 | } 42 | }); 43 | } 44 | } 45 | 46 | function registerValidSW(swUrl) { 47 | navigator.serviceWorker 48 | .register(swUrl) 49 | .then(registration => { 50 | registration.onupdatefound = () => { 51 | const installingWorker = registration.installing; 52 | installingWorker.onstatechange = () => { 53 | if (installingWorker.state === 'installed') { 54 | if (navigator.serviceWorker.controller) { 55 | // At this point, the old content will have been purged and 56 | // the fresh content will have been added to the cache. 57 | // It's the perfect time to display a "New content is 58 | // available; please refresh." message in your web app. 59 | console.log('New content is available; please refresh.'); 60 | } else { 61 | // At this point, everything has been precached. 62 | // It's the perfect time to display a 63 | // "Content is cached for offline use." message. 64 | console.log('Content is cached for offline use.'); 65 | } 66 | } 67 | }; 68 | }; 69 | }) 70 | .catch(error => { 71 | console.error('Error during com.dukenlidb.nlidb.service worker registration:', error); 72 | }); 73 | } 74 | 75 | function checkValidServiceWorker(swUrl) { 76 | // Check if the com.dukenlidb.nlidb.service worker can be found. If it can't reload the page. 77 | fetch(swUrl) 78 | .then(response => { 79 | // Ensure com.dukenlidb.nlidb.service worker exists, and that we really are getting a JS file. 80 | if ( 81 | response.status === 404 || 82 | response.headers.get('content-type').indexOf('javascript') === -1 83 | ) { 84 | // No com.dukenlidb.nlidb.service worker found. Probably a different app. Reload the page. 85 | navigator.serviceWorker.ready.then(registration => { 86 | registration.unregister().then(() => { 87 | window.location.reload(); 88 | }); 89 | }); 90 | } else { 91 | // Service worker found. Proceed as normal. 92 | registerValidSW(swUrl); 93 | } 94 | }) 95 | .catch(() => { 96 | console.log( 97 | 'No internet connection found. 
App is running in offline mode.' 98 | ); 99 | }); 100 | } 101 | 102 | export function unregister() { 103 | if ('serviceWorker' in navigator) { 104 | navigator.serviceWorker.ready.then(registration => { 105 | registration.unregister(); 106 | }); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/NodeMapper.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.List; 8 | import java.util.Map; 9 | import java.util.Set; 10 | 11 | /** 12 | * A class to help map word {@link Node} in {@link ParseTree} 13 | * to SQL components (represented by class {@link NodeInfo}). 14 | * @author keping 15 | * 16 | */ 17 | public class NodeMapper { 18 | private WordNet wordNet; 19 | /** 20 | * Key is the word. Value is the corresponding SQL component. 21 | * For example: ("return", ("SN", "SELECT")) 22 | */ 23 | private Map map; 24 | 25 | 26 | /** 27 | * Initialize the NodeMapper. (The mapper could be made configurable. It can also initialize 28 | * by reading mappings from a file) 29 | * @throws Exception 30 | */ 31 | public NodeMapper() throws Exception { 32 | wordNet = new WordNet(); 33 | map = new HashMap(); 34 | map.put("return", new NodeInfo("SN", "SELECT")); // Select Node 35 | 36 | map.put("equals", new NodeInfo("ON", "=")); // Operator Node 37 | map.put("less", new NodeInfo("ON", "<")); 38 | map.put("greater", new NodeInfo("ON", ">")); 39 | map.put("not", new NodeInfo("ON", "!=")); //TODO: not is a operator node or logic node? 
40 | map.put("before", new NodeInfo("ON", "<")); 41 | map.put("after", new NodeInfo("ON", ">")); 42 | map.put("more", new NodeInfo("ON", ">")); 43 | map.put("older", new NodeInfo("ON", ">")); 44 | map.put("newer", new NodeInfo("ON", "<")); 45 | 46 | map.put("fn", new NodeInfo("FN", "AVG")); // Function Node 47 | map.put("average", new NodeInfo("FN", "AVG")); 48 | map.put("most", new NodeInfo("FN", "MAX")); 49 | map.put("total", new NodeInfo("FN", "SUM")); 50 | map.put("number", new NodeInfo("FN","COUNT")); 51 | 52 | map.put("all", new NodeInfo("QN", "ALL")); // Quantifier Node 53 | map.put("any", new NodeInfo("QN", "ANY")); 54 | map.put("each", new NodeInfo("QN", "EACH")); 55 | 56 | map.put("and", new NodeInfo("LN", "AND")); // Logic Node 57 | map.put("or", new NodeInfo("LN", "OR")); 58 | 59 | 60 | } 61 | 62 | /** 63 | *

Return the a ranked list of candidate NodeInfos for this Node. This method 64 | * will be called by the controller, and then the candidates will be passed on 65 | * to the view for user to choose. If there is only one candidate in the list, 66 | * the choice is automatically made.

67 | *

The length of the list of NodeInfos is at least 1. We will have special type 68 | * in NodeInfo if the Node doesn't correspond to any SQL component (the Node is 69 | * meaningless).

70 | *

The returned list contains at most 6 elements.

71 | *

Treat all input as lower case.

72 | * @param node 73 | * @param schema 74 | * @return a ranked of NodeInfo 75 | */ 76 | public List getNodeInfoChoices(Node node, SchemaGraph schema) { 77 | List result = new ArrayList(); //final output 78 | if (node.getWord().equals("ROOT")) { 79 | result.add(new NodeInfo("ROOT", "ROOT")); 80 | return result; 81 | } 82 | Set valueNodes = new HashSet(); //used to store (type, value, score) of 100 sample values for every column in every table 83 | String word = node.getWord().toLowerCase(); // all words as lower case 84 | 85 | if (map.containsKey(word)) { 86 | result.add(map.get(word)); 87 | return result; 88 | } 89 | 90 | for (String tableName : schema.getTableNames()) { 91 | result.add(new NodeInfo("NN", tableName, 92 | WordSimilarity.getSimilarity(word, tableName, wordNet))); //map name nodes(table names) 93 | for (String colName : schema.getColumns(tableName)) { 94 | result.add(new NodeInfo("NN", tableName+"."+colName, 95 | WordSimilarity.getSimilarity(word, colName, wordNet))); //map name nodes (attribute names) 96 | for (String value : schema.getValues(tableName, colName)) { 97 | if (word == null || value == null) { 98 | System.out.println("Comparing "+word+" and "+value); 99 | System.out.println("In table "+tableName+", column "+colName); 100 | } 101 | valueNodes.add(new NodeInfo("VN", tableName+"."+colName, 102 | WordSimilarity.getSimilarity(word, value, wordNet))); //add every sample value into valueNodes 103 | } 104 | } 105 | } 106 | 107 | //map value nodes (table values), to get the value node with highest similarity, add its (type, value, score) into result 108 | // we want all candidates, not only the one with the highest similarity 109 | result.addAll(valueNodes); 110 | result.add(new NodeInfo("UNKNOWN", "meaningless", 1.0)); 111 | Collections.sort(result, new NodeInfo.ReverseScoreComparator()); 112 | return result; 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- 
/src/main/java/com/dukenlidb/nlidb/archive/ui/UserView.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.ui; 2 | 3 | import com.dukenlidb.nlidb.archive.app.Controller; 4 | import javafx.application.Application; 5 | import javafx.application.Platform; 6 | import javafx.collections.FXCollections; 7 | import javafx.collections.ObservableList; 8 | import javafx.geometry.Insets; 9 | import javafx.scene.Scene; 10 | import javafx.scene.control.Button; 11 | import javafx.scene.control.ComboBox; 12 | import javafx.scene.control.Label; 13 | import javafx.scene.control.TextArea; 14 | import javafx.scene.layout.HBox; 15 | import javafx.scene.layout.VBox; 16 | import javafx.scene.text.Text; 17 | import javafx.stage.Stage; 18 | import com.dukenlidb.nlidb.archive.model.NodeInfo; 19 | 20 | public class UserView extends Application { 21 | private static final String TEST_TEXT = "Return the number of authors who published theory papers before 1980."; 22 | // "Return the number of authors who published theory papers before 1980." 
23 | 24 | Stage stage; // the window 25 | Scene scene; // the com.dukenlidb.nlidb.main content in the window 26 | Controller ctrl; 27 | Button btnTranslate; 28 | Text display; 29 | ComboBox choiceBox; // use scrollable comboBox instead of choiceBox 30 | Button btnConfirmChoice; 31 | ComboBox treeChoice; 32 | Button btnTreeConfirm; 33 | HBox hb; 34 | VBox vb1, vb2; 35 | 36 | 37 | public void setDisplay(String text) { 38 | display.setText(text); 39 | } 40 | 41 | public void appendDisplay(String text) { 42 | display.setText(display.getText()+text); 43 | } 44 | 45 | public void showNodesChoice() { 46 | vb2.getChildren().addAll(choiceBox, btnConfirmChoice); 47 | } 48 | 49 | public void removeChoiceBoxButton() { 50 | vb2.getChildren().remove(choiceBox); 51 | vb2.getChildren().remove(btnConfirmChoice); 52 | } 53 | 54 | public void setChoices(ObservableList choices) { 55 | choiceBox.setItems(choices); 56 | choiceBox.setValue(choices.get(0)); 57 | } 58 | 59 | public NodeInfo getChoice() { 60 | return choiceBox.getValue(); 61 | } 62 | 63 | public void showTreesChoice() { 64 | vb2.getChildren().addAll(treeChoice, btnTreeConfirm); 65 | } 66 | 67 | public void removeTreesChoices() { 68 | vb2.getChildren().removeAll(treeChoice, btnTreeConfirm); 69 | } 70 | 71 | @Override 72 | public void start(Stage primaryStage) throws Exception { 73 | 74 | stage = primaryStage; 75 | stage.setTitle("Window for NLIDB"); 76 | 77 | Label label1 = new Label("Welcome to Natural Language Interface to DataBase!"); 78 | 79 | Label lblInput = new Label("Natural Language Input:"); 80 | TextArea fieldIn = new TextArea(); 81 | fieldIn.setPrefHeight(100); 82 | fieldIn.setPrefWidth(100); 83 | fieldIn.setWrapText(true); 84 | fieldIn.setText(TEST_TEXT); 85 | 86 | btnTranslate = new Button("translate"); 87 | 88 | // Define action of the translate button. 
89 | btnTranslate.setOnAction(e -> { 90 | ctrl.processNaturalLanguage(fieldIn.getText()); 91 | }); 92 | 93 | display = new Text(); 94 | display.setWrappingWidth(500); 95 | display.prefHeight(300); 96 | display.setText("Default display text"); 97 | 98 | // choices and button for nodes mapping 99 | choiceBox = new ComboBox(); 100 | choiceBox.setVisibleRowCount(6); 101 | btnConfirmChoice = new Button("confirm choice"); 102 | btnConfirmChoice.setOnAction(e -> { 103 | ctrl.chooseNode(getChoice()); 104 | }); 105 | 106 | // choices and button for tree selection 107 | treeChoice = new ComboBox(); // ! only show 3 choices now 108 | treeChoice.setItems(FXCollections.observableArrayList(0,1,2)); 109 | treeChoice.getSelectionModel().selectedIndexProperty().addListener((ov, oldV, newV) -> { 110 | ctrl.showTree(treeChoice.getItems().get((Integer) newV)); 111 | }); 112 | btnTreeConfirm = new Button("confirm tree choice"); 113 | btnTreeConfirm.setOnAction(e -> { 114 | ctrl.chooseTree(treeChoice.getValue()); 115 | }); 116 | 117 | vb1 = new VBox(); 118 | vb1.setSpacing(10); 119 | vb1.getChildren().addAll( 120 | label1, 121 | lblInput,fieldIn, 122 | btnTranslate 123 | ); 124 | 125 | vb2 = new VBox(); 126 | vb2.setSpacing(20); 127 | vb2.getChildren().addAll(display); 128 | 129 | hb = new HBox(); 130 | hb.setPadding(new Insets(15, 12, 15, 12)); 131 | hb.setSpacing(10); 132 | hb.getChildren().addAll(vb1, vb2); 133 | 134 | scene = new Scene(hb, 800, 450); 135 | 136 | stage.setScene(scene); 137 | ctrl = new Controller(this); 138 | stage.show(); 139 | 140 | } 141 | 142 | @Override 143 | public void stop() throws Exception { 144 | super.stop(); 145 | if (ctrl != null) { 146 | ctrl.closeConnection(); 147 | } 148 | Platform.exit(); 149 | System.exit(0); 150 | } 151 | 152 | public static void main(String[] args) { 153 | try { 154 | Application.launch(args); 155 | } catch (Exception e) { e.printStackTrace(); } 156 | } 157 | 158 | } 159 | 
-------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/SQLTranslator.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Set; 6 | 7 | /** 8 | * See the paper by Fei Li and H. V. Jagadish for the defined grammar. 9 | * @author keping 10 | * 11 | */ 12 | public class SQLTranslator { 13 | private SQLQuery query; 14 | private SchemaGraph schema; 15 | private int blockCounter = 1; 16 | 17 | public SQLTranslator(Node root, SchemaGraph schema) { 18 | this(root, schema, false); 19 | } 20 | 21 | /** 22 | * Translating a block, starting from translateGNP. 23 | * @param root 24 | * @param schema 25 | */ 26 | public SQLTranslator(Node root, SchemaGraph schema, boolean block) { 27 | if (!block) { 28 | this.schema = schema; 29 | query = new SQLQuery(); 30 | 31 | translateSClause(root.getChildren().get(0)); 32 | if (root.getChildren().size() >= 2) { 33 | translateComplexCondition(root.getChildren().get(1)); 34 | } 35 | 36 | if (schema != null) addJoinPath(); 37 | } else { 38 | this.schema = schema; 39 | query = new SQLQuery(); 40 | translateGNP(root); 41 | } 42 | } 43 | 44 | public SQLQuery getResult() { return query; } 45 | 46 | 47 | private static boolean isNumber(String str) { 48 | int length = str.length(); 49 | if (length == 0) { return false; } 50 | int i = 0; 51 | if (str.charAt(0) == '-') { 52 | if (length == 1) { return false; } 53 | i = 1; 54 | } 55 | for (; i < length; i++) { 56 | char c = str.charAt(i); 57 | if (c < '0' || c > '9' && c != '.') { return false; } 58 | } 59 | return true; 60 | } 61 | 62 | private void translateCondition(Node node) { 63 | String attribute = "ATTRIBUTE"; 64 | String compareSymbol = "="; 65 | String value = "VALUE"; 66 | if (node.getInfo().getType().equals("VN")) { 67 | attribute = 
node.getInfo().getValue(); 68 | value = node.getWord(); 69 | } else if (node.getInfo().getType().equals("ON")) { 70 | compareSymbol = node.getInfo().getValue(); 71 | Node VN = node.getChildren().get(0); 72 | attribute = VN.getInfo().getValue(); 73 | value = VN.getWord(); 74 | } 75 | if (!isNumber(value)) { value = "\""+value+"\""; } 76 | query.add("WHERE", attribute+" "+compareSymbol+" "+value); 77 | query.add("FROM", attribute.split("\\.")[0]); 78 | } 79 | 80 | private void translateNN(Node node) { 81 | translateNN(node, ""); 82 | } 83 | private void translateNN(Node node, String valueFN) { 84 | if (!node.getInfo().getType().equals("NN")) { return; } 85 | if (!valueFN.equals("")) { 86 | query.add("SELECT", valueFN+"("+node.getInfo().getValue()+")"); 87 | } else { 88 | query.add("SELECT", node.getInfo().getValue()); 89 | } 90 | query.add("FROM", node.getInfo().getValue().split("\\.")[0]); 91 | } 92 | 93 | private void translateNP(Node node) { 94 | translateNP(node, ""); 95 | } 96 | private void translateNP(Node node, String valueFN) { 97 | translateNN(node, valueFN); 98 | for (Node child : node.getChildren()) { 99 | if (child.getInfo().getType().equals("NN")) { 100 | translateNN(child); 101 | } else if (child.getInfo().getType().equals("ON") || 102 | child.getInfo().getType().equals("VN")){ 103 | translateCondition(child); 104 | } 105 | } 106 | } 107 | 108 | private void translateGNP(Node node) { 109 | if (node.getInfo().getType().equals("FN")) { 110 | if (node.getChildren().isEmpty()) { return; } 111 | translateNP(node.getChildren().get(0), node.getInfo().getValue()); 112 | } else if (node.getInfo().getType().equals("NN")) { 113 | translateNP(node); 114 | } 115 | } 116 | 117 | private void translateComplexCondition(Node node) { 118 | if (!node.getInfo().getType().equals("ON")) { return; } 119 | if (node.getChildren().size() != 2) { return; } 120 | SQLTranslator transLeft = new SQLTranslator(node.getChildren().get(0), schema, true); 121 | SQLTranslator transRight= 
new SQLTranslator(node.getChildren().get(1), schema, true); 122 | query.addBlock(transLeft.getResult()); 123 | query.addBlock(transRight.getResult()); 124 | query.add("WHERE", "BLOCK"+(blockCounter++)+" "+node.getInfo().getValue()+" "+"BLOCK"+(blockCounter++)); 125 | } 126 | 127 | private void translateSClause(Node node) { 128 | if (!node.getInfo().getType().equals("SN")) { return; } 129 | translateGNP(node.getChildren().get(0)); 130 | } 131 | 132 | private void addJoinKeys(String table1, String table2) { 133 | Set joinKeys = schema.getJoinKeys(table1, table2); 134 | for (String joinKey : joinKeys) { 135 | query.add("WHERE", table1+"."+joinKey+" = "+table2+"."+joinKey); 136 | } 137 | } 138 | 139 | private void addJoinPath(List joinPath) { 140 | for (int i = 0; i < joinPath.size()-1; i++) { 141 | addJoinKeys(joinPath.get(i), joinPath.get(i+1)); 142 | } 143 | } 144 | 145 | private void addJoinPath() { 146 | List fromTables = new ArrayList(query.getCollection("FROM")); 147 | if (fromTables.size() <= 1) { return; } 148 | for (int i = 0; i < fromTables.size()-1; i++) { 149 | for (int j = i+1; j < fromTables.size(); j++) { 150 | List joinPath = schema.getJoinPath(fromTables.get(i), fromTables.get(j)); 151 | addJoinPath(joinPath); 152 | } 153 | } 154 | } 155 | 156 | } 157 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/Node.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.ArrayList; 4 | import java.util.LinkedList; 5 | import java.util.List; 6 | 7 | /** 8 | * Interface for a parse tree node. 9 | * @author keping 10 | * 11 | */ 12 | public class Node { 13 | 14 | /** 15 | * record if the node is copied over 16 | */ 17 | boolean outside = false; 18 | 19 | private int index = 0; 20 | /** 21 | * Information indicating the corresponding SQL component of the Node. 
22 | */ 23 | NodeInfo info = null; 24 | /** 25 | * The natural language word of the Node. This is the only field of 26 | * the Node object that is immutable. 27 | */ 28 | String word; 29 | /** 30 | * Part-of-speech tag for the Node. 31 | */ 32 | String posTag; 33 | 34 | /** 35 | * Parent of the node can be directly modified by ParseTree. 36 | */ 37 | Node parent = null; // package private 38 | /** 39 | * Children of the node can be directly modified by ParseTree. 40 | */ 41 | List children = new ArrayList(); // package private 42 | 43 | //for testing purpose 44 | boolean isInvalid = false; 45 | 46 | public Node(int index, String word, String posTag){ 47 | this(index, word, posTag, null); 48 | } 49 | 50 | public Node(int index, String word, String posTag, NodeInfo info) { 51 | this.index = index; 52 | this.word = word; 53 | this.posTag = posTag; 54 | this.info = info; 55 | } 56 | 57 | public Node(String word, String posTag, NodeInfo info) { 58 | this(0, word, posTag, info); 59 | 60 | } 61 | 62 | private Node clone(Node node){ 63 | if (node == null) return null; 64 | Node copy = new Node(node.index, node.word, node.posTag, node.info); 65 | for (Node child : node.children){ 66 | Node copyChild = clone(child); 67 | copyChild.parent = copy; 68 | copy.children.add(copyChild); 69 | } 70 | return copy; 71 | } 72 | public Node clone(){ 73 | return clone(this); 74 | } 75 | 76 | 77 | public NodeInfo getInfo() { return info; } 78 | public void setInfo(NodeInfo info) { this.info = info; } 79 | public String getWord() { return word; } 80 | public void setWord(String word) {this.word = word;} 81 | public String getPosTag() { return posTag; } 82 | public List getChildren() { return children; } 83 | public void setChild(Node child) {this.children.add(child);} 84 | public Node getParent() {return parent;} 85 | public void setParent(Node parent) {this.parent = parent;} 86 | public void setOutside(boolean outside) {this.outside = outside;} 87 | public boolean getOutside() {return 
this.outside;} 88 | 89 | public void removeChild (Node child) { 90 | 91 | for (int i = 0; i < children.size(); i ++) { 92 | 93 | if (children.get(i).equals(child)) { 94 | 95 | children.remove(i); 96 | return; 97 | } 98 | } 99 | } 100 | 101 | public void printNodeArray () { 102 | 103 | Node [] nodes = genNodesArray(); 104 | 105 | for (int i = 0; i < nodes.length; i++) { 106 | System.out.println("type: " + nodes[i].getInfo().getType() + " value: " + nodes[i].getInfo().getValue()); 107 | } 108 | } 109 | 110 | 111 | /** 112 | * Generate an array of the nodes tree with this as root 113 | * using pre-order traversal; 114 | * @return 115 | */ 116 | public Node[] genNodesArray() { 117 | List nodesList = new ArrayList<>(); 118 | LinkedList stack = new LinkedList<>(); 119 | stack.push(this); 120 | while (!stack.isEmpty()) { 121 | Node curr = stack.pop(); 122 | nodesList.add(curr); 123 | List currChildren = curr.getChildren(); 124 | for (int i = currChildren.size()-1; i >= 0; i--) { 125 | stack.push(currChildren.get(i)); 126 | } 127 | } 128 | int N = nodesList.size(); 129 | Node[] nodes = new Node[N]; 130 | for (int i = 0; i < N; i++) { 131 | nodes[i] = nodesList.get(i); 132 | } 133 | return nodes; 134 | } 135 | 136 | /** 137 | * Only includes posTag, word, info, and children. 138 | * Return the hashCode of the tree represented by this node. 139 | */ 140 | @Override 141 | public int hashCode() { // exclude parent. 142 | final int prime = 31; 143 | int result = 17; 144 | result = prime * result + index; 145 | result = prime * result + ((posTag == null) ? 0 : posTag.hashCode()); 146 | result = prime * result + ((word == null) ? 0 : word.hashCode()); 147 | result = prime * result + ((info == null) ? 0 : info.hashCode()); 148 | if (children != null) { 149 | for (Node child : children) { 150 | result = prime * result + child.hashCode(); 151 | } 152 | } 153 | 154 | return result; 155 | } 156 | 157 | /** 158 | * Only considers word, posTag, info, and children (recursively). 
159 | * See whether two trees represented by two nodes are equal. 160 | */ 161 | @Override 162 | public boolean equals(Object obj) { // exclude parent 163 | if (this == obj) { return true; } 164 | if (obj == null) { return false; } 165 | if (getClass() != obj.getClass()) { return false; } 166 | Node other = (Node) obj; 167 | if (index != other.index) { return false; } 168 | if (!word.equals(other.word)) { return false; } 169 | if (!posTag.equals(other.posTag)) { return false; } 170 | if (info != other.info) { 171 | if (info == null || other.info == null) { return false; } 172 | if (!info.equals(other.info)) { return false; } 173 | } 174 | if (children != other.children) { 175 | if (children == null || other.children == null) { return false; } 176 | if (children.size() != other.children.size()) { return false; } 177 | for (int i = 0; i < children.size(); i++) { 178 | if (!children.get(i).equals(other.children.get(i))) { return false; } 179 | } 180 | } 181 | return true; 182 | } 183 | 184 | public String toString() { 185 | String s = "("+index+")"+word; 186 | if (info != null) { 187 | s += "("+info.getType()+":"+info.getValue()+")"; 188 | } 189 | return s; 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 
13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn ( ) { 37 | echo "$*" 38 | } 39 | 40 | die ( ) { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 
89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? -ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) 
set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save ( ) { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/TreeAdjustor.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.HashSet; 6 | import java.util.List; 7 | import java.util.PriorityQueue; 8 | import java.util.Set; 9 | 10 | public class TreeAdjustor { 11 | 12 | private static final int MAX_EDIT = 15; 13 | 14 | /** 15 | * Return the node in the tree that equals to the targetNode. 
16 | * @param tree 17 | * @param targetNode 18 | * @return 19 | */ 20 | private static Node find(ParseTree tree, Node targetNode) { 21 | for (Node node : tree) { 22 | if (node.equals(targetNode)) { return node; } 23 | } 24 | return null; 25 | } 26 | 27 | /** 28 | * Swap this parent node and a child node. 29 | * @param parent 30 | * @param child 31 | */ 32 | private static void swap(Node parent, Node child) { 33 | // swap the attributes directly. 34 | NodeInfo childInfo = child.info; 35 | String childWord = child.word; 36 | String childPosTag = child.posTag; 37 | child.info = parent.info; 38 | child.word = parent.word; 39 | child.posTag = parent.posTag; 40 | parent.info = childInfo; 41 | parent.word = childWord; 42 | parent.posTag = childPosTag; 43 | } 44 | 45 | /** 46 | * Make the child node a rightmost sibling of the target Node. 47 | * @param target 48 | * @param child 49 | */ 50 | private static void makeSibling(Node target, Node child) { 51 | List children = target.getChildren(); 52 | target.children = new ArrayList();; 53 | for (Node anyChild : children) { 54 | if (anyChild != child) { target.getChildren().add(anyChild); } 55 | } 56 | target.parent.children.add(child); 57 | child.parent = target.parent; 58 | } 59 | 60 | /** 61 | * Make a sibling the rightmost child of the target. 62 | * @param target 63 | * @param sibling 64 | */ 65 | private static void makeChild(Node target, Node sibling) { 66 | List siblings = target.parent.children; 67 | target.parent.children = new ArrayList(); 68 | for (Node anySibling : siblings) { 69 | if (anySibling != sibling) { 70 | target.parent.children.add(anySibling); 71 | } 72 | } 73 | target.children.add(sibling); 74 | sibling.parent = target; 75 | } 76 | 77 | /** 78 | *

Return a list of adjusted trees after one adjustment to the input tree 79 | * at the target Node.

80 | *

Four possible adjustments can be made to that node:

81 | *
    82 | *
  1. Swap this node with its child. (all possible positions)
  2. 83 | *
  3. Make child its rightmost sibling.
  4. 84 | *
  5. Make sibling its rightmost child.
  6. 85 | *
  7. Swap leftmost child with other children
  8. 86 | *
87 | * @param tree 88 | * @param targetNode 89 | * @return 90 | */ 91 | private static Set adjust(ParseTree tree, Node target) { 92 | Set adjusted = new HashSet<>(); 93 | if (target.parent == null) { return adjusted; } 94 | // (1) Swap target with its children. 95 | for (Node child : target.getChildren()) { 96 | ParseTree tempTree = new ParseTree(tree); 97 | swap(find(tempTree, target), find(tempTree, child)); 98 | adjusted.add(tempTree); 99 | } 100 | // (2) Make child its rightmost sibling. 101 | for (Node child : target.getChildren()) { 102 | ParseTree tempTree = new ParseTree(tree); 103 | makeSibling(find(tempTree, target), find(tempTree, child)); 104 | adjusted.add(tempTree); 105 | } 106 | // (3) Make its sibling its rightmost child. 107 | for (Node sibling : target.parent.getChildren()) { 108 | if (sibling == target) { continue; } 109 | ParseTree tempTree = new ParseTree(tree); 110 | makeChild(find(tempTree, target), find(tempTree, sibling)); 111 | adjusted.add(tempTree); 112 | } 113 | // (4) Swap leftmost child with other children. 114 | if (target.getChildren().size() >= 2) { 115 | List children = target.getChildren(); 116 | for (int i = 1; i < children.size(); i++) { 117 | ParseTree tempTree = new ParseTree(tree); 118 | swap(find(tempTree, children.get(0)), 119 | find(tempTree, children.get(i))); 120 | adjusted.add(tempTree); 121 | } 122 | } 123 | return adjusted; 124 | } 125 | 126 | /** 127 | * Return a set of adjusted trees after one adjustment to the input tree. 
128 | * @param tree 129 | * @return 130 | */ 131 | public static List adjust(ParseTree tree) { 132 | Set treeList = new HashSet(); 133 | for (Node node : tree) { 134 | treeList.addAll(adjust(tree, node)); 135 | } 136 | return new ArrayList(treeList); 137 | } 138 | 139 | 140 | public static List getAdjustedTrees(ParseTree tree) { 141 | List results = new ArrayList(); 142 | // The top of the pq is the most valid tree (highest score, lowest number of invalid nodes) 143 | PriorityQueue queue = new PriorityQueue((t1,t2) -> ( - t1.getScore() + t2.getScore() )); 144 | HashMap H = new HashMap(); 145 | queue.add(tree); 146 | results.add(tree); 147 | H.put(tree.hashCode(), tree); 148 | tree.setEdit(0); 149 | 150 | ParseTree treeWithON = tree.addON(); 151 | queue.add(treeWithON); 152 | results.add(treeWithON); 153 | H.put(treeWithON.hashCode(), treeWithON); 154 | treeWithON.setEdit(0); 155 | 156 | while (queue.size() > 0){ 157 | ParseTree oriTree = queue.poll(); 158 | if (oriTree.getEdit() >= MAX_EDIT) { continue; } 159 | List treeList = TreeAdjustor.adjust(oriTree); 160 | double numInvalidNodes = SyntacticEvaluator.numberOfInvalidNodes(oriTree); 161 | 162 | for (int i = 0; i < treeList.size(); i++){ 163 | ParseTree currentTree = treeList.get(i); 164 | int hashValue = currentTree.hashCode(); 165 | if ( !H.containsKey(hashValue) ) { 166 | H.put(hashValue, currentTree); 167 | currentTree.setEdit(oriTree.getEdit()+1); 168 | if (SyntacticEvaluator.numberOfInvalidNodes(currentTree) <= numInvalidNodes) { 169 | queue.add(currentTree); 170 | results.add(currentTree); 171 | } 172 | } 173 | } 174 | } 175 | return results; 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/controller/Controller.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.controller; 2 | 3 | import com.dukenlidb.nlidb.model.request.ExecuteSQLRequest; 4 | import 
com.dukenlidb.nlidb.model.request.TranslateNLRequest; 5 | import com.dukenlidb.nlidb.model.response.*; 6 | import com.dukenlidb.nlidb.service.SQLExecutionService; 7 | import com.fasterxml.jackson.core.JsonProcessingException; 8 | import com.dukenlidb.nlidb.model.DBConnectionConfig; 9 | import com.dukenlidb.nlidb.model.UserSession; 10 | import com.dukenlidb.nlidb.model.request.ConnectDBRequest; 11 | import org.springframework.beans.factory.annotation.Autowired; 12 | import org.springframework.http.ResponseEntity; 13 | import org.springframework.web.bind.annotation.CookieValue; 14 | import org.springframework.web.bind.annotation.RequestBody; 15 | import org.springframework.web.bind.annotation.RequestMapping; 16 | import org.springframework.web.bind.annotation.RestController; 17 | import com.dukenlidb.nlidb.service.CookieService; 18 | import com.dukenlidb.nlidb.service.DBConnectionService; 19 | import com.dukenlidb.nlidb.service.RedisService; 20 | 21 | import javax.servlet.http.HttpServletResponse; 22 | import java.io.IOException; 23 | import java.sql.SQLException; 24 | import java.util.UUID; 25 | 26 | import static com.dukenlidb.nlidb.service.CookieService.COOKIE_NAME; 27 | import static com.dukenlidb.nlidb.service.CookieService.USER_NONE; 28 | 29 | @RestController 30 | public class Controller { 31 | 32 | private CookieService cookieService; 33 | private RedisService redisService; 34 | private DBConnectionService dbConnectionService; 35 | private SQLExecutionService sqlExecutionService; 36 | 37 | @Autowired 38 | public Controller( 39 | CookieService cookieService, 40 | RedisService redisService, 41 | DBConnectionService dbConnectionService, 42 | SQLExecutionService sqlExecutionService 43 | ) { 44 | this.cookieService = cookieService; 45 | this.redisService = redisService; 46 | this.dbConnectionService = dbConnectionService; 47 | this.sqlExecutionService = sqlExecutionService; 48 | } 49 | 50 | @RequestMapping("/api/connect/user") 51 | public ResponseEntity 
connectUser( 52 | @CookieValue(value = COOKIE_NAME, defaultValue = USER_NONE) String userId, 53 | HttpServletResponse res 54 | ) throws IOException { 55 | if (userId.equals(USER_NONE) || !redisService.hasUser(userId)) { 56 | return ResponseEntity.ok(new StatusMessageResponse(false, "No user session found")); 57 | } else { 58 | redisService.refreshUser(userId); 59 | UserSession session = redisService.getUserSession(userId); 60 | cookieService.setUserIdCookie(res, userId); 61 | return ResponseEntity.ok(new ConnectResponse(true, session.getDbConnectionConfig().getUrl())); 62 | } 63 | } 64 | 65 | @RequestMapping("/api/disconnect") 66 | public ResponseEntity disconnect( 67 | @CookieValue(value = COOKIE_NAME, defaultValue = USER_NONE) String userId, 68 | HttpServletResponse res 69 | ) { 70 | if (userId.equals(USER_NONE) || !redisService.hasUser(userId)) { 71 | return ResponseEntity.status(401).body(new MessageResponse("You are not logged in.")); 72 | } else { 73 | redisService.removeUser(userId); 74 | cookieService.expireUserIdCookie(res, userId); 75 | return ResponseEntity.status(200).body(new MessageResponse("Disconnect successfully.")); 76 | } 77 | } 78 | 79 | 80 | @RequestMapping("/api/connect/db") 81 | public ResponseEntity connectDB( 82 | @RequestBody ConnectDBRequest req, 83 | HttpServletResponse res 84 | ) throws JsonProcessingException { 85 | 86 | DBConnectionConfig config = DBConnectionConfig.builder() 87 | .host(req.getHost()) 88 | .port(req.getPort()) 89 | .database(req.getDatabase()) 90 | .username(req.getUsername()) 91 | .password(req.getPassword()) 92 | .build(); 93 | 94 | try { 95 | dbConnectionService.getConnection(config); 96 | String userId = UUID.randomUUID().toString(); 97 | UserSession session = new UserSession(config); 98 | redisService.setUserSession(userId, session); 99 | cookieService.setUserIdCookie(res, userId); 100 | return ResponseEntity.ok().body(new ConnectResponse(true, config.getUrl())); 101 | } catch (SQLException e) { 102 | // TODO: 
different kinds of connection failure. 103 | return ResponseEntity.status(400).body(new MessageResponse("Connection Failed!")); 104 | } 105 | } 106 | 107 | @RequestMapping("/api/translate/nl") 108 | public ResponseEntity translateNL( 109 | @CookieValue(value = COOKIE_NAME, defaultValue = USER_NONE) String userId, 110 | @RequestBody TranslateNLRequest req 111 | ) { 112 | 113 | if (userId.equals(USER_NONE) || !redisService.hasUser(userId)) { 114 | return ResponseEntity.status(401).body(new MessageResponse("You are not connected to a Database.")); 115 | } 116 | return ResponseEntity.ok(new TranslateResponse( 117 | "We are still writing the code to translate your natural language input..." 118 | )); 119 | } 120 | 121 | @RequestMapping("/api/execute/sql") 122 | public ResponseEntity executeSQL( 123 | @CookieValue(value = COOKIE_NAME, defaultValue = USER_NONE) String userId, 124 | @RequestBody ExecuteSQLRequest req 125 | ) throws IOException, SQLException { 126 | 127 | if (userId.equals(USER_NONE) || !redisService.hasUser(userId)) { 128 | return ResponseEntity.status(401).body(new MessageResponse("You are not connected to a Database.")); 129 | } 130 | UserSession session = redisService.getUserSession(userId); 131 | String resultString = sqlExecutionService.executeSQL(session.getDbConnectionConfig(), req.getQuery()); 132 | return ResponseEntity.ok(new QueryResponse(resultString)); 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/ParseTreeTest.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.List; 4 | 5 | public class ParseTreeTest { 6 | 7 | /** 8 | *

We want to translate:
"Return all titles of theory papers before 1970." 9 | *
into (in for inproceedings): 10 | *
SELECT in.title
FROM in 11 | *
WHERE in.area = 'Theory' AND in.year < 1970;

12 | * 13 | *

The direct parsing result of this natural language input is:

14 | * 15 | *

 16 | 	 *           root
 17 | 	 *            |
 18 | 	 *          return
 19 | 	 *            |  `---\---------\
 20 | 	 *          titles   1970       .
 21 | 	 *          /   \      \
 22 | 	 *       all   papers  before
 23 | 	 *              /  \ 
 24 | 	 *            of   theory
 25 | 	 * 

26 | * 27 | *

Suppose we have already successfully gone through the process of 28 | * nodes mapping and structural adjustment. Then we should arrive at a ParseTree 29 | * like this: (in for inproceedings)

30 | * 31 | *

 32 | 	 *    root
 33 | 	 *      |
 34 | 	 *  return(SN:SELECT)
 35 | 	 *      |
 36 | 	 *  titles(NN:in.title)
 37 | 	 *      | `-------------\
 38 | 	 *  theory(VN:in.area)  before(ON:<)
 39 | 	 *                       |
 40 | 	 *                     1970(VN:in.year)
 41 | 	 * 

42 | * 43 | *

The next step is to translate this "perfect" ParseTree word-to-word to 44 | * an SQL translateNL, which is what this method is testing for.

45 | */ 46 | public static void testTranslation1() { 47 | // (1) Let's construct the perfect ParseTree for testing. 48 | ParseTree tree = new ParseTree(); 49 | Node[] nodes = new Node[6]; 50 | 51 | nodes[0] = new Node(0, "ROOT", "ROOT"); 52 | nodes[0].info = new NodeInfo("ROOT","ROOT"); 53 | nodes[1] = new Node(1, "return", "--"); // posTag not useful 54 | nodes[1].info = new NodeInfo("SN", "SELECT"); 55 | nodes[2] = new Node(2, "titles", "--"); 56 | nodes[2].info = new NodeInfo("NN", "in.title"); 57 | nodes[3] = new Node(3, "theory", "--"); 58 | nodes[3].info = new NodeInfo("VN", "in.area"); 59 | nodes[4] = new Node(4, "before", "--"); 60 | nodes[4].info = new NodeInfo("ON", "<"); 61 | nodes[5] = new Node(5, "1970", "--"); 62 | nodes[5].info = new NodeInfo("VN", "in.year"); 63 | 64 | tree.root = nodes[0]; 65 | tree.root.getChildren().add(nodes[1]); 66 | nodes[1].children.add(nodes[2]); 67 | nodes[2].parent = nodes[1]; 68 | nodes[2].children.add(nodes[3]); 69 | nodes[2].children.add(nodes[4]); 70 | nodes[3].parent = nodes[2]; 71 | nodes[4].parent = nodes[2]; 72 | nodes[4].children.add(nodes[5]); 73 | nodes[5].parent = nodes[4]; 74 | /* 75 | System.out.println(tree); 76 | 77 | // (2) Do the translation. 78 | SQLQuery translateNL = tree.translateToSQL(); 79 | 80 | // (3) Print out the translateNL and see. 
81 | System.out.println(translateNL); 82 | */ 83 | 84 | System.out.println("===========test for Running SyntacticEvaluator.numberOfInvalidNodes==========="); 85 | System.out.println("Input tree: "+tree.toString()); 86 | System.out.println("Number of Invalid nodes: "+SyntacticEvaluator.numberOfInvalidNodes(tree)+"\n"); 87 | System.out.println("Invalid nodes: "); 88 | for (int i = 1; i < tree.size(); i++){ 89 | if (nodes[i].isInvalid) 90 | System.out.println(nodes[i]); 91 | } 92 | 93 | System.out.println("===========test for Running mergeLNQN==========="); 94 | System.out.println("Input tree: "+tree.toString()); 95 | ParseTree newTree = tree.mergeLNQN(); 96 | System.out.println("Output tree: "+newTree.toString()); 97 | System.out.println("===========test for Running adjust() in TreeAdjustor==========="); 98 | System.out.println("Input tree: "+tree.toString()); 99 | List treeList = TreeAdjustor.adjust(tree); 100 | System.out.println("Output size: "+treeList.size()); 101 | System.out.println("Output trees:"); 102 | for (int j = 0; j < treeList.size(); j++){ 103 | System.out.println("Tree "+j+" :"); 104 | System.out.println(treeList.get(j).toString()); 105 | } 106 | 107 | System.out.println("===========test for Running getAdjustedTrees() in TreeAdjustor==========="); 108 | System.out.println("Number of possible trees for choice:"); 109 | List result = TreeAdjustor.getAdjustedTrees(tree); 110 | System.out.println(result.size()); 111 | for (ParseTree t:result) 112 | System.out.println(t); 113 | } 114 | 115 | /** 116 | * Using natural language input "Return all titles of theory papers before 1970." 117 | *

The original tree:

118 | *

119 | 	 *           root
120 | 	 *            |
121 | 	 *          return
122 | 	 *            |  `---\---------\
123 | 	 *          titles   1970       .
124 | 	 *          /   \      \
125 | 	 *       all   papers  before
126 | 	 *              /  \ 
127 | 	 *            of   theory
128 | 	 * 

129 | * 130 | *

The tree after removing meaningless nodes:

131 | *

132 | 	 *    root
133 | 	 *      |
134 | 	 *  return(SN:SELECT)
135 | 	 *      |     `----------\
136 | 	 *  titles(NN:in.title) 1970(VN:in.year)
137 | 	 *      |                | 
138 | 	 *  theory(VN:in.area)  before(ON:<)
139 | 	 * 

140 | * 141 | *

Still need the adjustor to swap the position of "1970" and "before".

142 | */ 143 | public static void removeMeaninglessNodesTest() { 144 | String input = "Return all titles of theory papers before 1970."; 145 | NLParser parser = new NLParser(); 146 | ParseTree tree = new ParseTree(input, parser); 147 | System.out.println("ParseTree: "); 148 | System.out.println(tree); 149 | 150 | // Set NodeInfo 151 | Node[] nodes = tree.genNodesArray(); 152 | nodes[1].info = new NodeInfo("SN", "SELECT"); 153 | nodes[2].info = new NodeInfo("UNKNOWN", "meaningless"); 154 | nodes[3].info = new NodeInfo("NN", "in.title"); 155 | nodes[4].info = new NodeInfo("UNKNOWN", "meaningless"); 156 | nodes[5].info = new NodeInfo("VN", "in.area"); 157 | nodes[6].info = new NodeInfo("UNKNOWN", "meaningless"); 158 | nodes[7].info = new NodeInfo("ON", "<"); 159 | nodes[8].info = new NodeInfo("VN", "in.year"); 160 | nodes[9].info = new NodeInfo("UNKNOWN", "meaningless"); 161 | 162 | System.out.println("After setting nodeinfo:"); 163 | System.out.println(tree); 164 | 165 | tree.removeMeaninglessNodes(); 166 | 167 | System.out.println("After removing meaningless nodes"); 168 | System.out.println(tree); 169 | 170 | SQLQuery query = tree.translateToSQL(); 171 | 172 | System.out.println(query); 173 | 174 | } 175 | 176 | public static void main(String[] args) { 177 | testTranslation1(); 178 | //removeMeaninglessNodesTest(); 179 | } 180 | 181 | } 182 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/app/Controller.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.app; 2 | 3 | import java.sql.Connection; 4 | import java.sql.DriverManager; 5 | import java.sql.SQLException; 6 | import java.util.List; 7 | 8 | import javafx.collections.FXCollections; 9 | import com.dukenlidb.nlidb.archive.model.NLParser; 10 | import com.dukenlidb.nlidb.archive.model.Node; 11 | import com.dukenlidb.nlidb.archive.model.NodeInfo; 12 | import 
com.dukenlidb.nlidb.archive.model.NodeMapper; 13 | import com.dukenlidb.nlidb.archive.model.ParseTree; 14 | import com.dukenlidb.nlidb.archive.model.ParseTree.ParseTreeIterator; 15 | import com.dukenlidb.nlidb.archive.model.SQLQuery; 16 | import com.dukenlidb.nlidb.archive.model.SchemaGraph; 17 | import com.dukenlidb.nlidb.archive.ui.UserView; 18 | 19 | 20 | /** 21 | * The controller between com.dukenlidb.nlidb.model and view. 22 | * @author keping 23 | */ 24 | public class Controller { 25 | private Connection connection = null; 26 | private SchemaGraph schema; 27 | private NLParser parser; 28 | private NodeMapper nodeMapper; 29 | private ParseTree parseTree; 30 | private UserView view; 31 | /** 32 | * Iterator for nodes mapping. 33 | */ 34 | private ParseTreeIterator iter; 35 | /** 36 | * Attribute for nodes mapping, to indicate the current Node. 37 | */ 38 | private Node node; 39 | private boolean mappingNodes = false; 40 | private boolean selectingTree = false; 41 | private boolean processing = false; 42 | private List treeChoices; 43 | private SQLQuery query; 44 | 45 | /** 46 | * Initialize the Controller. 47 | */ 48 | public Controller(UserView view) { 49 | this.view = view; 50 | startConnection(); 51 | 52 | try { nodeMapper = new NodeMapper(); 53 | } catch (Exception e) { e.printStackTrace(); } 54 | parser = new NLParser(); // initialize parser, takes some time 55 | 56 | System.out.println("Controller initialized."); 57 | } 58 | 59 | /** 60 | * ONLY FOR TESTING. An empty constructor. 
61 | */ 62 | public Controller() { 63 | 64 | } 65 | 66 | /** 67 | * Start connection with the database and read schema graph 68 | */ 69 | public void startConnection() { 70 | 71 | try { Class.forName("org.postgresql.Driver"); } 72 | catch (ClassNotFoundException e1) { } 73 | 74 | System.out.println("PostgreSQL JDBC Driver Registered!"); 75 | 76 | try { 77 | connection = DriverManager.getConnection("jdbc:postgresql://127.0.0.1:5432/dblp", "dblpuser", "dblpuser"); 78 | } catch (SQLException e) { 79 | e.printStackTrace(); 80 | } 81 | System.out.println("Connection successful!"); 82 | 83 | try { 84 | schema = new SchemaGraph(connection); 85 | view.setDisplay("Database Schema:\n\n"+schema.toString()); 86 | } catch (SQLException e) { 87 | e.printStackTrace(); 88 | } 89 | 90 | } 91 | 92 | /** 93 | * Close connection with the database. 94 | */ 95 | public void closeConnection() { 96 | try { 97 | if (connection != null) { connection.close(); } 98 | } catch (SQLException e) { 99 | e.printStackTrace(); 100 | } 101 | System.out.println("Connection closed."); 102 | } 103 | 104 | // ---- Methods for nodes mapping ---- // 105 | /** 106 | * Helper method for nodes mapping, displaying the currently mapping Node 107 | * and the choices on the view. 108 | * @param choices 109 | */ 110 | private void setChoicesOnView(List choices) { 111 | view.setDisplay("Mapping nodes: \n"+parseTree.getSentence()+"\n"); 112 | view.appendDisplay("Currently on: "+node); 113 | view.setChoices(FXCollections.observableArrayList(choices)); 114 | } 115 | 116 | /** 117 | * Terminates the mapping Nodes process by setting the boolean mappingNodes false; 118 | */ 119 | private void finishNodesMapping() { 120 | view.setDisplay("Nodes mapped.\n"+parseTree.getSentence()); 121 | mappingNodes = false; 122 | view.removeChoiceBoxButton(); 123 | processAfterNodesMapping(); 124 | } 125 | 126 | /** 127 | * Start the nodes mapping process. 
A boolean will be set to indicate that 128 | * the application is in the process of mapping Nodes. Cannot call startMappingNodes 129 | * again during mapping Nodes. After this is called, the view shows the choices 130 | * of NodeInfos for a node, waiting for the user to choose one. 131 | */ 132 | public void startMappingNodes() { 133 | if (mappingNodes) { return; } 134 | view.showNodesChoice(); 135 | 136 | mappingNodes = true; 137 | iter = parseTree.iterator(); 138 | if (!iter.hasNext()) { 139 | finishNodesMapping(); 140 | return; 141 | } 142 | 143 | node = iter.next(); 144 | List choices = nodeMapper.getNodeInfoChoices(node, schema); 145 | if (choices.size() == 1) { chooseNode(choices.get(0)); } 146 | else { setChoicesOnView(choices); } 147 | // After this wait for the button to call chooseNode 148 | } 149 | 150 | /** 151 | * Choose NodeInfo for the current Node. This method is called when the user 152 | * clicked the confirmChoice button, or automatically called when the choices 153 | * of NodeInfo contains only one element. 
154 | * @param info {@link NodeInfo} 155 | */ 156 | public void chooseNode(NodeInfo info) { 157 | if (!mappingNodes) { return; } 158 | // System.out.println("Now the tree is:"); 159 | // System.out.println(parseTree); 160 | node.setInfo(info); 161 | if (!iter.hasNext()) { 162 | finishNodesMapping(); 163 | return; 164 | } 165 | node = iter.next(); 166 | List choices = nodeMapper.getNodeInfoChoices(node, schema); 167 | if (choices.size() == 1) { chooseNode(choices.get(0)); } 168 | else { setChoicesOnView(choices); } 169 | // After this wait for the button to call chooseNode 170 | } 171 | // ----------------------------------- // 172 | 173 | 174 | // ---- Methods for trees selection ---- // 175 | public void startTreeSelection() { 176 | if (selectingTree) { return; } 177 | view.showTreesChoice(); 178 | selectingTree = true; 179 | treeChoices = parseTree.getAdjustedTrees(); 180 | } 181 | 182 | public void showTree(int index) { 183 | view.setDisplay(treeChoices.get(index).toString()); 184 | } 185 | 186 | public void chooseTree(int index) { 187 | parseTree = treeChoices.get(index); 188 | finishTreeSelection(); 189 | } 190 | 191 | public void finishTreeSelection() { 192 | selectingTree = false; 193 | view.removeTreesChoices(); 194 | processAfterTreeSelection(); 195 | } 196 | // ------------------------------------- // 197 | 198 | public void processAfterTreeSelection() { 199 | System.out.println("The tree before implicit nodes insertion: "); 200 | System.out.println(parseTree); 201 | parseTree.insertImplicitNodes(); 202 | System.out.println("Going to do translation for tree: "); 203 | System.out.println(parseTree); 204 | query = parseTree.translateToSQL(schema); 205 | view.setDisplay(query.toString()); 206 | processing = false; 207 | } 208 | 209 | public void processAfterNodesMapping() { 210 | System.out.println("Going to remove meaningless nodes for tree: "); 211 | System.out.println(parseTree); 212 | parseTree.removeMeaninglessNodes(); 213 | parseTree.mergeLNQN(); 214 | 
startTreeSelection(); 215 | } 216 | 217 | /** 218 | * Process natural language and return an sql translateNL. 219 | * @param nl 220 | * @return 221 | */ 222 | public void processNaturalLanguage(String input) { 223 | if (processing) { view.appendDisplay("\nCurrently processing a sentence!\n"); } 224 | processing = true; 225 | parseTree = new ParseTree(input, parser); 226 | startMappingNodes(); 227 | } 228 | 229 | } 230 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/WordNet.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.io.File; 4 | import java.net.URL; 5 | import java.util.ArrayList; 6 | import java.util.HashSet; 7 | import java.util.List; 8 | import java.util.Set; 9 | 10 | import edu.mit.jwi.IRAMDictionary; 11 | import edu.mit.jwi.RAMDictionary; 12 | import edu.mit.jwi.data.ILoadPolicy; 13 | import edu.mit.jwi.item.IIndexWord; 14 | import edu.mit.jwi.item.ISynset; 15 | import edu.mit.jwi.item.ISynsetID; 16 | import edu.mit.jwi.item.IWordID; 17 | import edu.mit.jwi.item.POS; 18 | import edu.mit.jwi.item.Pointer; 19 | import edu.mit.jwi.morph.WordnetStemmer; 20 | 21 | public class WordNet { 22 | String sep = File.separator; 23 | String wordNetDir = "lib" + sep + "WordNet-3.0" + sep + "dict"; 24 | URL url; 25 | IRAMDictionary dict; 26 | WordnetStemmer stemmer; 27 | 28 | public WordNet() throws Exception { 29 | url = new URL("file", null, wordNetDir); 30 | dict = new RAMDictionary(url, ILoadPolicy.NO_LOAD); 31 | dict.open(); 32 | System.out.println("Loading wordNet..."); 33 | dict.load(true); // load dictionary into memory 34 | System.out.println("WordNet loaded."); 35 | 36 | stemmer = new WordnetStemmer(dict); 37 | } 38 | 39 | /** 40 | * Find the similarity of two nouns. 
41 | * @param word1 42 | * @param word2 43 | * @return 44 | */ 45 | public double similarity(String word1, String word2) { 46 | // System.out.println("Finding similarity between: "+word1+" and "+word2); 47 | // remove all special characters from words 48 | if (word1.equals("") || word2.equals("")) { return 0.0; } 49 | word1 = word1.replaceAll("[^a-zA-Z0-9]", ""); 50 | word2 = word2.replaceAll("[^a-zA-Z0-9]", ""); 51 | if (word1.equals("") || word2.equals("")) { return 0.0; } 52 | // ? why NullPointerException here ??? Doesn't seem to be my fault! 53 | // Here special symbols in word causes Exception. 54 | List stems1 = stemmer.findStems(word1, POS.NOUN); 55 | List stems2 = stemmer.findStems(word2, POS.NOUN); 56 | 57 | if (stems1.isEmpty() || stems2.isEmpty()) { 58 | // System.out.println("One word cannot be identified in WordNet"); 59 | return 0.0; 60 | } 61 | 62 | ArrayList> visited1, visited2; 63 | visited1 = new ArrayList<>(); 64 | visited2 = new ArrayList<>(); 65 | 66 | List wordIDs1 = new ArrayList<>(); 67 | for (String stem : stems1) { 68 | IIndexWord indexWord = dict.getIndexWord(stem, POS.NOUN); 69 | if (indexWord != null) { 70 | wordIDs1.addAll(dict.getIndexWord(stem, POS.NOUN).getWordIDs()); 71 | } 72 | } 73 | if (wordIDs1.isEmpty()) { return 0.0; } 74 | List synsets1 = new ArrayList<>(); 75 | for (IWordID wID : wordIDs1) { synsets1.add(dict.getWord(wID).getSynset()); } 76 | visited1.add(new HashSet (synsets1)); 77 | 78 | List wordIDs2 = new ArrayList<>(); 79 | for (String stem : stems2) { 80 | IIndexWord indexWord = dict.getIndexWord(stem, POS.NOUN); 81 | if (indexWord != null) { 82 | wordIDs2.addAll(dict.getIndexWord(stem, POS.NOUN).getWordIDs()); 83 | } 84 | } 85 | if (wordIDs2.isEmpty()) { return 0.0; } 86 | List synsets2 = new ArrayList<>(); 87 | for (IWordID wID : wordIDs2) { synsets2.add(dict.getWord(wID).getSynset()); } 88 | visited2.add(new HashSet (synsets2)); 89 | 90 | boolean commonFound = false; 91 | ISynset commonSynset = null; 92 | boolean 
endSearch1 = false; 93 | boolean endSearch2 = false; 94 | 95 | int commonSynsetPos1 = -1; 96 | int commonSynsetPos2 = -1; 97 | 98 | while (!commonFound && !(endSearch1 && endSearch2)) { 99 | int sz1 = visited1.size(); 100 | int sz2 = visited2.size(); 101 | if (!commonFound && !endSearch1) { // check the newest of 1 against all of 2 102 | for (int i = 0; i < sz2; i++) { 103 | if (intersection(visited1.get(sz1-1), visited2.get(i)) != null) { 104 | commonSynsetPos1 = sz1-1; 105 | commonSynsetPos2 = i; 106 | commonSynset = intersection(visited1.get(sz1-1), visited2.get(i)); 107 | commonFound = true; 108 | break; 109 | } 110 | } 111 | } 112 | if (!commonFound && !endSearch2) { // check the newest of 2 against all of 1 113 | for (int i = 0; i < sz1; i++) { 114 | if (intersection(visited1.get(i), visited2.get(sz2-1)) != null) { 115 | commonSynsetPos1 = i; 116 | commonSynsetPos2 = sz2-1; 117 | commonSynset = intersection(visited1.get(i), visited2.get(sz2-1)); 118 | commonFound = true; 119 | break; 120 | } 121 | } 122 | } 123 | if (!commonFound) { 124 | if (!endSearch1) { 125 | Set hyperSet1 = getHyperSet(visited1.get(sz1-1)); 126 | if (hyperSet1.isEmpty()) { endSearch1 = true; } 127 | else { visited1.add(hyperSet1); } 128 | } 129 | if (!endSearch2) { 130 | Set hyperSet2 = getHyperSet(visited2.get(sz2-1)); 131 | if (hyperSet2.isEmpty()) { endSearch2 = true; } 132 | else { visited2.add(hyperSet2); } 133 | } 134 | } 135 | } 136 | 137 | if (commonSynset == null) { return 0.0; } 138 | 139 | // System.out.println("Common ancestor synset found: "); 140 | // System.out.println(commonSynset.getWord(1).getLemma()); 141 | // System.out.println(commonSynset.getGloss()); 142 | // System.out.println("Common synset pos1: "+commonSynsetPos1); 143 | // System.out.println("Common synset pos2: "+commonSynsetPos2); 144 | // System.out.println("Depth of this common ancestor is:"+findDepth(commonSynset)); 145 | 146 | int N1 = commonSynsetPos1; 147 | int N2 = commonSynsetPos2; 148 | int N3 = 
findDepth(commonSynset); 149 | 150 | return 2*N3 / (double) (N1+N2+2*N3); 151 | } 152 | 153 | private int findDepth(ISynset synset) { 154 | if (synset.getRelatedSynsets(Pointer.HYPERNYM).isEmpty()) { return 0; } 155 | List> list = new ArrayList<>(); 156 | Set set = new HashSet<>(); 157 | set.add(synset); 158 | list.add(set); 159 | boolean topReached = false; 160 | int depth = -1; 161 | while (!topReached) { 162 | Set nextSet = new HashSet<>(); 163 | for (ISynset syn : list.get(list.size()-1)) { 164 | List hyperIDs = syn.getRelatedSynsets(Pointer.HYPERNYM); 165 | if (!hyperIDs.isEmpty()) { 166 | for (ISynsetID hyperID : hyperIDs) { nextSet.add(dict.getSynset(hyperID)); } 167 | } else { 168 | topReached = true; 169 | depth = list.size()-1; 170 | break; 171 | } 172 | } 173 | list.add(nextSet); 174 | } 175 | return depth; 176 | } 177 | 178 | private Set getHyperSet(Set set) { 179 | Set hyperSet = new HashSet<>(); 180 | for (ISynset syn : set) { 181 | List hyperIDs = syn.getRelatedSynsets(Pointer.HYPERNYM); 182 | if (!hyperIDs.isEmpty()) { 183 | for (ISynsetID hyperID : hyperIDs) { hyperSet.add(dict.getSynset(hyperID)); } 184 | } 185 | } 186 | return hyperSet; 187 | } 188 | 189 | private ISynset intersection(Set set1, Set set2) { 190 | for (ISynset syn2 : set2) { 191 | if (set1.contains(syn2)) { return syn2; } 192 | } 193 | return null; 194 | } 195 | 196 | /** 197 | * Testing method 198 | * @param args 199 | * @throws Exception 200 | */ 201 | public static void main(String[] args) throws Exception { 202 | WordNet net = new WordNet(); 203 | String word1 = "scopes"; 204 | String word2 = "book"; 205 | System.out.printf("WUP similarity between %s and %s is: %f\n", word1, word2, net.similarity(word1, word2)); 206 | // String word = "SCOPES"; 207 | // List wordIDs = net.dict.getIndexWord(word, POS.NOUN).getWordIDs(); 208 | // List synsets = new ArrayList<>(); 209 | // for (IWordID wID : wordIDs) { synsets.add(net.dict.getWord(wID).getSynset()); } 210 | // 211 | // for 
(ISynset syn : synsets) { 212 | // System.out.println(syn.getGloss()); 213 | // System.out.println("Words in this synset:"); 214 | // for (IWord w : syn.getWords()) { 215 | // System.out.println(w.getLemma()); 216 | // } 217 | // } 218 | // 219 | // ISynset hyper = net.dict.getSynset(synsets.get(0).getRelatedSynsets(Pointer.HYPERNYM).get(0)); 220 | // System.out.println(hyper.getGloss());; 221 | // System.out.println(hyper.getWords().get(0).getLemma()); 222 | 223 | } 224 | 225 | } 226 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/SyntacticEvaluator.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.List; 4 | 5 | public class SyntacticEvaluator { 6 | 7 | int numOfInvalid; 8 | 9 | public SyntacticEvaluator() { 10 | numOfInvalid = 0; 11 | } 12 | 13 | /** 14 | * a root is invalid if: 15 | * it has no child; 16 | * it has only one child and this child is not SN; 17 | * it has more than one child and other than the first child is not ON. 
18 | * @param node 19 | * @return 20 | */ 21 | private static int checkROOT(Node node){ 22 | int numOfInvalid = 0; 23 | List children = node.getChildren(); 24 | int sizeOfChildren = children.size(); 25 | 26 | if (sizeOfChildren == 0){ 27 | numOfInvalid++; 28 | node.isInvalid = true; 29 | } 30 | else if (sizeOfChildren == 1 && !children.get(0).getInfo().getType().equals("SN")){ 31 | numOfInvalid++; 32 | node.isInvalid = true; 33 | } 34 | else if (sizeOfChildren > 1){ 35 | if (!children.get(0).getInfo().getType().equals("SN")){ 36 | numOfInvalid++; 37 | node.isInvalid = true; 38 | } 39 | else { 40 | for (int j = 1; j < sizeOfChildren; j++){ 41 | if (!children.get(j).getInfo().getType().equals("ON")){ 42 | numOfInvalid++; 43 | node.isInvalid = true; 44 | } 45 | } 46 | } 47 | } 48 | return numOfInvalid; 49 | } 50 | 51 | /** 52 | * a SN is not valid if: 53 | * it has more than 1 child; 54 | * it has 1 child but this child is not GNP (FN or NN). 55 | * @param node 56 | * @return 57 | */ 58 | private static int checkSN(Node node){ 59 | int numOfInvalid = 0; 60 | List children = node.getChildren(); 61 | int sizeOfChildren = children.size(); 62 | 63 | //SN can only have one child from FN or NN 64 | if (sizeOfChildren != 1){ 65 | numOfInvalid++; 66 | node.isInvalid = true; 67 | } 68 | else{ 69 | String childType = children.get(0).getInfo().getType(); 70 | if (!(childType.equals("NN") || childType.equals("FN"))){ 71 | numOfInvalid++; 72 | node.isInvalid = true; 73 | } 74 | } 75 | 76 | return numOfInvalid; 77 | } 78 | 79 | /** 80 | * a ON is invalid if: 81 | * (1) in ComplexCondition (its parent is ROOT): 82 | * its number of children is not 2 (left & right subtrees); 83 | * it has 2 children, but first one is not GNP, or second one is not GNP/VN/FN. 84 | * (2) in Condition (its parent is NN): 85 | * its number of children is not 1; 86 | * it has 1 child, but the child is not VN. 
87 | * @param node 88 | * @return 89 | */ 90 | private static int checkON(Node node){ 91 | int numOfInvalid = 0; 92 | String parentType = node.getParent().getInfo().getType(); 93 | List children = node.getChildren(); 94 | int sizeOfChildren = children.size(); 95 | 96 | if (parentType.equals("ROOT")){ 97 | if (sizeOfChildren != 2){ 98 | numOfInvalid++; 99 | node.isInvalid = true; 100 | } 101 | else{ 102 | for (int j = 0; j children = node.getChildren(); 146 | int sizeOfChildren = children.size(); 147 | 148 | //NP=NN+NN*Condition. Second NN has no child. 149 | if (parentType.equals("NN")){ 150 | if (sizeOfChildren != 0){ //this rule is different from figure 7 (a), but I think this makes sense 151 | numOfInvalid++; 152 | node.isInvalid = true; 153 | } 154 | } 155 | //SN+GNP, or ON+GNP, or FN+GNP. and GNP=NP=NN+NN*Condition. First NN can have any number of children from NN,ON,VN. 156 | else if (parentType.equals("SN") || parentType.equals("FN") || parentType.equals("ON")){ 157 | if (sizeOfChildren != 0){ 158 | for (int j = 0; j < sizeOfChildren; j++){ 159 | String childType = children.get(j).getInfo().getType(); 160 | if (!(childType.equals("NN") || childType.equals("VN") || childType.equals("ON"))){ 161 | numOfInvalid++; 162 | node.isInvalid = true; 163 | break; 164 | } 165 | } 166 | } 167 | } 168 | 169 | return numOfInvalid; 170 | } 171 | 172 | /** 173 | * a VN is invalid if: 174 | * it has children. 
175 | * @param node 176 | * @return 177 | */ 178 | private static int checkVN(Node node){ 179 | int numOfInvalid = 0; 180 | //String parentType = node.getParent().getInfo().getType(); 181 | List children = node.getChildren(); 182 | int sizeOfChildren = children.size(); 183 | 184 | if (sizeOfChildren != 0){ //VN cannot have children 185 | numOfInvalid++; 186 | node.isInvalid = true; 187 | } 188 | /* 189 | else if (!(parentType.equals("ON") || parentType.equals("NN"))){ //VN can only be child of ON and NN 190 | numOfInvalid++; 191 | node.isInvalid = true; 192 | } 193 | */ 194 | return numOfInvalid; 195 | } 196 | 197 | /** 198 | * a FN is valid if: 199 | * ON+FN, or ON+GNP, or SN+GNP, or FN+GNP. and GNP=FN+GNP, 200 | * FN can be child of ON, without children or only 1 child of NN or FN, 201 | * FN can be child of SN, with only 1 child of NN or FN, 202 | * FN can be child of FN, with only 1 child of NN or FN. 203 | * @param node 204 | * @return 205 | */ 206 | private static int checkFN(Node node){ 207 | int numOfInvalid = 0; 208 | String parentType = node.getParent().getInfo().getType(); 209 | List children = node.getChildren(); 210 | int sizeOfChildren = children.size(); 211 | 212 | if (sizeOfChildren == 0){ 213 | if (!parentType.equals("ON")){ 214 | numOfInvalid++; 215 | node.isInvalid = true; 216 | } 217 | } 218 | else if (sizeOfChildren == 1){ 219 | String childType = children.get(0).getInfo().getType(); 220 | if (!(parentType.equals("ON") || parentType.equals("SN") /*|| parentType.equals("FN")*/)){ 221 | numOfInvalid++; 222 | node.isInvalid = true; 223 | } 224 | else if (!(childType.equals("NN") /*|| childType.equals("FN")*/)){ 225 | numOfInvalid++; 226 | node.isInvalid = true; 227 | } 228 | } 229 | else{ 230 | numOfInvalid++; 231 | node.isInvalid = true; 232 | } 233 | 234 | return numOfInvalid; 235 | } 236 | 237 | /** 238 | * Number of invalid tree nodes according to the grammar: 239 | * Q -> (SClause)(ComplexCindition)* 240 | * SClause -> SELECT + GNP 241 | * 
ComplexCondition -> ON + (LeftSubTree*RightSubTree) 242 | * LeftSubTree -> GNP 243 | * RightSubTree -> GNP | VN | FN 244 | * GNP -> (FN + GNP) | NP 245 | * NP -> NN + (NN)*(Condition)* 246 | * Condition -> VN | (ON + VN) 247 | * 248 | * +: parent-child relationship 249 | * *: sibling relationship 250 | * |: or 251 | * 252 | * Basic rule: Check invalidity only considering its children 253 | * @param T 254 | * @return 255 | */ 256 | public static int numberOfInvalidNodes (ParseTree T){ 257 | int numOfInvalid = 0; //number of invalid tree nodes 258 | for (Node curNode : T) { 259 | String curType = curNode.getInfo().getType(); 260 | if (curType.equals("ROOT")){ //ROOT 261 | numOfInvalid = numOfInvalid + checkROOT(curNode); 262 | } 263 | if (curType.equals("SN")){ // select node 264 | numOfInvalid = numOfInvalid + checkSN(curNode); 265 | } 266 | else if (curType.equals("ON")){ //operator node 267 | numOfInvalid = numOfInvalid + checkON(curNode); 268 | } 269 | else if (curType.equals("NN")){ //name node 270 | numOfInvalid = numOfInvalid + checkNN(curNode); 271 | } 272 | else if (curType.equals("VN")){ //value node 273 | numOfInvalid = numOfInvalid + checkVN(curNode); 274 | } 275 | else if (curType.equals("FN")){ //function nodes 276 | numOfInvalid = numOfInvalid + checkFN(curNode); 277 | } 278 | } 279 | return numOfInvalid; 280 | } 281 | 282 | } 283 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/SchemaGraph.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.sql.Connection; 4 | import java.sql.DatabaseMetaData; 5 | import java.sql.DriverManager; 6 | import java.sql.ResultSet; 7 | import java.sql.SQLException; 8 | import java.sql.Statement; 9 | import java.util.ArrayList; 10 | import java.util.HashMap; 11 | import java.util.HashSet; 12 | import java.util.LinkedList; 13 | import 
java.util.List; 14 | import java.util.Map; 15 | import java.util.Set; 16 | 17 | 18 | public class SchemaGraph { 19 | 20 | /** 21 | * table name, column name, column type 22 | */ 23 | private Map> tables; 24 | //table name, column name, column values 25 | private Map>> tableRows; 26 | 27 | /** 28 | * table name, primary key (set of column names). 29 | * Two tables are connected only if pubkey of table1 is a 30 | * column of table2, but NOT the pubkey of table2. Graph no direction. 31 | */ 32 | private Map> keys; 33 | 34 | /** 35 | * table1Name, table2Name 36 | */ 37 | private Map> connectivity; 38 | 39 | /** 40 | * Construct a schemaGraph from database meta data. 41 | * @see document of getTables 44 | * @param meta 45 | * @throws SQLException 46 | */ 47 | public SchemaGraph(Connection c) throws SQLException { 48 | System.out.println("Retrieving schema graph..."); 49 | DatabaseMetaData meta = c.getMetaData(); 50 | tables = new HashMap<>(); 51 | tableRows = new HashMap<>(); 52 | String[] types = {"TABLE"}; 53 | ResultSet rsTable = meta.getTables(null, null, "%", types); 54 | 55 | 56 | Statement stmt = c.createStatement(); 57 | while (rsTable.next()) { 58 | String tableName = rsTable.getString("TABLE_NAME"); 59 | tables.put(tableName, new HashMap<>()); 60 | tableRows.put(tableName, new HashMap<>()); 61 | 62 | Map table = tables.get(tableName); 63 | Map> tableRow = tableRows.get(tableName); 64 | 65 | ResultSet rsColumn = meta.getColumns(null, null, tableName, null); 66 | while (rsColumn.next()){ 67 | /*retrieve column info for each table, insert into tables*/ 68 | String columnName = rsColumn.getString("COLUMN_NAME"); 69 | String columnType = rsColumn.getString("TYPE_NAME"); 70 | table.put(columnName, columnType); 71 | /*draw random sample of size 10000 from each table, insert into tableRows*/ 72 | String query = "SELECT " + columnName + " FROM " + tableName + " ORDER BY RANDOM() LIMIT 2000;"; 73 | ResultSet rows = stmt.executeQuery(query); 74 | tableRow.put(columnName, 
new HashSet()); 75 | Set columnValues = tableRow.get(columnName); 76 | while (rows.next()){ 77 | String columnValue = rows.getString(1); 78 | //testing if the last column read has a SQL NULL 79 | if (!rows.wasNull()) 80 | columnValues.add(columnValue); 81 | } 82 | } 83 | } 84 | if (stmt != null) { stmt.close(); } 85 | readPrimaryKeys(meta); 86 | findConnectivity(); 87 | System.out.println("Schema graph retrieved."); 88 | } 89 | 90 | private void readPrimaryKeys(DatabaseMetaData meta) throws SQLException { 91 | keys = new HashMap<>(); 92 | for (String tableName : tables.keySet()) { 93 | ResultSet rsPrimaryKey = meta.getPrimaryKeys(null, null, tableName); 94 | keys.put(tableName, new HashSet()); 95 | while (rsPrimaryKey.next()) { 96 | keys.get(tableName).add(rsPrimaryKey.getString("COLUMN_NAME")); 97 | } 98 | } 99 | // System.out.println(keys); 100 | } 101 | 102 | private void findConnectivity() { 103 | connectivity = new HashMap>(); 104 | for (String tableName : tables.keySet()) { 105 | connectivity.put(tableName, new HashSet()); 106 | } 107 | for (String table1 : tables.keySet()) { 108 | for (String table2 : tables.keySet()) { 109 | if (table1.equals(table2)) { continue; } 110 | if (!getJoinKeys(table1, table2).isEmpty()) { 111 | connectivity.get(table1).add(table2); 112 | connectivity.get(table2).add(table1); 113 | } 114 | } 115 | } 116 | } 117 | 118 | public Set getJoinKeys(String table1, String table2) { 119 | Set table1Keys = keys.get(table1); 120 | Set table2Keys = keys.get(table2); 121 | if (table1Keys.equals(table2Keys)) { return new HashSet(); } 122 | boolean keys1ContainedIn2 = true; 123 | for (String table1Key : table1Keys) { 124 | if (!tables.get(table2).containsKey(table1Key)) { 125 | keys1ContainedIn2 = false; 126 | break; 127 | } 128 | } 129 | if (keys1ContainedIn2) { return new HashSet(table1Keys); } 130 | 131 | boolean keys2ContainedIn1 = true; 132 | for (String table2Key : table2Keys) { 133 | if (!tables.get(table1).containsKey(table2Key)) { 134 | 
keys2ContainedIn1 = false; 135 | break; 136 | } 137 | } 138 | if (keys2ContainedIn1) { return new HashSet(table2Keys); } 139 | 140 | return new HashSet(); 141 | } 142 | 143 | /** 144 | * Return a list of String as join path in the form of: 145 | *
table1 table3 table2 146 | *
Shortest join path is found using BFS. 147 | *
The join keys can be found using {@link #getJoinKeys(String, String)} 148 | * @param table1 149 | * @param table2 150 | * @return 151 | */ 152 | public List getJoinPath(String table1, String table2) { 153 | if (!tables.containsKey(table1) || !tables.containsKey(table2)) { 154 | return new ArrayList(); 155 | } 156 | // Assume table1 and table2 are different. 157 | // Find shortest path using BFS. 158 | HashMap visited = new HashMap<>(); 159 | for (String tableName : tables.keySet()) { 160 | visited.put(tableName, false); 161 | } 162 | HashMap prev = new HashMap<>(); // the parent tableName 163 | LinkedList queue = new LinkedList<>(); 164 | queue.addLast(table1); 165 | visited.put(table1, true); 166 | boolean found = false; 167 | while (!queue.isEmpty() && !found) { 168 | String tableCurr = queue.removeFirst(); 169 | for (String tableNext : connectivity.get(tableCurr)) { 170 | if (!visited.get(tableNext)) { 171 | visited.put(tableNext, true); 172 | queue.addLast(tableNext); 173 | prev.put(tableNext, tableCurr); 174 | } 175 | if (tableNext.equals(table2)) { found = true; } 176 | } 177 | } 178 | 179 | LinkedList path = new LinkedList<>(); 180 | if (visited.get(table2)) { 181 | String tableEnd = table2; 182 | path.push(tableEnd); 183 | while (prev.containsKey(tableEnd)) { 184 | tableEnd = prev.get(tableEnd); 185 | path.push(tableEnd); 186 | } 187 | } 188 | return path; 189 | } 190 | 191 | public Set getTableNames() { 192 | return tables.keySet(); 193 | } 194 | 195 | public Set getColumns(String tableName) { 196 | return tables.get(tableName).keySet(); 197 | } 198 | 199 | public Set getValues(String tableName, String columnName){ 200 | return tableRows.get(tableName).get(columnName); 201 | } 202 | 203 | @Override 204 | public String toString() { 205 | String s = ""; 206 | for (String tableName : tables.keySet()) { 207 | s += "table: "+tableName+"\n"; 208 | s += "{"; 209 | Map columns = tables.get(tableName); 210 | for (String colName : columns.keySet()) { 211 | s += 
colName+": "+columns.get(colName)+"\t"; 212 | } 213 | s += "}\n\n"; 214 | } 215 | return s; 216 | } 217 | 218 | public static void main(String[] args) throws Exception { 219 | Connection connection = DriverManager.getConnection("jdbc:postgresql://127.0.0.1:5432/dblp", "dblpuser", "dblpuser"); 220 | SchemaGraph schema = new SchemaGraph(connection); 221 | System.out.println("The join path between article and authorship:"); 222 | System.out.println(schema.getJoinPath("article", "authorship")); 223 | System.out.println("The join path between authorship and article:"); 224 | System.out.println(schema.getJoinPath("authorship", "article")); 225 | System.out.println("The join path between inproceedings and authorship:"); 226 | System.out.println(schema.getJoinPath("inproceedings", "authorship")); 227 | System.out.println("The join path between article and inproceedings:"); 228 | System.out.println(schema.getJoinPath("article", "inproceedings")); 229 | System.out.println("----------------------------------------------"); 230 | System.out.println("The join keys between article and authorship:"); 231 | System.out.println(schema.getJoinKeys("article", "authorship")); 232 | System.out.println("The join keys between article and inproceedings:"); 233 | System.out.println(schema.getJoinKeys("article", "inproceedings")); 234 | System.out.println("The join keys between inproceedings and authorship:"); 235 | System.out.println(schema.getJoinKeys("inproceedings", "authorship")); 236 | } 237 | } 238 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/TreeAdjustorTest.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.util.Collections; 4 | import java.util.List; 5 | 6 | public class TreeAdjustorTest { 7 | public static void numberOfInvalidNodesTest(){ 8 | //construct a tree in the paper, 9 | //current test 
case is Figure 3 (a), output should be 3 (node 6 should not be invalid) 10 | ParseTree T = new ParseTree(); 11 | Node[] nodes = new Node[9]; 12 | 13 | nodes[0] = new Node(0, "ROOT", "--"); 14 | nodes[0].info = new NodeInfo("ROOT","ROOT"); 15 | nodes[1] = new Node(1, "return", "--"); 16 | nodes[1].info = new NodeInfo("SN","SELECT"); 17 | nodes[2] = new Node(2, "author", "--"); 18 | nodes[2].info = new NodeInfo("NN", "Author"); 19 | nodes[3] = new Node(3, "paper", "--"); 20 | nodes[3].info = new NodeInfo("NN", ">"); 21 | nodes[4] = new Node(4, "more", "--"); 22 | nodes[4].info = new NodeInfo("ON", "Title"); 23 | nodes[5] = new Node(5, "Bob", "--"); 24 | nodes[5].info = new NodeInfo("VN", "Author"); 25 | nodes[6] = new Node(6, "VLDB", "--"); 26 | nodes[6].info = new NodeInfo("VN", "Journal"); 27 | nodes[7] = new Node(7, "after", "--"); 28 | nodes[7].info = new NodeInfo("ON", ">"); 29 | nodes[8] = new Node(8, "2000", "--"); 30 | nodes[8].info = new NodeInfo("VN", "Year"); 31 | 32 | T.root = nodes[0]; 33 | nodes[0].children.add(nodes[1]); 34 | nodes[1].parent = nodes[0]; 35 | nodes[1].children.add(nodes[2]); 36 | nodes[2].parent = nodes[1]; 37 | nodes[2].children.add(nodes[3]); 38 | nodes[3].parent = nodes[2]; 39 | nodes[2].children.add(nodes[5]); 40 | nodes[5].parent = nodes[2]; 41 | nodes[2].children.add(nodes[7]); 42 | nodes[7].parent = nodes[2]; 43 | nodes[3].children.add(nodes[4]); 44 | nodes[4].parent = nodes[3]; 45 | nodes[5].children.add(nodes[6]); 46 | nodes[6].parent = nodes[5]; 47 | nodes[7].children.add(nodes[8]); 48 | nodes[8].parent = nodes[7]; 49 | 50 | System.out.println("===========test for Running SyntacticEvaluator.numberOfInvalidNodes==========="); 51 | System.out.println("Input tree: "+T.toString()); 52 | System.out.println("Number of Invalid nodes: "+SyntacticEvaluator.numberOfInvalidNodes(T)+"\n"); 53 | System.out.println("Invalid nodes: "); 54 | for (int i = 1; i < nodes.length; i++){ 55 | if (nodes[i].isInvalid) 56 | 
System.out.println(nodes[i]); 57 | } 58 | } 59 | 60 | public static void mergeLNQNTest() { 61 | ParseTree T = new ParseTree(); 62 | Node[] nodes = new Node[9]; 63 | 64 | nodes[0] = new Node(0, "ROOT", "--"); 65 | nodes[0].info = new NodeInfo("ROOT","ROOT"); 66 | nodes[1] = new Node(1, "return", "--"); 67 | nodes[1].info = new NodeInfo("SN","SELECT"); 68 | nodes[2] = new Node(2, "conference", "--"); 69 | nodes[2].info = new NodeInfo("NN", "Author"); 70 | nodes[3] = new Node(3, "area", "--"); 71 | nodes[3].info = new NodeInfo("NN", "Title"); 72 | nodes[4] = new Node(4, "each", "--"); 73 | nodes[4].info = new NodeInfo("QN", ">"); 74 | nodes[5] = new Node(5, "papers", "--"); 75 | nodes[5].info = new NodeInfo("NN", "Author"); 76 | nodes[6] = new Node(6, "citations", "--"); 77 | nodes[6].info = new NodeInfo("NN", "Journal"); 78 | nodes[7] = new Node(7, "most", "--"); 79 | nodes[7].info = new NodeInfo("FN", ">"); 80 | nodes[8] = new Node(8, "total", "--"); 81 | nodes[8].info = new NodeInfo("FN", "Year"); 82 | 83 | T.root = nodes[0]; 84 | nodes[0].children.add(nodes[1]); 85 | nodes[1].parent = nodes[0]; 86 | nodes[1].children.add(nodes[2]); 87 | nodes[2].parent = nodes[1]; 88 | nodes[2].children.add(nodes[3]); 89 | nodes[3].parent = nodes[2]; 90 | nodes[2].children.add(nodes[5]); 91 | nodes[5].parent = nodes[2]; 92 | nodes[3].children.add(nodes[4]); 93 | nodes[4].parent = nodes[3]; 94 | nodes[5].children.add(nodes[6]); 95 | nodes[6].parent = nodes[5]; 96 | nodes[6].children.add(nodes[7]); 97 | nodes[7].parent = nodes[6]; 98 | nodes[6].children.add(nodes[8]); 99 | nodes[8].parent = nodes[6]; 100 | 101 | System.out.println("===========test for Running mergeLNQN==========="); 102 | System.out.println("Input tree: "+T.toString()); 103 | ParseTree tree = T.mergeLNQN(); 104 | System.out.println("Output tree: "+tree.toString()); 105 | } 106 | 107 | public static void adjustTest(){ 108 | ParseTree T = new ParseTree(); 109 | Node[] nodes = new Node[9]; 110 | 111 | nodes[0] = new 
Node(0, "ROOT", "--"); 112 | nodes[0].info = new NodeInfo("ROOT","ROOT"); 113 | nodes[1] = new Node(1, "return", "--"); 114 | nodes[1].info = new NodeInfo("SN","SELECT"); 115 | nodes[2] = new Node(2, "conference", "--"); 116 | nodes[2].info = new NodeInfo("NN", "Author"); 117 | nodes[3] = new Node(3, "area", "--"); 118 | nodes[3].info = new NodeInfo("NN", "Title"); 119 | nodes[4] = new Node(4, "each", "--"); 120 | nodes[4].info = new NodeInfo("QN", ">"); 121 | nodes[5] = new Node(5, "papers", "--"); 122 | nodes[5].info = new NodeInfo("NN", "Author"); 123 | nodes[6] = new Node(6, "citations", "--"); 124 | nodes[6].info = new NodeInfo("NN", "Journal"); 125 | nodes[7] = new Node(7, "most", "--"); 126 | nodes[7].info = new NodeInfo("FN", ">"); 127 | nodes[8] = new Node(8, "total", "--"); 128 | nodes[8].info = new NodeInfo("FN", "Year"); 129 | 130 | T.root = nodes[0]; 131 | nodes[0].children.add(nodes[1]); 132 | nodes[1].parent = nodes[0]; 133 | nodes[1].children.add(nodes[2]); 134 | nodes[2].parent = nodes[1]; 135 | nodes[2].children.add(nodes[3]); 136 | nodes[3].parent = nodes[2]; 137 | nodes[2].children.add(nodes[5]); 138 | nodes[5].parent = nodes[2]; 139 | nodes[3].children.add(nodes[4]); 140 | nodes[4].parent = nodes[3]; 141 | nodes[5].children.add(nodes[6]); 142 | nodes[6].parent = nodes[5]; 143 | nodes[6].children.add(nodes[7]); 144 | nodes[7].parent = nodes[6]; 145 | nodes[6].children.add(nodes[8]); 146 | nodes[8].parent = nodes[6]; 147 | 148 | System.out.println("===========test for Running adjust() in TreeAdjustor==========="); 149 | System.out.println("Input tree: "+T.toString()); 150 | List treeList = TreeAdjustor.adjust(T); 151 | System.out.println("Output size: "+treeList.size()); 152 | System.out.println("Output trees:"); 153 | for (int j = 0; j < treeList.size(); j++){ 154 | System.out.println("Tree "+j+" :"); 155 | System.out.println(treeList.get(j)); 156 | } 157 | } 158 | 159 | public static void getAdjustedTreesTest(){ 160 | ParseTree T = new 
ParseTree(); 161 | Node[] nodes = new Node[8]; 162 | 163 | nodes[0] = new Node(0, "ROOT", "--"); 164 | nodes[0].info = new NodeInfo("ROOT","ROOT"); 165 | nodes[1] = new Node(1, "return", "--"); 166 | nodes[1].info = new NodeInfo("SN","SELECT"); 167 | nodes[2] = new Node(2, "conference", "--"); 168 | nodes[2].info = new NodeInfo("NN", "Author"); 169 | nodes[3] = new Node(3, "area", "--"); 170 | nodes[3].info = new NodeInfo("NN", "Title"); 171 | nodes[4] = new Node(4, "papers", "--"); 172 | nodes[4].info = new NodeInfo("NN", "Author"); 173 | nodes[5] = new Node(5, "citations", "--"); 174 | nodes[5].info = new NodeInfo("NN", "Journal"); 175 | nodes[6] = new Node(6, "most", "--"); 176 | nodes[6].info = new NodeInfo("FN", ">"); 177 | nodes[7] = new Node(7, "total", "--"); 178 | nodes[7].info = new NodeInfo("FN", "Year"); 179 | 180 | T.root = nodes[0]; 181 | nodes[0].children.add(nodes[1]); 182 | nodes[1].parent = nodes[0]; 183 | nodes[1].children.add(nodes[2]); 184 | nodes[2].parent = nodes[1]; 185 | nodes[2].children.add(nodes[3]); 186 | nodes[3].parent = nodes[2]; 187 | nodes[2].children.add(nodes[4]); 188 | nodes[4].parent = nodes[2]; 189 | nodes[4].children.add(nodes[5]); 190 | nodes[5].parent = nodes[4]; 191 | nodes[5].children.add(nodes[6]); 192 | nodes[6].parent = nodes[5]; 193 | nodes[5].children.add(nodes[7]); 194 | nodes[7].parent = nodes[5]; 195 | 196 | System.out.println("===========test for Running getAdjustedTrees() in TreeAdjustor==========="); 197 | System.out.println("The original tree:"); 198 | System.out.println(T); 199 | System.out.println("Number of possible trees for choice:"); 200 | List result = TreeAdjustor.getAdjustedTrees(T); 201 | System.out.println(result.size()); 202 | Collections.sort(result, (t1, t2) -> (- t1.getScore() + t2.getScore())); 203 | System.out.println("The three trees with highest scores look like:"); 204 | for (int i = 0; i < 5; i++) { 205 | System.out.println(result.get(i)); 206 | } 207 | } 208 | 209 | public static void 
testAddON (){ 210 | ParseTree T = new ParseTree(); 211 | Node[] nodes = new Node[8]; 212 | 213 | nodes[0] = new Node(0, "ROOT", "--"); 214 | nodes[0].info = new NodeInfo("ROOT","ROOT"); 215 | nodes[1] = new Node(1, "return", "--"); 216 | nodes[1].info = new NodeInfo("SN","SELECT"); 217 | nodes[2] = new Node(2, "conference", "--"); 218 | nodes[2].info = new NodeInfo("NN", "Author"); 219 | nodes[3] = new Node(3, "area", "--"); 220 | nodes[3].info = new NodeInfo("NN", "Title"); 221 | nodes[4] = new Node(4, "papers", "--"); 222 | nodes[4].info = new NodeInfo("NN", "Author"); 223 | nodes[5] = new Node(5, "citations", "--"); 224 | nodes[5].info = new NodeInfo("NN", "Journal"); 225 | nodes[6] = new Node(6, "most", "--"); 226 | nodes[6].info = new NodeInfo("FN", ">"); 227 | nodes[7] = new Node(7, "total", "--"); 228 | nodes[7].info = new NodeInfo("FN", "Year"); 229 | 230 | T.root = nodes[0]; 231 | nodes[0].children.add(nodes[1]); 232 | nodes[1].parent = nodes[0]; 233 | nodes[1].children.add(nodes[2]); 234 | nodes[2].parent = nodes[1]; 235 | nodes[2].children.add(nodes[3]); 236 | nodes[3].parent = nodes[2]; 237 | nodes[2].children.add(nodes[4]); 238 | nodes[4].parent = nodes[2]; 239 | nodes[4].children.add(nodes[5]); 240 | nodes[5].parent = nodes[4]; 241 | nodes[5].children.add(nodes[6]); 242 | nodes[6].parent = nodes[5]; 243 | nodes[5].children.add(nodes[7]); 244 | nodes[7].parent = nodes[5]; 245 | 246 | System.out.println("===========test for Running addON() in ParseTree==========="); 247 | System.out.println("The original tree:"); 248 | System.out.println(T); 249 | ParseTree tree = T.addON(); 250 | System.out.println("After adding ON:"); 251 | System.out.println(tree); 252 | System.out.println("The original tree:"); 253 | System.out.println(T); 254 | } 255 | 256 | public static void main(String[] args) { 257 | // numberOfInvalidNodesTest(); 258 | // mergeLNQNTest(); 259 | // adjustTest(); 260 | getAdjustedTreesTest(); 261 | // testAddON(); 262 | } 263 | 264 | } 265 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /doc/report/midterm/midterm.tex: -------------------------------------------------------------------------------- 1 | \documentclass[twocolumn]{article} 2 | 3 | % Feel free to add more packages 4 | \usepackage{float, amsmath, amssymb, mathtools} 5 | \usepackage{graphicx, caption, color} 6 | \usepackage{tabularx, fullpage} 7 | %\usepackage{kotex} 8 | %\usepackage{multicol} 9 | \setlength{\columnsep}{1cm} 10 | \usepackage{comment, cite, wrapfig} 11 | \usepackage[utf8]{inputenc} 12 | \usepackage[hidelinks]{hyperref} 13 | \usepackage{courier} 14 | %\usepackage{geometry} 15 | \hypersetup{breaklinks=true} 16 | \urlstyle{same} 17 | 18 | \newcommand{\red}[1]{{\bf \color{red}#1}} 19 | \newcommand{\blue}[1]{{\bf \color{blue}#1}} 20 | \newcommand{\cut}[1]{} 21 | 22 | 23 | \begin{document} 24 | 25 | \title{Natural Language Interface for Relational Database\\ 26 | \small{Midterm Report}} 27 | 28 | %Authors in alphabetical order of last names 29 | \author{Yilin Gao \\ 30 | \small \texttt{yilin.gao@duke.edu} \and 31 | Keping Wang \\ 32 | \small 
\texttt{kw238@duke.edu} \and 33 | Chengkang Xu \\ 34 | \small \texttt{cx33@duke.edu} } 35 | 36 | \date{\today} 37 | \maketitle 38 | 39 | %%%================================================================%%% 40 | \section{Introduction}\label{sec:introduction} 41 | 42 | Writing SQL queries can be difficult, especially when it involves complex logic. As more and more non-expert users are accessing relational databases, it is very important to simplify their process of writing SQL queries. This project is going to build a Natural Language Interface for relational DataBases (NLIDB), closely following Li and Jagadish (2014)\cite{li2014}. NLIDB will be a tool for everyone to query data easily from relational databases. 43 | 44 | Translating natural language into an SQL query isn't an easy job. Not only because of the ambiguity of natural language, but also that users may make mistakes in writing natural language input, such as mis-spelling. We want the users to feel at ease using our interface, not afraid of being mis-interpreted by the NLIDB, even if they cannot remember the exact names of the database column names. So we follow Li and Jagadish (2014)\cite{li2014} to use an interactive interface to let users make choices in several ambiguous phases of the translation. 45 | 46 | The main components for translating a natural language to an SQL query are as follows: 47 | 48 | \begin{enumerate} 49 | \item Parse the natural language input into a parse tree using dependency syntax parser. 50 | \item Map the nodes in the parse tree to SQL keywords, table names, column names, and values. Here users may choose the desired mapping from ranked options. 51 | \item Adjust the structure of the parse tree to make it follow the structure of an SQL query. 52 | \item Translate the parse tree to an SQL query. 
53 | \end{enumerate} 54 | 55 | Up until this midterm report, we have completed steps 1 - 2 above. We have built an interactive graphical user interface (GUI), and established connection with a database to experiment with the two steps. Now the user can already choose the desired node mappings from the choices offered by our application. 56 | 57 | Before the final report, we will finish steps 3 - 4 and tune the model with some hand-written natural language and SQL query pairs. 58 | 59 | %%%================================================================%%% 60 | \section{Related Work} 61 | 62 | Early day NLIDB systems were usually based on small scale database, which requires a small set of supported queries. Their parsing mechanism could only support ad-hoc methods and rules. Thus, early work would produce ambiguity if the database is scalable and natural language queries are "open-domain". Moreover, without the help of machine learning, early NLIDB systems cannot update their parsing methods as they accumulate more data.\cite{QATutorial} 63 | 64 | Our approach involves machine learning in parsing the natural language input into a parse tree. Then we adjust the structure of the parse tree to conform to the SQL syntax. Our approach can handle natural language input with more complicated structures than the simple key-word matching method. 65 | 66 | NLIDB here is a concrete application of the natural language QA systems.\cite{QATutorial} Currently, the mainstream approach for QA is the semantic parsing of questions. It can map natural language questions to logic forms or structured queries, and produce accurate answers when the query is complete and clear. However, the accuracy of answers will decrease if the input language is ambiguous, or if the logic relationship of the query is complicated. Due to our lack of training data, our NLIDB system cannot adopt the popular RNN (LSTM) for a direct and efficient translation. 
Still we are trying to allow more input ambiguity and structural complexity by letting the users choose the mappings and structures interactively. 67 | 68 | %%%================================================================%%% 69 | \section{Problem Definition} 70 | 71 | For this NLIDB, we have to first develop a GUI, and then design the \texttt{ParseTree} class. Then we need to develop parse tree node mapper, parse tree structure adjuster, and SQL query translator. 72 | 73 | There are three main problems that we face. The first one is the use of data structure in \texttt{ParseTree}. The second is what algorithms to use for each phase of the translation. The last problem is to specify the rules for different phases, such as what word should be mapped to the ``SELECT'' key word, and what rules should a legal parse tree follow before being translated to an SQL query. 74 | 75 | %%%================================================================%%% 76 | \section{Algorithms} 77 | 78 | In the natural language parsing phase, we use the feature based pos-tagger\cite{toutanova2003feature} and the neural network dependency parser\cite{chen2014fast} from the Stanford NLP package. 79 | 80 | In the node mapping phase, other than mapping words with hard coded rules, we compare words with table and column names in the database, which requires a word similarity score. The similarity score is the maximum of two subscores. The first is lexical similarity (similarity in spelling), which is the Jaccard coefficient here. The second is semantic similarity, for which we use WUP similarity\cite{wu1994verbs}. To compute WUP similarity, we have to do a breadth-first-search to find the lowest common ancestor of two words in the WordNet. The calculation of word similarity will be explained in detail in the next section. 81 | 82 | More algorithms will be needed for parse tree adjustment and SQL query generation in the future. 
83 | 84 | %%%================================================================%%% 85 | \section{System Design} 86 | 87 | \begin{figure*}[ht] 88 | \centering 89 | \includegraphics[width=0.8\linewidth]{figures/nlidb_system_diagram.pdf} 90 | \caption{System Diagram} 91 | \end{figure*} 92 | 93 | Our current system is implemented in Java, using maven as the project management tool. The source code is divided into three parts: model, control, and view. The model part takes care of how to realize major functions of the natural language database interface, like parsing natural language, mapping nodes, adjusting node tree structure, and translating the tree into SQL query. The controller wraps many models as attribute variables, and it takes charge of the interaction between database and the view (GUI). And the view part uses JavaFX to design a GUI. 94 | 95 | Figure 1 is a diagram of our system. The boxes with solid frame lines are the ones we've already written, and the boxes with dashed frame lines are to be completed in the future. 96 | 97 | Below we’ll introduce the design ideas on two steps that we’ve completed: parsing natural language into a parse tree, and mapping the nodes of the parse tree to SQL components. 98 | 99 | \subsection{Natural Language Parser} 100 | We write the NLParser class to parse natural language from the user input in GUI to a dependency parse tree. The NLParser is just a wrapper of the Stanford NLP pos-tagger and dependency syntax parser. A natural language sentence is first tagged with part-of-speech labels, and then parsed with dependency parser to a ParseTree. 101 | 102 | A ParseTree consists of an array of Nodes. Each Node has information about the natural language word and its corresponding SQL component. A Node also contains parent and children links pointing to other Nodes in the ParseTree. 103 | 104 | \subsection{Node Mapper} 105 | Then we map each of the Nodes into an SQL component. 
We iterate over the tree and map each Node according to a certain Node Type in Figure 2, according to predefined rules. There are 7 node types in total, and 5 of them, SN, ON, FN, QN, and LN have hard-coded mapping rules. For example, map word “return” or “Return” to an “SN” node with value “SELECT”. A word will first be searched against these five Node Types. If there is no match, the search will go on to the remaining two types, NN and VN. 106 | 107 | \begin{figure}[ht] 108 | \centering 109 | \includegraphics[width=0.9\linewidth]{figures/nodes_mapping_rules.png} 110 | \caption[caption for nodes mapping rules]{Nodes Mapping Rules\protect\footnotemark} 111 | \end{figure} 112 | \footnotetext{Taken from \cite{li2014}.} 113 | 114 | The remaining two types, Name Node and Value Node, are decided by searching over the database for matching names or values. The matching of word to names or values are decided by the word similarity score of two words.The word similarity score here is the maximum of semantic similarity and lexical similarity. 115 | 116 | Semantic similarity is the WUP similarity\cite{wu1994verbs} function using WordNet. WordNet is a net of synonym sets (synsets) connected with semantic and lexical pointers. Two most important semantic pointers are hypernym and hyponym, which connect the synsets to the tree that we are interested here, as Figure 3. In Figure 3, the WUP similarity between $C1$ and $C2$ is: 117 | 118 | $$ Sim_{WUP} = \frac{2*N3}{N1+N2+2*N3} $$ 119 | 120 | \begin{figure}[ht] 121 | \centering 122 | \includegraphics[width=0.7\linewidth]{figures/wordnet_tree.png} 123 | \caption[caption for wordnet tree]{WUP word similarity.\protect\footnotemark } 124 | \end{figure} 125 | \footnotetext{Taken from \cite{wu1994verbs}.} 126 | 127 | One thing to note about WordNet is that each word can be in multiple synsets, and each synset can have multiple parents, so we use breadth-first-search to find the lowest of all possible common parents of two words. 
128 | 129 | 130 | For lexical similarity between two words, we use the Jaccard coefficient: 131 | 132 | $$ J(A, B) = \frac{|A \cap B|}{|A \cup B|}$$ 133 | 134 | where $A$ and $B$ are the set of characters of the two words respectively. The Jaccard coefficient may not be as good for measuring the lexical similarity of two words (as edit distance), but it is currently still used because it is a measure in range (0,1), which makes it easily compared with the WUP semantic similarity. 135 | 136 | To search over the database, we first visit the database, retrieve its schema, and store the Schema Graph as an attribute variable in the Controller class, so that each node mapping task don’t have to go through the slow database query. The Schema contains the table names, the column names of each column, and some sample distinct values of each column, such that they can be searched over to map Name Node or Value Node. 137 | 138 | Once we have the word similarity scores of one word to names and values in database, we rank different mapping choices by their similarity score, and return the highest several choices to the GUI for the user to choose. Here we add another node type for the user to choose from, that is “UNKNOWN”, which means that node doesn’t correspond to any meaningful SQL component. These meaningless nodes will be removed in later steps. 139 | 140 | Figure 4 is an example of a parse tree with nodes mapped to SQL components. The left part is a parse tree, and the right part is the mappings of all its nodes. 
141 | 142 | \begin{figure}[ht] 143 | \centering 144 | \includegraphics[width=0.9\linewidth]{figures/nodes_mapping_example.png} 145 | \caption[caption for nodes mapping example]{Node Mapping Example.\protect\footnotemark } 146 | \end{figure} 147 | \footnotetext{Taken from \cite{li2014}.} 148 | 149 | \subsection{Implicit Node} 150 | The main idea of inserting implicit node into parse tree is to make sure that two nodes which are being mapped have corresponding schema in database. Assuming invalid nodes are removed from the tree properly, there should be a tree with at most three branches. The leftmost tree should contain select node (SN) and name node (NN). If a name node in the left tree does not have ancestor, then it is the core node of the left tree. If the type of core node in left tree is different from right tree, the real core node in right tree is deemed hidden, i.e. implicit. The implicit core node may cause unreasonable comparison between two variables of different types due to the change of semantic meaning. An example of implicit node: return all author who wrote more than 100 paper. In our previous process of the parse tree, the right subtree should contain only the number 100, which is a value node (VN). In order to make the tree semantically meaningful, nodes in left subtree are copied over to the right subtree. 151 | 152 | After inserting name node based on the core node comparison, next step is to check the constraint for both core nodes. For example: if left core node has a constraint of year greater than 2007 and area of "Database", right core node should also have the same constraint. If right node does not conform to this constraint, then constraint nodes should be copied from the left subtree to right subtree. 153 | 154 | Our implementation of processing the implicit nodes insertion starts from the root of the tree. It checks if any nodes below select Node (SN) are missing in the middle tree. 
If there is, copy it over to the middle tree. Then repeat the same procedure to the middle tree and rightmost subtree. After the name node is copied over, it starts from the middle tree to check if there is any constraint missing in the rightmost tree. If there is, copy those over to the right tree. Finally, if the root of subtree is an ON (operator node), and the first node connected to the root in the subtree is a name node, there may be a function node missing. Our implementation tries to insert a function node in between to make the subtree semantically meaningful. 155 | 156 | %%%================================================================%%% 157 | \section{Experiments} 158 | The JavaFX application runs on JVM, and we’ve tested it on an Ubuntu 16.04 machine. We are using JDBC to connect to the PostgreSQL database of dblp, which we used in homework 1. 159 | 160 | Our program has already finished part of the final target. 161 | 162 | \begin{figure}[ht] 163 | \centering 164 | \includegraphics[width=0.8\linewidth]{figures/program_structure.png} 165 | \caption{Program Structure} 166 | \end{figure} 167 | 168 | Figure 5 is a detailed structure on programs that we have already finished or at least conceived. 169 | 170 | We have programmed a GUI in \texttt{UserView.java} and a connection between database and GUI in \texttt{Controller.java}. To realize natural language query, our first step in implementing the translation process is to parse the natural language into SQL keywords using a predefined natural language parser called Stanford NLP. The parsing process is written in \texttt{NLParser.java} and \texttt{ParseTree.java}. After we get the parse tree, we map each tree node (word in initial natural language input) to certain component of SQL and database. The mapping is written in \texttt{NodeMapper.java}. 
171 | 172 | \begin{figure*}[ht] 173 | \centering 174 | \includegraphics[width=0.7\linewidth]{figures/gui_nodes_mapping.png} 175 | \caption{GUI during Node Mapping} 176 | \end{figure*} 177 | 178 | Figure 6 is a screenshot of our application during the nodes mapping stage. The upper left part is where the user input comes. The bottom left part is supposed to be the translated SQL query (which hasn’t been completed). The upper left part shows the current information on nodes mapping. The choice box showing “NN: inproceedings.title” contains a drop down list of node types and values for the user to choose from. Once the user confirms the choice by pressing the “confirm choice” button, the app will go on to map the next word. The mapping choices will only be shown to the user if the word doesn’t match with the five predefined node types. 179 | 180 | As for the node mapper, currently we’ve only defined very limited number of explicit rules for nodes mapping. There are only a few predefined keywords, such as return, equals, all, etc (thus limited SQL query functions as well). We plan to tune the app after we’ve completed writing the whole process of translation. The nodes mapping for name nodes and value nodes doesn’t work perfectly well, maybe in the future we will try some more sensible measures of word similarity. But it is ok for now, since the users can almost always find the right name node or value node from the multiple choices. 181 | 182 | As for basic GUI functions, we may need to design a much fancier GUI as the last step of our program. 183 | 184 | %%%================================================================%%% 185 | \section{Contributions of Project Members} 186 | 187 | \begin{itemize} 188 | \item {\bf Yilin Gao:} GUI implementation, controller design, report writing. 189 | \item {\bf Keping Wang:} database connection, schema retrieval, Stanford NLP parser usage, parse tree design, word similarity score, report writing. 
190 | \item {\bf Chengkang Xu:} node mapping, meaningless nodes removal, inserting implicit nodes, report writing. 191 | \end{itemize} 192 | 193 | 194 | \Urlmuskip=0mu plus 1mu\relax 195 | \bibliographystyle{abbrv} 196 | \bibliography{nlidb} 197 | 198 | \end{document} 199 | -------------------------------------------------------------------------------- /src/main/java/com/dukenlidb/nlidb/archive/model/ParseTree.java: -------------------------------------------------------------------------------- 1 | package com.dukenlidb.nlidb.archive.model; 2 | 3 | import java.io.StringReader; 4 | import java.util.ArrayList; 5 | import java.util.Collections; 6 | import java.util.Iterator; 7 | import java.util.LinkedList; 8 | import java.util.List; 9 | 10 | import edu.stanford.nlp.ling.HasWord; 11 | import edu.stanford.nlp.ling.TaggedWord; 12 | import edu.stanford.nlp.process.DocumentPreprocessor; 13 | import edu.stanford.nlp.trees.GrammaticalStructure; 14 | import edu.stanford.nlp.trees.TypedDependency; 15 | 16 | public class ParseTree implements IParseTree { 17 | 18 | /** 19 | * Order of parse tree reformulation (used in getAdjustedTrees()) 20 | */ 21 | int edit; 22 | // We no longer use an array to store the nodes! 23 | /** 24 | * Root Node. Supposed to be "ROOT". 25 | */ 26 | Node root; 27 | 28 | /** 29 | * Empty constructor, only for testing. 30 | */ 31 | public ParseTree() { } 32 | 33 | /** 34 | * Construct a parse tree using the stanford NLP parser. Only one sentence. 35 | * Here we are omitting the information of dependency labels (tags). 36 | * @param text input text. 
37 | */ 38 | public ParseTree(String text, NLParser parser) { 39 | // pre-processing the input text 40 | DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text)); 41 | List sentence = null; 42 | for (List sentenceHasWord : tokenizer) { 43 | sentence = sentenceHasWord; 44 | break; 45 | } 46 | // part-of-speech tagging 47 | List tagged = parser.tagger.tagSentence(sentence); 48 | // dependency syntax parsing 49 | GrammaticalStructure gs = parser.parser.predict(tagged); 50 | 51 | // Reading the parsed sentence into ParseTree 52 | int N = sentence.size()+1; 53 | Node[] nodes = new Node[N]; 54 | root = new Node(0, "ROOT", "ROOT"); 55 | nodes[0] = root; 56 | for (int i = 0; i < N-1; i++) { 57 | nodes[i+1] = new Node(i+1, 58 | sentence.get(i).word(), tagged.get(i).tag()); 59 | } 60 | for (TypedDependency typedDep : gs.allTypedDependencies()) { 61 | int from = typedDep.gov().index(); 62 | int to = typedDep.dep().index(); 63 | // String label = typedDep.reln().getShortName(); // omitting the label 64 | nodes[to].parent = nodes[from]; 65 | nodes[from].children.add(nodes[to]); 66 | } 67 | } 68 | 69 | public ParseTree(Node node) { 70 | root = node.clone(); 71 | } 72 | public ParseTree(ParseTree other) { 73 | this(other.root); 74 | } 75 | 76 | @Override 77 | public int size() { 78 | return root.genNodesArray().length; 79 | } 80 | 81 | @Override 82 | public int getEdit() { 83 | return edit; 84 | } 85 | 86 | @Override 87 | public void setEdit(int edit){ 88 | this.edit = edit; 89 | } 90 | 91 | /** 92 | * Helper method for {@link #removeMeaninglessNodes()}. 93 | * (1) If curr node is meaning less, link its children to its parent. 94 | * (2) Move on to remove the meaningless nodes of its children. 
95 | */ 96 | private void removeMeaninglessNodes(Node curr) { 97 | if (curr == null) { return; } 98 | List currChildren = new ArrayList<>(curr.getChildren()); 99 | for (Node child : currChildren) { 100 | removeMeaninglessNodes(child); 101 | } 102 | if (curr != root && curr.getInfo().getType().equals("UNKNOWN")) { 103 | curr.parent.getChildren().remove(curr); 104 | for (Node child : curr.getChildren()) { 105 | curr.parent.getChildren().add(child); 106 | child.parent = curr.parent; 107 | } 108 | } 109 | 110 | } 111 | 112 | /** 113 | * Remove a node from tree if its NodeInfo is ("UNKNOWN", "meaningless"). 114 | * To remove the meaningless node, link the children of this node 115 | * to its parent. 116 | */ 117 | @Override 118 | public void removeMeaninglessNodes() { 119 | if (root.getChildren().get(0).getInfo() == null) { 120 | System.out.println("ERR! Node info net yet mapped!"); 121 | } 122 | // Remove meaningless nodes. 123 | removeMeaninglessNodes(root); 124 | } 125 | 126 | @Override 127 | 128 | public void insertImplicitNodes() { 129 | 130 | List childrenOfRoot = root.getChildren(); 131 | 132 | // no condition 133 | if (childrenOfRoot.size() <= 1) { 134 | 135 | 136 | return; 137 | } 138 | 139 | //phase 1, add nodes under select to left subtree 140 | 141 | System.out.println("Phase 1, add nodes under select node to left subtree"); 142 | 143 | int IndexOfSN = 0; 144 | for (int i = 0; i < childrenOfRoot.size(); i ++) { 145 | 146 | if (childrenOfRoot.get(i).getInfo().getType().equals("SN")) { 147 | 148 | IndexOfSN = i; 149 | break; 150 | } 151 | } 152 | 153 | //start from the name node 154 | 155 | Node SN = childrenOfRoot.get(IndexOfSN); 156 | List SN_children = SN.getChildren(); 157 | 158 | int IndexOfSN_NN = 0; 159 | 160 | 161 | for (int i = 0; i < SN_children.size(); i ++) { 162 | 163 | if (SN_children.get(i).getInfo().getType().equals("NN")) { 164 | 165 | IndexOfSN_NN = i; 166 | break; 167 | } 168 | } 169 | 170 | //add them to left subtree of all branches 171 | 
172 | Node copy; 173 | int indexOfAppendedNode; 174 | Node SN_NN = SN_children.get(IndexOfSN_NN); 175 | 176 | for (int i = 0; i < childrenOfRoot.size(); i ++) { 177 | 178 | if (i != IndexOfSN) { 179 | 180 | Node [] nodes_SN_NN = childrenOfRoot.get(i).genNodesArray(); 181 | indexOfAppendedNode = nameNodeToBeAppended(nodes_SN_NN); 182 | 183 | if (indexOfAppendedNode != -1) { 184 | 185 | copy = SN_NN.clone(); 186 | copy.setOutside(true); 187 | 188 | nodes_SN_NN[indexOfAppendedNode].setChild(copy); 189 | copy.setParent(nodes_SN_NN[indexOfAppendedNode]); 190 | } 191 | } 192 | } 193 | 194 | System.out.println(toString() + '\n'); 195 | 196 | 197 | //phase 2, compare left core node with right core node 198 | 199 | System.out.println("Phase 2, core node insertion"); 200 | 201 | int indexOfRightCoreNode = -1; 202 | int indexOfLeftCoreNode = -1; 203 | 204 | for (int i = 0; i < childrenOfRoot.size(); i ++) { 205 | 206 | if (i != IndexOfSN) { 207 | 208 | Node [] nodes = childrenOfRoot.get(i).genNodesArray(); 209 | int startOfRightBranch = endOfLeftBranch(nodes) + 1; 210 | int sizeOfRightTree = nodes[startOfRightBranch].getChildren().size() + 1; 211 | 212 | //if right tree only contains numbers, skip it 213 | 214 | if (sizeOfRightTree != 1 || !isNumeric(nodes[startOfRightBranch].getWord())) { 215 | 216 | indexOfLeftCoreNode = coreNode(nodes, true); 217 | indexOfRightCoreNode = coreNode(nodes, false); 218 | 219 | //if left core node exists 220 | 221 | if (indexOfLeftCoreNode != -1) { 222 | 223 | boolean doInsert = false; 224 | 225 | //if right subtree neither have core node nor it only contains number 226 | if (indexOfRightCoreNode == -1) { 227 | 228 | //copy core node only 229 | 230 | doInsert = true; 231 | } 232 | 233 | //if right core node & left core node are different schema 234 | 235 | else if (!nodes[indexOfRightCoreNode].getInfo(). 
236 | ExactSameSchema(nodes[indexOfLeftCoreNode].getInfo())) { 237 | 238 | //copy core node only 239 | 240 | doInsert = true; 241 | } 242 | 243 | if (doInsert) { 244 | 245 | copy = nodes[indexOfLeftCoreNode].clone(); 246 | copy.children = new ArrayList(); 247 | copy.setOutside(true); 248 | 249 | 250 | boolean insertAroundFN = false; 251 | 252 | int indexOfNewRightCN = IndexToInsertCN(nodes); 253 | 254 | if (indexOfNewRightCN == -1) { 255 | 256 | for (int j = nodes.length - 1; j > endOfLeftBranch(nodes); j --) { 257 | 258 | if (nodes[j].getInfo().getType().equals("FN")) { 259 | 260 | indexOfNewRightCN = j + 1; 261 | insertAroundFN = true; 262 | break; 263 | } 264 | } 265 | } 266 | 267 | if (insertAroundFN) { 268 | 269 | //THIS ONLY HANDLES FN NODE HAS NO CHILD OR ONE NAME NODE CHILD 270 | 271 | List FN_children = nodes[indexOfNewRightCN - 1].getChildren(); 272 | 273 | for (int j = 0; j < FN_children.size(); j ++) { 274 | 275 | copy.setChild(FN_children.get(j)); 276 | FN_children.get(j).setParent(copy); 277 | } 278 | 279 | copy.setParent(nodes[indexOfNewRightCN - 1]); 280 | nodes[indexOfNewRightCN - 1].children = new ArrayList(); 281 | nodes[indexOfNewRightCN - 1].setChild(copy); 282 | } 283 | 284 | else { 285 | 286 | //if right subtree only contains VN, adjust index 287 | 288 | if (indexOfNewRightCN == -1) { 289 | 290 | indexOfNewRightCN = endOfLeftBranch(nodes) + 1; 291 | } 292 | 293 | copy.setChild(nodes[indexOfNewRightCN]); 294 | copy.setParent(nodes[indexOfNewRightCN].getParent()); 295 | nodes[indexOfNewRightCN].getParent().removeChild(nodes[indexOfNewRightCN]); 296 | nodes[indexOfNewRightCN].getParent().setChild(copy); 297 | nodes[indexOfNewRightCN].setParent(copy); 298 | 299 | } 300 | } 301 | 302 | System.out.println(toString()); 303 | 304 | //phase 3, map each NV under left core node to right core node 305 | 306 | System.out.println("Phase 3, transfer constrain nodes from left to right"); 307 | 308 | List NV_children_left = 
nodes[indexOfLeftCoreNode].getChildren(); 309 | 310 | for (int j = 0; j < NV_children_left.size(); j ++) { 311 | 312 | Node [] nodes_new = childrenOfRoot.get(i).genNodesArray(); 313 | indexOfRightCoreNode = coreNode(nodes_new, false); 314 | List NV_children_right = nodes_new[indexOfRightCoreNode].getChildren(); 315 | 316 | boolean found_NV = false; 317 | 318 | Node curr_left = NV_children_left.get(j); 319 | String curr_left_type = curr_left.getInfo().getType(); 320 | 321 | for (int k = 0; k < NV_children_right.size(); k ++) { 322 | 323 | //compare 324 | 325 | Node curr_right = NV_children_right.get(k); 326 | 327 | //strictly compare, exact match ON 328 | 329 | if (curr_left_type.equals("ON")) { 330 | 331 | if (curr_left.equals(curr_right)) { 332 | 333 | found_NV = true; 334 | break; 335 | } 336 | } 337 | 338 | else { 339 | 340 | if (curr_left.getInfo().sameSchema(curr_right.getInfo())) { 341 | 342 | found_NV = true; 343 | break; 344 | } 345 | } 346 | } 347 | 348 | if (!found_NV) { 349 | 350 | //insert 351 | 352 | copy = curr_left.clone(); 353 | nodes_new[indexOfRightCoreNode].setChild(copy); 354 | copy.setOutside(true); 355 | copy.setParent(nodes_new[indexOfRightCoreNode]); 356 | } 357 | } 358 | 359 | System.out.println(toString()); 360 | 361 | //phase 4, insert function node 362 | 363 | System.out.println("Phase 4, insert missing function node"); 364 | 365 | Node [] nodes_final_temp = childrenOfRoot.get(i).genNodesArray(); 366 | 367 | int indexOfLeftFN_Tail = -1; 368 | 369 | for (int j = indexOfLeftCoreNode; j > 0; j --) { 370 | 371 | if (nodes_final_temp[j].getInfo().getType().equals("FN")) { 372 | 373 | indexOfLeftFN_Tail = j; 374 | break; 375 | } 376 | } 377 | 378 | if (indexOfLeftFN_Tail != -1) { 379 | 380 | //ASSUMPTION: if FN exists, it always before core node 381 | 382 | for (int k = 1; k < indexOfLeftFN_Tail + 1; k ++) { 383 | 384 | Node [] nodes_final = childrenOfRoot.get(i).genNodesArray(); 385 | indexOfRightCoreNode = coreNode(nodes_final, false); 386 | 
387 | boolean found_FN = false; 388 | 389 | for (int j = endOfLeftBranch(nodes_final) + 1; j < indexOfRightCoreNode; j ++) { 390 | 391 | if (nodes_final[j].getInfo().ExactSameSchema(nodes_final[k].getInfo())) { 392 | 393 | found_FN = true; 394 | } 395 | } 396 | 397 | if(!found_FN) { 398 | copy = nodes_final[k].clone(); 399 | copy.setOutside(true); 400 | copy.children = new ArrayList(); 401 | 402 | nodes[0].removeChild(nodes_final[endOfLeftBranch(nodes_final) + 1]); 403 | nodes[0].setChild(copy); 404 | 405 | copy.setParent(nodes[0]); 406 | copy.setChild(nodes[endOfLeftBranch(nodes_final) + 1]); 407 | nodes[endOfLeftBranch(nodes_final) + 1].setParent(copy); 408 | } 409 | } 410 | } 411 | System.out.println(toString()); 412 | } 413 | } 414 | } 415 | } 416 | } 417 | 418 | /** 419 | * find the index in the right tree to append core node 420 | */ 421 | 422 | public int IndexToInsertCN (Node [] nodes) { 423 | 424 | 425 | for (int i = endOfLeftBranch(nodes) + 1; i < nodes.length; i ++) { 426 | 427 | if (nodes[i].getInfo().getType().equals("NN")) { 428 | 429 | return i; 430 | } 431 | } 432 | 433 | return -1; 434 | } 435 | 436 | /** 437 | * Appending the name node under SELECT to the last name node in leftsubtree 438 | */ 439 | 440 | public int nameNodeToBeAppended (Node [] nodes) { 441 | 442 | for (int i = endOfLeftBranch(nodes); i > 0; i --) { 443 | 444 | if (nodes[i].getInfo().getType().equals("NN")) { 445 | 446 | return i; 447 | } 448 | } 449 | 450 | return -1; 451 | } 452 | 453 | /** 454 | * find the index of the last node in the left subtree 455 | */ 456 | 457 | public int endOfLeftBranch (Node [] nodes) { 458 | 459 | for (int i = 2; i < nodes.length; i ++) { 460 | 461 | if(nodes[i].getParent().equals(nodes[0])) { 462 | 463 | return i - 1; 464 | } 465 | 466 | } 467 | 468 | return -1; 469 | } 470 | 471 | /** 472 | * check if right branch contains only number 473 | */ 474 | public boolean isNumeric(String str) { 475 | try { 476 | double d = Double.parseDouble(str); 477 | 
} 478 | catch(NumberFormatException e) { 479 | return false; 480 | } 481 | return true; 482 | } 483 | 484 | /** 485 | * find index of core node 486 | */ 487 | 488 | public int coreNode (Node [] nodes, boolean left) { 489 | 490 | int startIndex = 1; 491 | int endIndex = endOfLeftBranch(nodes); 492 | 493 | if (!left) { 494 | 495 | startIndex = endOfLeftBranch(nodes) + 1; 496 | endIndex = nodes.length - 1; 497 | } 498 | 499 | for (int i = startIndex; i <= endIndex; i ++) { 500 | 501 | if (nodes[i].getInfo().getType().equals("NN")) { 502 | 503 | return i; 504 | } 505 | } 506 | 507 | return -1; 508 | } 509 | 510 | 511 | @Override 512 | public ParseTree mergeLNQN(){ 513 | Node[] nodes = this.root.genNodesArray(); 514 | for (int i=0; i [child1, child2, ...]" 661 | * @param curr 662 | * @return 663 | */ 664 | private String nodeToString(Node curr) { 665 | if (curr == null) { return ""; } 666 | String s = curr.toString() + " -> "; 667 | s += curr.getChildren().toString() + "\n"; 668 | for (Node child : curr.getChildren()) { 669 | s += nodeToString(child); 670 | } 671 | return s; 672 | } 673 | 674 | @Override 675 | public String toString() { 676 | StringBuilder sb = new StringBuilder(); 677 | sb.append("Sentence: ").append(getSentence()).append("\n"); 678 | sb.append(nodeToString(root)); 679 | return sb.toString(); 680 | } 681 | 682 | /** 683 | * Score of a tree measures the syntactic legality of 684 | * the tree. It is negative number of Invalid nodes. 685 | * @return 686 | */ 687 | public int getScore(){ 688 | return - SyntacticEvaluator.numberOfInvalidNodes(this); 689 | } 690 | 691 | } 692 | --------------------------------------------------------------------------------