├── .gitignore ├── Makefile ├── README.md ├── doc ├── design.tex ├── screenshots │ ├── create1.png │ ├── create2.png │ ├── create3.png │ ├── create4.png │ ├── create5.png │ ├── delete1.png │ ├── delete2.png │ ├── delete3.png │ ├── error1.png │ ├── error2.png │ ├── exp1.png │ ├── expr1.png │ ├── expr2.png │ ├── expr3.png │ ├── expr4.png │ ├── expr5.png │ ├── insert1.png │ ├── insert2.png │ ├── insert3.png │ ├── insert4.png │ ├── insert5.png │ ├── insert6.png │ ├── insert7.png │ ├── insert8.png │ ├── query1.png │ ├── query2.png │ ├── query3.png │ ├── query4.png │ ├── query5.png │ ├── query6.png │ ├── query7.png │ ├── query8.png │ ├── query9.png │ ├── zero1.png │ ├── zero2.png │ └── zero3.png └── test.md ├── draft ├── Lexer.cpp ├── Parser.cpp ├── Token.cpp ├── draft.txt ├── engine.cpp ├── first-follow.txt ├── main.cpp ├── parse.xlsx └── parsing-table ├── src ├── Engine.cpp ├── Engine.h ├── Expr.cpp ├── Expr.h ├── IO.cpp ├── IO.h ├── Lexer.cpp ├── Lexer.h ├── Parser.cpp ├── Parser.h ├── Statements.cpp ├── Statements.h ├── Table.cpp ├── Table.h ├── Token.cpp ├── Token.h ├── main.cpp ├── test_lexer.cpp └── test_parser.cpp └── test ├── all.good ├── all.in ├── lexer.good ├── lexer.in ├── parser.good └── parser.in /.gitignore: -------------------------------------------------------------------------------- 1 | bin/* 2 | *.o 3 | *.out 4 | core 5 | doc/* 6 | !doc/*.tex 7 | !doc/*.md 8 | *~ 9 | -------------------------------------------------------------------------------- /Makefile: --------------------------------------------------------------------------------
CC=clang++
CXXFLAGS=-c -Wall -std=c++11

# CC is the C++ compiler driver; it is used both to compile and to link.
# CXXFLAGS already carries -c, so it is passed to compile steps only.
# README.md tells users to edit the CC line above or to uncomment one of the
# alternatives below, so those lines are kept on purpose.

#For debug
#CXXFLAGS=-c -Wall -DTRACK -std=c++11

#For environment without clang++
#CC=g++
#CXXFLAGS=-c -Wall -std=c++0x

# Library sources shared by the program and by both test drivers.
# NOTE(review): src/Statements.cpp exists in the repository tree but is not
# listed here - confirm that it is intentionally left out of the build.
TOKEN=src/Token.cpp
LEXER=src/Lexer.cpp
EXPR=src/Expr.cpp
PARSER=src/Parser.cpp
TABLE=src/Table.cpp
ENGINE=src/Engine.cpp
IO=src/IO.cpp

# One translation unit with main() per executable.
LEXMAIN=src/test_lexer.cpp
PARSEMAIN=src/test_parser.cpp
MAIN=src/main.cpp

# Test inputs.
LEXIN=test/lexer.in
PARSEIN=test/parser.in
ALLIN=test/all.in

SOURCES=$(TOKEN) $(LEXER) $(EXPR) $(PARSER) $(TABLE) $(ENGINE) $(IO)
OBJECTS=$(SOURCES:.cpp=.o)

MAINOBJ=$(MAIN:.cpp=.o)
LEXMAINOBJ=$(LEXMAIN:.cpp=.o)
PARSEMAINOBJ=$(PARSEMAIN:.cpp=.o)

# Every header is a prerequisite of every object. This over-approximates the
# real include graph, but it guarantees that editing a header triggers a
# rebuild without generating extra dependency files in the source tree.
HEADERS:=$(wildcard src/*.h)

EXECUTABLE=bin/ssql
# The test drivers get their own binaries so that running a test can never
# leave a lexer/parser driver behind under the name of the real program.
LEXTEST=bin/test_lexer
PARSETEST=bin/test_parser

all: main

# "main" is only a convenient alias; the real target is the executable file,
# so an up-to-date build does not relink.
main: $(EXECUTABLE)

$(EXECUTABLE): $(OBJECTS) $(MAINOBJ)
	@mkdir -p $(@D)
	$(CC) $^ -g -o $@

$(LEXTEST): $(OBJECTS) $(LEXMAINOBJ)
	@mkdir -p $(@D)
	$(CC) $^ -g -o $@

$(PARSETEST): $(OBJECTS) $(PARSEMAINOBJ)
	@mkdir -p $(@D)
	$(CC) $^ -g -o $@

# Static pattern rule: each object depends on its own source (plus the
# headers), not on every source file, so editing one .cpp recompiles one .o.
# $< is the first prerequisite, i.e. the matching .cpp file.
$(OBJECTS) $(MAINOBJ) $(LEXMAINOBJ) $(PARSEMAINOBJ): %.o: %.cpp $(HEADERS)
	$(CC) $(CXXFLAGS) $< -g -o $@

clean:
	rm -f src/*.o $(EXECUTABLE) $(LEXTEST) $(PARSETEST)
	rm -f test/lexer.out test/parser.out test/all.out

# Each test writes the actual output next to the expected one and fails
# (non-zero exit from diff) when they differ.
testparser: $(PARSETEST) $(PARSEIN)
	$(PARSETEST) $(PARSEIN) > test/parser.out
	diff test/parser.out test/parser.good

testlexer: $(LEXTEST) $(LEXIN)
	$(LEXTEST) $(LEXIN) > test/lexer.out
	diff test/lexer.out test/lexer.good

checkmem: main
	valgrind --leak-check=full -v $(EXECUTABLE) $(ALLIN)

test: main
	$(EXECUTABLE) $(ALLIN) > test/all.out
	diff test/all.out test/all.good

.PHONY: all main clean testparser testlexer checkmem test
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Dependencies 2 | 3 | 1. `clang++` 3.4 or higher / `g++` 4.6.3 or higher (needs `C++11/C++0x` support) 4 | 2. GNU Make 5 | 3. Linux 6 | 7 | ## How to build the project 8 | 9 | If you have `clang++` 3.4 or higher installed, and it can be invoke with `clang++`, you don't need to change the `Makefile`. 10 | 11 | If your `clang++` can't be invoked with `clang++`, e.g.
has to be invoked with `clang++-3.4`, you need to change the first line of `Makefile` 12 | 13 | CC=clang++ 14 | 15 | into 16 | 17 | CC=clang++-3.4 18 | 19 | If you only have `g++`, uncomment these lines in the `Makefile` 20 | 21 | CC=g++ 22 | CXXFLAGS=-c -Wall -std=c++0x 23 | 24 | If you still can't build the program, contact the authors. (See "About" at the end of this README). 25 | 26 | After configuring the `Makefile`, enter the project root directory, run `make` 27 | 28 | $ make 29 | 30 | The built program will show up under the `bin` directory as `bin/ssql` 31 | 32 | $ ls bin 33 | ssql 34 | 35 | ## How to pass inputs 36 | 37 | To pass inputs into the program, under the root directory, run 38 | 39 | $ bin/ssql path/to/input-file 40 | 41 | For example, to pass `test/all.in`, run 42 | 43 | $ bin/ssql test/all.in 44 | 45 | Or under the `bin` directory, run 46 | 47 | $ ./ssql ../test/all.in 48 | 49 | For interactive mode, simply run the program without arguments 50 | 51 | $ bin/ssql 52 | 53 | and the standard input stream will be used as input. 54 | 55 | ### How to run the tests 56 | 57 | To test the lexer 58 | 59 | $ make testlexer 60 | 61 | To test the parser 62 | 63 | $ make testparser 64 | 65 | To test the whole project 66 | 67 | $ make test 68 | 69 | If you have valgrind installed, you can check the memory usage with 70 | 71 | $ make checkmem 72 | 73 | 74 | ## Directory structure 75 | 76 | . 77 | |-- Makefile 78 | |-- README.md 79 | | 80 | |-- bin 81 | | `-- ssql (the program. 
will show up after make) 82 | | 83 | |-- doc (documents) 84 | | |-- design.pdf 85 | | `-- test.pdf 86 | | 87 | |-- src (source code) 88 | | |-- Engine.cpp 89 | | |-- Engine.h 90 | | |-- Expr.cpp 91 | | |-- Expr.h 92 | | |-- IO.cpp 93 | | |-- IO.h 94 | | |-- Lexer.cpp 95 | | |-- Lexer.h 96 | | |-- Parser.cpp 97 | | |-- Parser.h 98 | | |-- Statements.cpp 99 | | |-- Statements.h 100 | | |-- Table.cpp 101 | | |-- Table.h 102 | | |-- Token.cpp 103 | | |-- Token.h 104 | | |-- main.cpp 105 | | |-- test_lexer.cpp 106 | | `-- test_parser.cpp 107 | | 108 | `-- test (test cases) 109 | |-- all.good 110 | |-- all.in 111 | |-- lexer.good 112 | |-- lexer.in 113 | |-- parser.good 114 | `-- parser.in 115 | 116 | 117 | ## About 118 | * [GitHub repository](https://github.com/joyeecheung/simple-sql-parser) 119 | * Time: Jan. 2015 120 | * Contact: [joyeec9h3@gmail.com](mailto:joyeec9h3@gmail.com) -------------------------------------------------------------------------------- /doc/design.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \usepackage[a4paper,top=0.75in, bottom=0.75in, left=1in, right=1in,footskip=0.2in]{geometry} 3 | %\usepackage{fullpage} 4 | %-----------------Hyperlink Packages-------------------- 5 | \usepackage{hyperref} 6 | \hypersetup{ 7 | colorlinks = true, 8 | citecolor = black, 9 | linkcolor = black, 10 | urlcolor = black 11 | } 12 | %-----------------Figure Packages-------------------- 13 | \usepackage{graphicx} % For figures 14 | %\usepackage{epsfig} % for postscript graphics files 15 | %------------------Math Packages------------------------ 16 | \usepackage{amssymb,amsmath} 17 | \usepackage{textcomp} 18 | \usepackage{mdwmath} 19 | \usepackage{mdwtab} 20 | \usepackage{eqparbox} 21 | %------------------Table Packages----------------------- 22 | \usepackage{rotating} % Used to rotate tables 23 | \usepackage{array} % Fixed column widths for tables 24 | %-----------------Algorithm 
Packages-------------------- 25 | \usepackage{listings} % Source code 26 | \usepackage{algorithm} % Pseudo Code 27 | \usepackage{algpseudocode} 28 | %--------------------------------------------------------- 29 | \usepackage{pgf} 30 | \usepackage{tikz} 31 | \usepackage[utf8]{inputenc} 32 | \usetikzlibrary{arrows,automata} 33 | \usetikzlibrary{positioning} 34 | \usepackage{tikz-qtree,tikz-qtree-compat} 35 | 36 | \tikzset{ 37 | state/.style={ 38 | rectangle, 39 | draw=black, 40 | minimum height=2em, 41 | inner sep=2pt, 42 | text centered, 43 | }, 44 | } 45 | 46 | \setcounter{tocdepth}{3} 47 | %opening 48 | 49 | \begin{document} 50 | 51 | \title{ 52 | Course Project for Compilers \\ 53 | System Design 54 | } 55 | \author{Class 1, Team 10} 56 | \date{\today} 57 | \maketitle 58 | \tableofcontents 59 | \section{Team Members} 60 | 61 | \begin{table}[H] 62 | \centering 63 | \begin{tabular}{l l l} 64 | Name & Student ID & Job\\ 65 | \hline 66 | Fan Ziyao & 12330081 & Team leader, database implementation\\ 67 | Chen Zeyu & 12330056 & System implementation \\ 68 | Huang Long & 12330132 & Database implementation \\ 69 | Zhang Qiuyi (Class 2) & 12330402 & Frontend, documentation \\ 70 | Zhu Lichen (Class 2) & 12330439 & Testing 71 | \end{tabular} 72 | \end{table} 73 | 74 | \section{System Design} 75 | 76 | The system is divided into four parts, with functionalities shown in Figure~\ref{fig:sys}. 77 | 78 | The parser doesn't need to concern itself with individual characters, the engine doesn't need to know about tokens and schema/data of tables, and the table doesn't need to know about (raw) statements. In this way, we make the coupling between modules as loose as possible. 
79 | 80 | \begin{figure}[H] 81 | \centering 82 | \begin{tikzpicture}[->,>=stealth'] 83 | 84 | \node[state, text width=4.5cm] (LEXER) 85 | {\begin{tabular}{l} 86 | \textbf{Lexer}\\[0.3em] 87 | \parbox{4cm}{Extract tokens from the stream 88 | } 89 | \end{tabular}}; 90 | 91 | \node[state, 92 | node distance=10cm, 93 | text width=4.5cm, 94 | right of=LEXER, 95 | ] (PARSER) 96 | {\begin{tabular}{l} 97 | \textbf{Parser}\\[0.3em] 98 | \parbox{4cm}{Derive statements with tokens 99 | } 100 | \end{tabular}}; 101 | 102 | \node[state, 103 | node distance=3cm, 104 | text width=4.5cm, 105 | below of=PARSER, 106 | ] (ENGINE) 107 | {\begin{tabular}{l} 108 | \textbf{Engine}\\[0.3em] 109 | \parbox{4cm}{Analyze statements, 110 | perform simple semantic analysis, 111 | keep track of tables 112 | } 113 | \end{tabular}}; 114 | 115 | \node[state, 116 | left of=ENGINE, 117 | node distance=10cm, 118 | text width=4.5cm] (TABLE) 119 | {\begin{tabular}{l} 120 | \textbf{Table}\\[0.3em] 121 | \parbox{4cm}{Perform transactions and 122 | semantic analysis that needs the schema 123 | } 124 | \end{tabular}}; 125 | 126 | 127 | \path (LEXER) edge node[anchor=east, above] 128 | { 129 | Tokens 130 | } (PARSER) 131 | (PARSER) edge node[anchor=east,right] 132 | { 133 | Statements 134 | }(ENGINE) 135 | (ENGINE) edge node[anchor=east,above] 136 | { 137 | Components of Statements 138 | }(TABLE) 139 | ; 140 | \end{tikzpicture} 141 | \caption{System design} 142 | \label{fig:sys} 143 | \end{figure} 144 | 145 | \section{Lexer Design} 146 | 147 | \subsection {Token Specification} 148 | \begin{align*} 149 | \text{num}\quad\to\quad & \texttt{[0-9]+} \\ 150 | \text{id}\quad\to\quad & \texttt{[\_A-Za-z][\_A-Za-z0-9]*} \\ 151 | \text{CREATE}\quad\to\quad & \texttt{CREATE} \\ 152 | \text{TABLE}\quad\to\quad & \texttt{TABLE} \\ 153 | \text{INT}\quad\to\quad & \texttt{INT} \\ 154 | \text{DEFAULT}\quad\to\quad & \texttt{DEFAULT} \\ 155 | \text{PRIMARY}\quad\to\quad & \texttt{PRIMARY} \\ 156 | \text{KEY}\quad\to\quad & 
\texttt{KEY} \\ 157 | \text{INSERT}\quad\to\quad & \texttt{INSERT} \\ 158 | \text{INTO}\quad\to\quad & \texttt{INTO} \\ 159 | \text{VALUES}\quad\to\quad & \texttt{VALUES} \\ 160 | \text{DELETE}\quad\to\quad & \texttt{DELETE} \\ 161 | \text{FROM}\quad\to\quad & \texttt{FROM} \\ 162 | \text{WHERE}\quad\to\quad & \texttt{WHERE} \\ 163 | \text{SELECT}\quad\to\quad & \texttt{SELECT} \\ 164 | \text{LT}\quad\to\quad & \texttt{<} \\ 165 | \text{GT}\quad\to\quad & \texttt{>} \\ 166 | \text{NEQ}\quad\to\quad & \texttt{<>} \\ 167 | \text{EQ}\quad\to\quad & \texttt{==} \\ 168 | \text{GEQ}\quad\to\quad & \texttt{>=} \\ 169 | \text{LEQ}\quad\to\quad & \texttt{<=} \\ 170 | \text{PLUS}\quad\to\quad & \texttt{+} \\ 171 | \text{MINUS}\quad\to\quad & \texttt{-} \\ 172 | \text{MUL}\quad\to\quad & \texttt{*} \\ 173 | \text{DIV}\quad\to\quad & \texttt{/} \\ 174 | \text{AND}\quad\to\quad & \texttt{\&\&} \\ 175 | \text{OR}\quad\to\quad & \texttt{||} \\ 176 | \text{NOT}\quad\to\quad & \texttt{!} \\ 177 | \text{COMMA}\quad\to\quad & \texttt{,} \\ 178 | \text{SEMICOLON}\quad\to\quad & \texttt{;} \\ 179 | \text{L\_PAREN}\quad\to\quad & \texttt{(} \\ 180 | \text{R\_PAREN}\quad\to\quad & \texttt{)} 181 | \end{align*} 182 | \begin{align*} 183 | words \quad\to\quad & \text{CREATE} \quad | \quad \text{TABLE} \quad | \quad \text{INT} \\ 184 | &| \quad \text{DEFAULT} \quad | \quad \text{PRIMARY} \quad | \quad \text{KEY} \quad \\ 185 | &| \quad \text{INSERT} \quad | \quad \text{INTO} \quad | \quad \text{VALUES} \quad \\ 186 | &| \quad \text{DELETE} \quad | \quad \text{FROM} \quad \\ 187 | &| \quad \text{WHERE} \quad | \quad \text{SELECT} \\ 188 | singleOp \quad\to\quad & 189 | \text{PLUS} \quad | \quad \text{MINUS} \quad | \quad \text{MUL} \\ 190 | &| \quad \text{DIV} \quad | \quad \text{L\_PAREN} \quad | \quad \text{R\_PAREN} \\ 191 | &| \quad \text{COMMA} \quad | \quad \text{SEMICOLON} \\ 192 | ops \quad\to\quad & 193 | \text{AND} \quad | \quad \text{OR} \quad | \quad \text{NOT} \quad | \quad 
\text{LT} \\ 194 | &| \quad \text{GT} \quad | \quad \text{NEQ} \quad | \quad \text{EQ} \quad | \quad \text{GEQ} \\ 195 | &| \quad \text{LEQ} \quad | \quad \text{PLUS} \quad | \quad \text{MINUS} \quad | \quad \text{MUL} \\ 196 | &| \quad \text{DIV} \quad | \quad \text{L\_PAREN} \quad | \quad \text{R\_PAREN} \\ 197 | &| \quad \text{COMMA} \quad | \quad \text{SEMICOLON} 198 | \end{align*} 199 | \subsection{DFA} 200 | 201 | A simplified DFA is shown in Figure~\ref{fig:dfa}. After getting into accepting states for identifiers and operators, we need to further process the buffer to determine the right token to return. 202 | 203 | \begin{figure}[H] 204 | \centering 205 | \begin{tikzpicture}[scale=0.2] 206 | \tikzstyle{every node}+=[inner sep=0pt] 207 | \draw [black] (32.8,-10.4) circle (3); 208 | \draw (32.8,-10.4) node {$1$}; 209 | \draw [black] (63.1,-10.4) circle (3); 210 | \draw (63.1,-10.4) node {$2$}; 211 | \draw [black] (63.1,-10.4) circle (2.4); 212 | \draw [black] (17.9,-19.6) circle (3); 213 | \draw (17.9,-19.6) node {$0$}; 214 | \draw [black] (63.1,-19.6) circle (3); 215 | \draw (63.1,-19.6) node {$3$}; 216 | \draw [black] (63.1,-19.6) circle (2.4); 217 | \draw [black] (42.6,-27.8) circle (3); 218 | \draw (42.6,-27.8) node {$4$}; 219 | \draw [black] (35.1,-42.5) circle (3); 220 | \draw (35.1,-42.5) node {$5$}; 221 | \draw [black] (61.9,-42.5) circle (3); 222 | \draw (61.9,-42.5) node {$6$}; 223 | \draw [black] (61.9,-42.5) circle (2.4); 224 | \draw [black] (63,-27.8) circle (3); 225 | \draw (63,-27.8) node {$7$}; 226 | \draw [black] (63,-27.8) circle (2.4); 227 | \draw [black] (20.45,-18.02) -- (30.25,-11.98); 228 | \fill [black] (30.25,-11.98) -- (29.3,-11.97) -- (29.83,-12.82); 229 | \draw (27.35,-15.5) node [below] {$0-9$}; 230 | \draw [black] (35.8,-10.4) -- (60.1,-10.4); 231 | \fill [black] (60.1,-10.4) -- (59.3,-9.9) -- (59.3,-10.9); 232 | \draw (47.95,-9.9) node [above] {$non-digit$}; 233 | \draw [black] (31.477,-7.72) arc (234:-54:2.25); 234 | \draw 
(32.8,-3.15) node [above] {$0-9$}; 235 | \fill [black] (34.12,-7.72) -- (35,-7.37) -- (34.19,-6.78); 236 | \draw [black] (17.514,-22.563) arc (20.30993:-267.69007:2.25); 237 | \draw (8.36,-25.72) node [below] {$white-space$}; 238 | \fill [black] (15.31,-21.1) -- (14.39,-20.9) -- (14.74,-21.84); 239 | \draw [black] (20.9,-19.6) -- (60.1,-19.6); 240 | \fill [black] (60.1,-19.6) -- (59.3,-19.1) -- (59.3,-20.1); 241 | \draw (40.5,-20.1) node [below] {$singleOp$}; 242 | \draw [black] (20.75,-20.55) -- (39.75,-26.85); 243 | \fill [black] (39.75,-26.85) -- (39.15,-26.13) -- (38.84,-27.08); 244 | \draw (28.83,-24.26) node [below] {$op$}; 245 | \draw [black] (43.923,-30.48) arc (54:-234:2.25); 246 | \draw (42.6,-35.05) node [below] {$op$}; 247 | \fill [black] (41.28,-30.48) -- (40.4,-30.83) -- (41.21,-31.42); 248 | \draw [black] (19.7,-22) -- (33.3,-40.1); 249 | \fill [black] (33.3,-40.1) -- (33.22,-39.16) -- (32.42,-39.76); 250 | \draw (25.92,-32.45) node [left] {$\_,\mbox{ }A-Z,\mbox{ }a-z$}; 251 | \draw [black] (36.423,-45.18) arc (54:-234:2.25); 252 | \draw (35.1,-49.75) node [below] {$\_,\mbox{ }a-z,\mbox{ }A-Z,\mbox{ }0-9$}; 253 | \fill [black] (33.78,-45.18) -- (32.9,-45.53) -- (33.71,-46.12); 254 | \draw [black] (38.1,-42.5) -- (58.9,-42.5); 255 | \fill [black] (58.9,-42.5) -- (58.1,-42) -- (58.1,-43); 256 | \draw (48.5,-43) node [below] {$other\mbox{ }character$}; 257 | \draw [black] (45.6,-27.8) -- (60,-27.8); 258 | \fill [black] (60,-27.8) -- (59.2,-27.3) -- (59.2,-28.3); 259 | \draw (52.8,-28.3) node [below] {$other\mbox{ }character$}; 260 | \end{tikzpicture} 261 | \caption{Simplified DFA} 262 | \label{fig:dfa} 263 | \end{figure} 264 | 265 | \section{Parser Design} 266 | 267 | \subsection{Grammar} 268 | 269 | \subsection{\texttt{First} Sets} 270 | \begin{align*} 271 | \textsc{first}(ssql\_stmt) \quad =& \quad\{\text{CREATE, INSERT, DELETE, SELECT}\} \\ 272 | \textsc{first}(create\_stmt) \quad =& \quad\{\text{CREATE}\} \\ 273 | \textsc{first}(decl\_list) \quad =& 
\quad\{id\text{, PRIMARY}\} \\ 274 | \textsc{first}(\_decl\_list) \quad =& \quad\{\text{COMMA, }\epsilon\} \\ 275 | \textsc{first}(decl) \quad =& \quad\{id\text{, PRIMARY}\} \\ 276 | \textsc{first}(default\_spec) \quad =& \quad\{\text{DEFAULT, }\epsilon\} \\ 277 | \textsc{first}(expr[simple=true]) \quad =& \quad\{\text{PLUS, MINUS, } num \text{, L\_PAREN}\} \\ 278 | \textsc{first}(expr[simple=false]) \quad =& \quad\{\text{PLUS, MINUS, }num\text{, }id\} \\ 279 | \textsc{first}(\_expr) \quad =& \quad\{\text{PLUS, MINUS, }\epsilon\} \\ 280 | \textsc{first}(term[simple=true]) \quad =& \quad\{\text{PLUS, MINUS, }num\text{, L\_PAREN}\} \\ 281 | \textsc{first}(term[simple=false]) \quad =& \quad\{\text{PLUS, MINUS, }num\text{, }id\} \\ 282 | \textsc{first}(\_term) \quad =& \quad\{\text{MUL, DIV, }\epsilon\} \\ 283 | \textsc{first}(unary[simple=true]) \quad =& \quad\{\text{PLUS, MINUS, } num \text{, L\_PAREN}\} \\ 284 | \textsc{first}(unary[simple=false]) \quad =& \quad\{\text{PLUS, MINUS, }num\text{, }id\} \\ 285 | %\end{align*} 286 | %\begin{align*} 287 | \textsc{first}(column\_list) \quad =& \quad\{id\} \\ 288 | \textsc{first}(\_column\_list) \quad =& \quad\{\text{COMMA, }\epsilon\} \\ 289 | \textsc{first}(insert\_stmt) \quad =& \quad\{\text{INSERT}\} \\ 290 | \textsc{first}(value\_list) \quad =& \quad\{\text{PLUS, MINUS, } num \text{, L\_PAREN}\} \\ 291 | \textsc{first}(\_value\_list) \quad =& \quad\{\text{COMMA, }\epsilon\} \\ 292 | \textsc{first}(delete\_stmt) \quad =& \quad\{\text{DELETE}\} \\ 293 | \textsc{first}(where\_clause) \quad =& \quad\{\text{WHERE, }\epsilon\} \\ 294 | \textsc{first}(disjunct) \quad =& \quad\{\text{L\_PAREN, NOT, PLUS, MINUS, }num\text{, }id\} \\ 295 | \textsc{first}(\_disjunct) \quad =& \quad\{\text{OR, }\epsilon\} \\ 296 | \textsc{first}(conjunct) \quad =& \quad\{\text{L\_PAREN, NOT, PLUS, MINUS, }num\text{, }id\} \\ 297 | \textsc{first}(\_conjunct) \quad =& \quad\{\text{AND, }\epsilon\} \\ 298 | \textsc{first}(bool) \quad =& 
\quad\{\text{L\_PAREN, NOT, PLUS, MINUS, }num\text{, }id\} \\ 299 | \textsc{first}(comp) \quad =& \quad\{\text{PLUS, MINUS, }num\text{, }id\} \\ 300 | \textsc{first}(rop) \quad =& \quad\{\text{NEQ, EQ, LT, GT, LEQ, GEQ}\} \\ 301 | \textsc{first}(query\_stmt) \quad =& \quad\{\text{SELECT}\} \\ 302 | \textsc{first}(select\_list) \quad =& \quad\{\text{MUL, }id\} \\ 303 | \end{align*} 304 | 305 | \subsection{\texttt{Follow} Sets} 306 | \begin{align*} 307 | \textsc{follow}(ssql\_stmt)\quad =& \quad \{\text{\$}\} \\ 308 | \textsc{follow}(create\_stmt)\quad =& \quad \{\text{\$}\} \\ 309 | \textsc{follow}(decl\_list)\quad =& \quad \{\text{R\_PAREN}\} \\ 310 | \textsc{follow}(\_decl\_list)\quad =& \quad \{\text{R\_PAREN}\} \\ 311 | \textsc{follow}(decl)\quad =& \quad \{\text{COMMA, R\_PAREN}\} \\ 312 | \textsc{follow}(default\_spec)\quad =& \quad \{\text{COMMA, R\_PAREN}\} \\ 313 | \textsc{follow}(expr[true])\quad =& \quad \{\text{COMMA, R\_PAREN}\} \\ 314 | \textsc{follow}(expr[false])\quad =& \quad \{\text{NEQ, EQ, LT, GT, LEQ,}\\ 315 | & \quad \text{GEQ, AND, OR, SEMICOLON, R\_PAREN}\} \\ 316 | \textsc{follow}(\_expr[true])\quad =& \quad \{\text{COMMA, R\_PAREN}\} \\ 317 | \textsc{follow}(\_expr[false])\quad =& \quad \{\text{NEQ, EQ, LT, GT, LEQ,}\\ 318 | & \quad \text{GEQ, AND, OR, SEMICOLON, R\_PAREN}\} \\ 319 | \textsc{follow}(term[true])\quad =& \quad \{\text{PLUS, MINUS, COMMA, R\_PAREN}\} \\ 320 | \textsc{follow}(term[false])\quad =& \quad \{\text{PLUS, MINUS, NEQ, EQ, LT, GT,}\\ 321 | & \quad \text{LEQ, GEQ, AND, OR, SEMICOLON, R\_PAREN}\} \\ 322 | \textsc{follow}(\_term[true])\quad =& \quad \{\text{PLUS, MINUS, COMMA, R\_PAREN}\} \\ 323 | \textsc{follow}(\_term[false])\quad =& \quad \{\text{PLUS, MINUS, NEQ, EQ, LT, GT,}\\ 324 | & \quad \text{LEQ, GEQ, AND, OR, SEMICOLON, R\_PAREN}\} \\ 325 | \textsc{follow}(unary[true])\quad =& \quad \{\text{MUL, DIV, PLUS, MINUS, COMMA, R\_PAREN}\} \\ 326 | \textsc{follow}(unary[false])\quad =& \quad \{\text{MUL, DIV, PLUS, 
MINUS, NEQ, EQ, LT,} \\ 327 | & \quad \text{GT, LEQ, GEQ, AND, OR, SEMICOLON, R\_PAREN}\} \\ 328 | %\end{align*} 329 | %\begin{align*} 330 | \textsc{follow}(column\_list)\quad =& \quad \{\text{FROM, R\_PAREN}\} \\ 331 | \textsc{follow}(\_column\_list)\quad =& \quad \{\text{FROM, R\_PAREN}\} \\ 332 | \textsc{follow}(insert\_stmt)\quad =& \quad \{\text{\$}\} \\ 333 | \textsc{follow}(value\_list)\quad =& \quad \{\text{R\_PAREN}\} \\ 334 | \textsc{follow}(\_value\_list)\quad =& \quad \{\text{R\_PAREN}\} \\ 335 | \textsc{follow}(delete\_stmt)\quad =& \quad \{\text{\$}\} \\ 336 | \textsc{follow}(where\_clause)\quad =& \quad \{\text{SEMICOLON}\} \\ 337 | \textsc{follow}(disjunct)\quad =& \quad \{\text{SEMICOLON, R\_PAREN}\} \\ 338 | \textsc{follow}(\_disjunct)\quad =& \quad \{\text{SEMICOLON, R\_PAREN}\} \\ 339 | \textsc{follow}(conjunct)\quad =& \quad \{\text{OR, SEMICOLON, R\_PAREN}\} \\ 340 | \textsc{follow}(\_conjunct)\quad =& \quad \{\text{OR, SEMICOLON, R\_PAREN}\} \\ 341 | \textsc{follow}(bool)\quad =& \quad \{\text{AND, OR, SEMICOLON, R\_PAREN}\} \\ 342 | \textsc{follow}(comp)\quad =& \quad \{\text{AND, OR, SEMICOLON, R\_PAREN}\} \\ 343 | \textsc{follow}(rop)\quad =& \quad \{\text{PLUS, MINUS, }num \text{, } id\} \\ 344 | \textsc{follow}(query\_stmt)\quad =& \quad \{\text{\$}\} \\ 345 | \textsc{follow}(select\_list)\quad =& \quad \{\text{FROM}\} 346 | \end{align*} 347 | \subsection{Parsing Table} 348 | 349 | Format: 350 | \begin{align*} 351 | \text{Current production} \quad \to & \quad\\ 352 | \text{Lookahead}: & \quad \text{Production body to match} 353 | \end{align*} 354 | 355 | For simplicity, we combine $simple\_expr$ and $expr$ into $expr[simple]$, distinguished by $expr[true]$ and $expr[false]$. 
356 | 357 | \begin{align*} 358 | ssql\_stmt \quad \to & \quad\\ 359 | \text{CREATE}: & \quad create\_stmt \\ 360 | \text{INSERT}: & \quad insert\_stmt \\ 361 | \text{DELETE}: & \quad delete\_stmt \\ 362 | \text{SELECT}: & \quad query\_stmt 363 | \end{align*} 364 | 365 | \begin{align*} 366 | create\_stmt \quad \to & \quad\\ 367 | \text{CREATE:} & \quad \text{CREATE TABLE } id \text{ L\_PAREN } decl\_list \text{ R\_PAREN SEMICOLON} 368 | \end{align*} 369 | 370 | \begin{align*} 371 | decl\_list \quad \to & \quad\\ 372 | id: & \quad decl\text{ }\_decl\_list \\ 373 | \text{PRIMARY}: & \quad decl\text{ }\_decl\_list 374 | \end{align*} 375 | 376 | \begin{align*} 377 | \_decl\_list \quad \to & \quad\\ 378 | \text{COMMA}: & \quad \text{COMMA } decl\text{ }\_decl\_list \\ 379 | \text{R\_PAREN}: & \quad \epsilon 380 | \end{align*} 381 | 382 | \begin{align*} 383 | decl \quad \to & \quad\\ 384 | id: & \quad id \text{ INT } default\_spec \\ 385 | \text{PRIMARY}: & \quad \text{PRIMARY KEY L\_PAREN }column\_list \text{ R\_PAREN} 386 | \end{align*} 387 | 388 | \begin{align*} 389 | default\_spec \quad \to & \quad\\ 390 | \text{DEFAULT}: & \quad \text{DEFAULT ASSIGN } expr[true] \\ 391 | \text{COMMA}: & \quad \epsilon \\ 392 | \text{R\_PAREN}: & \quad \epsilon 393 | \end{align*} 394 | 395 | \begin{align*} 396 | column\_list \quad \to & \quad\\ 397 | id: & \quad id \text{ } \_column\_list 398 | \end{align*} 399 | 400 | 401 | \begin{align*} 402 | \_column\_list \quad \to & \quad\\ 403 | \text{COMMA}: & \quad \text{COMMA } id \text{ } \_column\_list \\ 404 | \text{FROM}: & \quad \epsilon \\ 405 | \text{R\_PAREN}: & \quad \epsilon 406 | \end{align*} 407 | 408 | \begin{align*} 409 | insert\_stmt \quad \to & \quad\\ 410 | \text{INSERT}: & \quad \text{INSERT INTO } id \text{ L\_PAREN } column\_list \text{ R\_PAREN} \\ 411 | & \quad \text{VALUES L\_PAREN } value\_list \text{ R\_PAREN SEMICOLON} 412 | \end{align*} 413 | 414 | \begin{align*} 415 | value\_list \quad \to & \quad\\ 416 | 
\text{PLUS}: & \quad expr[true] \text{ } \_value\_list \\ 417 | \text{MINUS}: & \quad expr[true] \text{ } \_value\_list \\ 418 | num: & \quad expr[true] \text{ } \_value\_list \\ 419 | \text{L\_PAREN}: & \quad expr[true] \text{ } \_value\_list 420 | \end{align*} 421 | 422 | \begin{align*} 423 | \_value\_list \quad \to & \quad\\ 424 | \text{COMMA}: & \quad \text{COMMA } expr[true] \text{ } \_value\_list \\ 425 | \text{R\_PAREN}: & \quad \epsilon 426 | \end{align*} 427 | 428 | \begin{align*} 429 | delete\_stmt \quad \to & \quad\\ 430 | \text{DELETE}: & \quad \text{DELETE FROM } id \text{ } where\_clause \text{ SEMICOLON} 431 | \end{align*} 432 | 433 | 434 | \begin{align*} 435 | disjunct \quad \to & \quad\\ 436 | \text{L\_PAREN}: & \quad conjunct \text{ } \_disjunct \\ 437 | \text{NOT}: & \quad conjunct \text{ } \_disjunct \\ 438 | \text{PLUS}: & \quad conjunct \text{ } \_disjunct \\ 439 | \text{MINUS}: & \quad conjunct \text{ } \_disjunct \\ 440 | num: & \quad conjunct \text{ } \_disjunct \\ 441 | id: & \quad conjunct \text{ } \_disjunct 442 | \end{align*} 443 | 444 | \begin{align*} 445 | \_disjunct \quad \to & \quad\\ 446 | \text{OR}: & \quad \text{OR } conjunct \text{ } \_disjunct\\ 447 | \text{SEMICOLON}: & \quad \epsilon \\ 448 | \text{R\_PAREN}: & \quad \epsilon 449 | \end{align*} 450 | 451 | \begin{align*} 452 | conjunct \quad \to & \quad\\ 453 | \text{L\_PAREN}: & \quad bool \text{ } \_conjunct \\ 454 | \text{NOT}: & \quad bool \text{ } \_conjunct \\ 455 | \text{PLUS}: & \quad bool \text{ } \_conjunct \\ 456 | \text{MINUS}: & \quad bool \text{ } \_conjunct \\ 457 | num: & \quad bool \text{ } \_conjunct \\ 458 | id: & \quad bool \text{ } \_conjunct 459 | \end{align*} 460 | 461 | \begin{align*} 462 | \_conjunct \quad \to & \quad\\ 463 | \text{AND}: & \quad \text{AND } bool \text{ } \_conjunct\\ 464 | \text{OR}: & \quad \epsilon \\ 465 | \text{SEMICOLON}: & \quad \epsilon \\ 466 | \text{R\_PAREN}: & \quad \epsilon 467 | \end{align*} 468 | 469 | \begin{align*} 470 
| bool \quad \to & \quad\\ 471 | \text{L\_PAREN}: & \quad \text{L\_PAREN } disjunct \text{ R\_PAREN} \\ 472 | \text{NOT}: & \quad \text{NOT } bool \\ 473 | \text{PLUS}: & \quad comp \\ 474 | \text{MINUS}: & \quad comp \\ 475 | num: & \quad comp \\ 476 | id: & \quad comp 477 | \end{align*} 478 | 479 | \begin{align*} 480 | comp \quad \to & \quad\\ 481 | \text{PLUS}: & \quad expr[false] \text{ } rop \text{ } expr[false] \\ 482 | \text{MINUS}: & \quad expr[false] \text{ } rop \text{ } expr[false] \\ 483 | num: & \quad expr[false] \text{ } rop \text{ } expr[false] \\ 484 | id: & \quad expr[false] \text{ } rop \text{ } expr[false] 485 | \end{align*} 486 | 487 | Here, notation such as $term[simple]$ means that the current value of $simple$ is passed down unchanged. 488 | 489 | \begin{align*} 490 | expr[simple] \quad \to & \quad\\ 491 | \text{PLUS}: & \quad term[simple] \text{ } \_expr[simple] \\ 492 | \text{MINUS}: & \quad term[simple] \text{ } \_expr[simple] \\ 493 | num: & \quad term[simple] \text{ } \_expr[simple] \\ 494 | if (simple == true): &\\ 495 | \text{L\_PAREN}: & \quad term[simple] \text{ } \_expr[simple] \\ 496 | if (simple == false): &\\ 497 | id: & \quad term[simple] \text{ } \_expr[simple] 498 | \end{align*} 499 | 500 | \begin{align*} 501 | \_expr[simple] \quad \to & \quad\\ 502 | \text{PLUS}: &\quad \text{PLUS } term[simple] \text{ } \_expr[simple] \\ 503 | \text{MINUS}: &\quad \text{MINUS } term[simple] \text{ } \_expr[simple] \\ 504 | if (simple == true): &\\ 505 | \text{COMMA} : & \quad \epsilon \\ 506 | \text{R\_PAREN} : & \quad \epsilon \\ 507 | if (simple == false): &\\ 508 | \text{NEQ} : & \quad \epsilon \\ 509 | \text{EQ} : & \quad \epsilon \\ 510 | \text{LT} : & \quad \epsilon \\ 511 | \text{GT} : & \quad \epsilon \\ 512 | \text{LEQ} : & \quad \epsilon \\ 513 | \text{GEQ} : & \quad \epsilon \\ 514 | \text{AND} : & \quad \epsilon \\ 515 | \text{OR} : & \quad \epsilon \\ 516 | \text{SEMICOLON} : & \quad \epsilon \\ 517 | \text{R\_PAREN} : & \quad \epsilon 518 | \end{align*} 
519 | 520 | \begin{align*} 521 | term[simple] \quad \to & \quad\\ 522 | \text{PLUS}: & \quad unary[simple] \text{ } \_term[simple] \\ 523 | \text{MINUS}: & \quad unary[simple] \text{ } \_term[simple] \\ 524 | num: & \quad unary[simple] \text{ } \_term[simple] \\ 525 | if (simple == true): &\\ 526 | \text{L\_PAREN}: & \quad unary[simple] \text{ } \_term[simple] \\ 527 | if (simple == false): &\\ 528 | id: & \quad unary[simple] \text{ } \_term[simple] 529 | \end{align*} 530 | 531 | \begin{align*} 532 | \_term[simple] \quad \to & \quad\\ 533 | \text{MUL}: &\quad \text{MUL } unary[simple] \text{ } \_term[simple] \\ 534 | \text{DIV}: &\quad \text{DIV } unary[simple] \text{ } \_term[simple] \\ \text{PLUS} : & \quad \epsilon \\ \text{MINUS} : & \quad \epsilon \\ 535 | if (simple == true): &\\ 536 | \text{COMMA} : & \quad \epsilon \\ 537 | \text{R\_PAREN} : & \quad \epsilon \\ 538 | if (simple == false): &\\ 539 | \text{NEQ} : & \quad \epsilon \\ 540 | \text{EQ} : & \quad \epsilon \\ 541 | \text{LT} : & \quad \epsilon \\ 542 | \text{GT} : & \quad \epsilon \\ 543 | \text{LEQ} : & \quad \epsilon \\ 544 | \text{GEQ} : & \quad \epsilon \\ 545 | \text{AND} : & \quad \epsilon \\ 546 | \text{OR} : & \quad \epsilon \\ 547 | \text{SEMICOLON} : & \quad \epsilon \\ 548 | \text{R\_PAREN} : & \quad \epsilon 549 | \end{align*} 550 | 551 | \begin{align*} 552 | unary[simple] \quad \to & \quad\\ 553 | \text{PLUS}: & \quad \text{PLUS } unary[simple]\\ 554 | \text{MINUS}: & \quad \text{MINUS } unary[simple]\\ 555 | num: & \quad num \\ 556 | if (simple == true): &\\ 557 | \text{L\_PAREN}: & \quad \text{L\_PAREN } expr[true] \text{ R\_PAREN}\\ 558 | if (simple == false): &\\ 559 | id: & \quad id 560 | \end{align*} 561 | 562 | \begin{align*} 563 | rop \quad \to & \quad\\ 564 | \text{NEQ} : & \quad \text{NEQ} \\ 565 | \text{EQ} : & \quad \text{EQ} \\ 566 | \text{LT} : & \quad \text{LT} \\ 567 | \text{GT} : & \quad \text{GT} \\ 568 | \text{LEQ} : & \quad \text{LEQ} \\ 569 | \text{GEQ} : & \quad \text{GEQ} 570 | \end{align*} 571 | 572 | \begin{align*} 573 | 
query\_stmt \quad \to & \quad\\ 574 | \text{SELECT}: & \quad \text{SELECT } select\_list \text{ FROM } id \text{ } where\_clause \text{ SEMICOLON} 575 | \end{align*} 576 | 577 | \begin{align*} 578 | select\_list \quad \to & \quad\\ 579 | \text{MUL}: & \quad \text{MUL} \\ 580 | id: & \quad column\_list 581 | \end{align*} 582 | 583 | \subsection{About expressions} 584 | 585 | To handle expressions, we build expression trees during parsing, then attach them to statements as a component for later evaluation (in-order). An example is shown in Figure~\ref{fig:tree}. 586 | 587 | \begin{figure}[H] 588 | \centering 589 | \begin{tikzpicture}[level 1/.style={level distance=1.5cm}] 590 | \Tree 591 | [.GT 592 | [.PLUS 593 | [.DIV [.MUL [.MINUS [.NONE ] [.ID age ] ] [.NUM 5 ] ] [.NUM 3 ] ] 594 | [.MUL [.ID height ] [.NUM 2 ] ] 595 | ] 596 | [.NUM 5 ] 597 | ] 598 | \end{tikzpicture} 599 | \caption{Expression tree for $-age * 5 / 3 + height * 2 > 5$} 600 | \label{fig:tree} 601 | \end{figure} 602 | 603 | \subsection{Statements} 604 | During parsing, we collect items for list-type components in statements. For example, when parsing a column list, we pass a vector around, pushing $id$s into it each time a terminal $id$ is consumed. Similarly, the default specifications of a $Create$ statement are a map from strings to ints, the value list is a vector of ints, etc. In this way, we can completely separate the parser from the table. 605 | 606 | \section{Database Design} 607 | 608 | As shown in Figure~\ref{fig:db}, we divide the backend into two parts: engine and table. 609 | 610 | Engine is responsible for inter-table operations. It receives statements produced by the parser, performs basic semantic analysis (those that can be done without knowing the data or schema in any table), and keeps track of tables in the memory. Engine can extract legal components inside statements (e.g. a vector of primary keys in a $Create$ statement), and pass them down to the right table to perform transactions. 
611 | 612 | Table is responsible for intra-table operations. It receives components of statements from the engine, performs semantic analysis that needs the knowledge of its data and schema, and finishes transactions. 613 | 614 | Both engine and table provide services for creating tables, inserting records, deleting records and querying records, though at different levels. 615 | 616 | \begin{figure}[H] 617 | \centering 618 | \begin{tikzpicture}[->,>=stealth'] 619 | \node[state, 620 | node distance=3cm, 621 | text width=4.5cm, 622 | ] (ENGINE) 623 | {\begin{tabular}{l} 624 | \textbf{Engine}\\[0.3em] 625 | \parbox{4cm}{Analyze statements, 626 | perform simple semantic analysis, 627 | keep track of tables 628 | } 629 | \end{tabular}}; 630 | 631 | \node[state, 632 | right of=ENGINE, 633 | node distance=10cm, 634 | text width=4.5cm] (TABLE) 635 | {\begin{tabular}{l} 636 | \textbf{Table}\\[0.3em] 637 | \parbox{4cm}{Perform transactions and 638 | semantic analysis that needs the schema 639 | } 640 | \end{tabular}}; 641 | 642 | \path (ENGINE) edge node[anchor=west,above] 643 | { 644 | Components of Statements 645 | }(TABLE) 646 | ; 647 | \end{tikzpicture} 648 | \caption{Database Design} 649 | \label{fig:db} 650 | \end{figure} 651 | 652 | \subsection{Error Recovery} 653 | 654 | For errors that occur during lexical analysis, if the invalid lexeme appears as a start symbol, we stop the program since this can lead to numerous parse errors. Otherwise we simply ignore this lexeme, and ignore the whole statement if necessary. 655 | 656 | For parse errors, runtime errors (division by zero, column can't be found in the schema) and database errors, we skip to the next start symbol, and parse the next statement. 657 | 658 | \section{Complexity Analysis} 659 | 660 | Our implementation of the lexer is rather simple. It only needs to backtrack when matching operators that can be a prefix of another operator (e.g. $<$ and $<=$). 
Since in this language the length of an operator is no more than 2, the overhead of the backtracking can be ignored. 661 | 662 | For this context-free grammar, we implement an LL(1) predictive parser. The overhead is also negligible. 663 | 664 | Since this is not a database course, we implement the backend in a naive way. The data is stored unsorted in a 2-dimensional vector of ints. 665 | 666 | The creation of tables has a negligible overhead when there are just a few tables in the database. The other three operations, however, have a bigger overhead because of the naive implementation. Assuming that a table has $k$ columns and $N$ records, the complexity of insertion is about $O(Nk)$ since we need to check key constraints against every record. For a where clause containing an expression with $E$ operations, deleting a record costs about $O(NE)$, and querying a record costs about $O(NEk)$ ($O(NE)$ if no reordering is needed). Again, this is not a database course, so we didn't build any special indexes for it. This performance is sufficient for testing our implementation of the language. 
667 | 668 | \end{document} 669 | -------------------------------------------------------------------------------- /doc/screenshots/create1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/create1.png -------------------------------------------------------------------------------- /doc/screenshots/create2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/create2.png -------------------------------------------------------------------------------- /doc/screenshots/create3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/create3.png -------------------------------------------------------------------------------- /doc/screenshots/create4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/create4.png -------------------------------------------------------------------------------- /doc/screenshots/create5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/create5.png -------------------------------------------------------------------------------- /doc/screenshots/delete1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/delete1.png 
-------------------------------------------------------------------------------- /doc/screenshots/delete2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/delete2.png -------------------------------------------------------------------------------- /doc/screenshots/delete3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/delete3.png -------------------------------------------------------------------------------- /doc/screenshots/error1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/error1.png -------------------------------------------------------------------------------- /doc/screenshots/error2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/error2.png -------------------------------------------------------------------------------- /doc/screenshots/exp1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/exp1.png -------------------------------------------------------------------------------- /doc/screenshots/expr1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/expr1.png 
-------------------------------------------------------------------------------- /doc/screenshots/expr2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/expr2.png -------------------------------------------------------------------------------- /doc/screenshots/expr3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/expr3.png -------------------------------------------------------------------------------- /doc/screenshots/expr4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/expr4.png -------------------------------------------------------------------------------- /doc/screenshots/expr5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/expr5.png -------------------------------------------------------------------------------- /doc/screenshots/insert1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/insert1.png -------------------------------------------------------------------------------- /doc/screenshots/insert2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/insert2.png 
-------------------------------------------------------------------------------- /doc/screenshots/insert3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/insert3.png -------------------------------------------------------------------------------- /doc/screenshots/insert4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/insert4.png -------------------------------------------------------------------------------- /doc/screenshots/insert5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/insert5.png -------------------------------------------------------------------------------- /doc/screenshots/insert6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/insert6.png -------------------------------------------------------------------------------- /doc/screenshots/insert7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/insert7.png -------------------------------------------------------------------------------- /doc/screenshots/insert8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/insert8.png 
-------------------------------------------------------------------------------- /doc/screenshots/query1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/query1.png -------------------------------------------------------------------------------- /doc/screenshots/query2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/query2.png -------------------------------------------------------------------------------- /doc/screenshots/query3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/query3.png -------------------------------------------------------------------------------- /doc/screenshots/query4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/query4.png -------------------------------------------------------------------------------- /doc/screenshots/query5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/query5.png -------------------------------------------------------------------------------- /doc/screenshots/query6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/query6.png 
-------------------------------------------------------------------------------- /doc/screenshots/query7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/query7.png -------------------------------------------------------------------------------- /doc/screenshots/query8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/query8.png -------------------------------------------------------------------------------- /doc/screenshots/query9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/query9.png -------------------------------------------------------------------------------- /doc/screenshots/zero1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/zero1.png -------------------------------------------------------------------------------- /doc/screenshots/zero2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/zero2.png -------------------------------------------------------------------------------- /doc/screenshots/zero3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/doc/screenshots/zero3.png 
-------------------------------------------------------------------------------- /doc/test.md: -------------------------------------------------------------------------------- 1 | # Test document for Compilers Course Project 2 | ### Class 1 - group 10 3 | 4 | | Name | Student ID | Job | 5 | |--------|--------|--------| 6 | | Fan Ziyao | 12330081 | Team leader, database implementation| 7 | | Chen Zeyu | 12330056 | System implementation | 8 | | Huang Long | 12330132 | Database implementation | 9 | | Zhang Qiuyi (Class 2) | 12330402 | Frontend, documentation | 10 | | Zhu Lichen (Class 2) | 12330439 | Testing | 11 | 12 | ## Create 13 | 14 | ### #1 Normal create 15 | 16 | #### Input 17 | 18 | CREATE TABLE Student(sid INT, 19 | age INT DEFAULT = 18, 20 | PRIMARY KEY (sid)); 21 | 22 | #### Expected Output 23 | Created table Student 24 | 25 | #### Result: Passed 26 | 27 | ![](screenshots/create1.png) 28 | 29 | ### #2 Two or more columns with the same name 30 | #### Input 31 | 32 | CREATE TABLE Student2(sid INT, 33 | age INT DEFAULT = 18, 34 | PRIMARY KEY (sid), 35 | age INT); 36 | 37 | #### Expected Output 38 | 39 | line #: Multiple definitions for age 40 | 41 | (`#` means a line number) 42 | 43 | #### Result: Passed 44 | 45 | ![](screenshots/create2.png) 46 | 47 | ### #3 Multiple primary key definitions 48 | #### Input 49 | 50 | CREATE TABLE Student3(sid INT, 51 | PRIMARY KEY (sid), 52 | age INT DEFAULT = 18, 53 | PRIMARY KEY (age)); 54 | 55 | #### Expected Output 56 | 57 | line #: Multiple primary key definitions 58 | 59 | #### Result: Passed 60 | ![](screenshots/create3.png) 61 | 62 | ### #4 Primary key is not defined as a column 63 | #### Input 64 | 65 | CREATE TABLE Student4(sid INT, 66 | PRIMARY KEY (sid, height), 67 | age INT DEFAULT = 18); 68 | 69 | #### Expected Output 70 | 71 | line #: Undefined key height 72 | 73 | #### Result: Passed 74 | ![](screenshots/create4.png) 75 | 76 | ### #5 Create an existing table 77 | #### Input 78 | 79 | CREATE TABLE Student(sid 
INT, 80 | age INT DEFAULT = 18, 81 | PRIMARY KEY (sid)); 82 | 83 | #### Expected Output 84 | 85 | line #: Table Student already exists 86 | 87 | #### Result: Passed 88 | ![](screenshots/create5.png) 89 | 90 | ## Insert 91 | 92 | ### #1 Normal insert 93 | #### Input 94 | 95 | INSERT INTO Student(sid, age) VALUES(1111, 18); 96 | 97 | #### Expected Output 98 | 99 | Inserted 1 rows into table Student 100 | 101 | #### Result: Passed 102 | ![](screenshots/insert1.png) 103 | 104 | ### #2 Duplicate column 105 | #### Input 106 | 107 | INSERT INTO Student(sid, age, age) VALUES(1111, 18, 19); 108 | 109 | #### Expected Output 110 | 111 | line #: Duplicate column age 112 | 113 | #### Result: Passed 114 | ![](screenshots/insert2.png) 115 | 116 | ### #3 Insert with a column that is not in the schema 117 | #### Input 118 | 119 | INSERT INTO Student(sid, height) VALUES(1111, 18); 120 | 121 | #### Expected Output 122 | 123 | line #: Column height is not in the schema 124 | 125 | #### Result: Passed 126 | ![](screenshots/insert3.png) 127 | 128 | ### #4 Insert with different number of columns and values 129 | #### Input 130 | 131 | INSERT INTO Student(sid, age) VALUES(1111, 18, 19); 132 | 133 | #### Expected Output 134 | 135 | line #: Numbers of columns and values do not match 136 | 137 | #### Result: Passed 138 | ![](screenshots/insert4.png) 139 | 140 | ### #5 Key constraint violation 141 | #### Input 142 | 143 | INSERT INTO Student(sid, age) VALUES(1111, 18); 144 | 145 | #### Expected Output 146 | 147 | line #: Record already exists 148 | 149 | #### Result: Passed 150 | ![](screenshots/insert5.png) 151 | 152 | ### #6 Insert into a table that doesn't exist 153 | #### Input 154 | 155 | INSERT INTO Student5(sid, age) VALUES(1111, 18); 156 | 157 | #### Expected Output 158 | 159 | line #: Cannot find table Student5 160 | 161 | #### Result: Passed 162 | ![](screenshots/insert6.png) 163 | 164 | ### #7 Insert without keys 165 | #### Input 166 | 167 | INSERT INTO Student(age) VALUES(20); 168 
| 169 | #### Expected Output 170 | 171 | line #: Key sid not found 172 | 173 | #### Result: Passed 174 | ![](screenshots/insert7.png) 175 | 176 | ### #8 Insert with default values 177 | #### Input 178 | 179 | INSERT INTO Student(sid) VALUES(20); 180 | 181 | #### Expected Output 182 | 183 | Inserted 1 rows into table Student 184 | 185 | #### Result: Passed 186 | ![](screenshots/insert8.png) 187 | 188 | ## Query 189 | 190 | ### #1 Select every column and every row 191 | #### Input 192 | 193 | SELECT * FROM Student; 194 | 195 | #### Expected Output 196 | 197 | Records just inserted. 198 | 199 | #### Result: Passed 200 | ![](screenshots/query1.png) 201 | 202 | ### #2 Select specific columns 1 203 | #### Input 204 | 205 | SELECT sid, age FROM Student; 206 | 207 | #### Expected Output 208 | 209 | Data same as #1, with the order of columns switched. 210 | 211 | #### Result: Passed 212 | ![](screenshots/query2.png) 213 | 214 | ### #3 Select specific columns 2 215 | #### Input 216 | 217 | SELECT age FROM Student; 218 | 219 | #### Expected Output 220 | 221 | Data same as #1, with column age only. 222 | 223 | #### Result: Passed 224 | ![](screenshots/query3.png) 225 | 226 | ### #4 Select with no matching rows 227 | #### Input 228 | 229 | SELECT sid, age FROM Student WHERE age < 18; 230 | 231 | #### Expected Output 232 | 233 | No matching rows in Student 234 | 235 | #### Result: Passed 236 | ![](screenshots/query4.png) 237 | 238 | ### #5 Select with a simple where clause 239 | #### Input 240 | 241 | SELECT sid, age FROM Student WHERE age < sid; 242 | 243 | #### Expected Output 244 | 245 | Same as #2. 
246 | 247 | #### Result: Passed 248 | ![](screenshots/query6.png) 249 | 250 | ### #6 Select a column that is not in the schema 251 | 252 | #### Input 253 | 254 | SELECT sid, height FROM Student WHERE age < 18; 255 | 256 | #### Expected Output 257 | 258 | line #: Column height is not in the schema 259 | 260 | #### Result: Passed 261 | ![](screenshots/query7.png) 262 | 263 | ### #7 Select with a where clause referencing a column not in the schema 264 | #### Input 265 | 266 | SELECT sid, age FROM Student WHERE height < 180; 267 | 268 | #### Expected Output 269 | 270 | line #, height not found in the scheme 271 | 272 | #### Result: Passed 273 | ![](screenshots/query8.png) 274 | 275 | ## Expressions 276 | ### #1 Select with a complex where clause 277 | #### Input 278 | 279 | INSERT INTO Student(sid, age) VALUES(90, 25); 280 | SELECT * FROM Student 281 | WHERE age + 7 > 19 + 6 && sid <> 6 / 3 - 2; 282 | 283 | #### Expected Output 284 | 285 | Only the record with age = 25, sid = 90. 286 | 287 | #### Result: Passed 288 | ![](screenshots/expr1.png) 289 | 290 | ### #2 Select with a complex where clause 291 | #### Input 292 | 293 | SELECT sid, age FROM Student WHERE age + 1 / 2 >= 18 && sid <> 1111; 294 | 295 | #### Expected Output 296 | 297 | 2 records, one with age = 25, sid = 90, one with age = 18, sid = 20. 298 | 299 | #### Result: Passed 300 | ![](screenshots/expr2.png) 301 | 302 | ### #3 Insert with a complex value 303 | #### Input 304 | 305 | INSERT INTO Student(sid, age) VALUES(6 * 3 / 4 + 1, 32 * 3 - 6 / 2); 306 | SELECT * FROM Student; 307 | 308 | #### Expected Output 309 | 310 | Inserted 1 rows into table Student 311 | 312 | A record with age = 93, sid = 5 should appear in the query result. 
313 | 314 | #### Result: Passed 315 | ![](screenshots/expr3.png) 316 | 317 | ### #4 Create a table with complex defaults 318 | #### Input 319 | 320 | CREATE TABLE Student2(sid INT DEFAULT = 6 * 3 / 4 + 1, 321 | age INT DEFAULT = 32 * 3 - 6 / 2, PRIMARY KEY (sid)); 322 | INSERT INTO Student2(sid) VALUES(0); 323 | SELECT * FROM Student2; 324 | 325 | #### Expected Output 326 | 327 | Created table Student2 328 | Inserted 1 rows into table Student2 329 | 330 | A record with age = 93, sid = 0 should appear in the query result. 331 | 332 | #### Result: Passed 333 | ![](screenshots/expr4.png) 334 | 335 | ### #5 Delete with a complex where clause 336 | #### Input 337 | 338 | SELECT * FROM Student; 339 | DELETE FROM Student 340 | WHERE age + 7 > 19 + 6 && sid <> 6 / 3 - 2; 341 | SELECT * FROM Student; 342 | 343 | #### Expected Output 344 | 345 | Before deletion, there should be 4 records in the query result. The delete statement should have a feed back: 346 | 347 | Deleted 2 rows from table Student 348 | 349 | After deletion, there should be 2 records in the query result, one with age = 18, sid = 1111, one with age = 18, sid = 20. 350 | 351 | #### Result: Passed 352 | ![](screenshots/expr5.png) 353 | 354 | ## Delete 355 | ### #1 Delete with a simple where clause 356 | #### Input 357 | 358 | INSERT INTO Student(age, sid) VALUES(17, 25); 359 | SELECT * FROM Student; 360 | DELETE FROM Student WHERE age < 18 && age > 14; 361 | SELECT * FROM Student; 362 | 363 | #### Expected Output 364 | 365 | Before deletion, there should be 3 records in the query result. The delete statement should have a feed back: 366 | 367 | Deleted 1 rows from table Student 368 | 369 | After deletion, there should be 2 records in the query result, one with age = 18, sid = 1111, one with age = 18, sid = 20. 
370 | 371 | #### Result: Passed 372 | ![](screenshots/delete1.png) 373 | 374 | ### #2 Delete all rows 375 | #### Input 376 | 377 | SELECT * FROM Student; 378 | DELETE FROM Student; 379 | SELECT * FROM Student; 380 | 381 | #### Expected Output 382 | 383 | Before deletion, there should be 2 records in the query result. The delete statement should have a feed back: 384 | 385 | Deleted 2 rows from table Student 386 | 387 | After deletion, there should be no more records in the table. Hence the feedback should be: 388 | 389 | No matching rows in Student 390 | 391 | #### Result: Passed 392 | ![](screenshots/delete2.png) 393 | 394 | ### #3 Delete with a where clause referencing a column not in the schema 395 | #### Input 396 | 397 | INSERT INTO Student(sid, age) VALUES(1, 10); 398 | SELECT * FROM Student; 399 | DELETE FROM Student WHERE age < 18 && height > 180; 400 | SELECT * FROM Student; 401 | 402 | #### Expected Output 403 | 404 | Before deletion, there should be 1 record in the query result. The delete statement should have a feed back: 405 | 406 | line #, height not found in the scheme 407 | 408 | After deletion, the records in the table should not be changed. 409 | 410 | #### Result: Passed 411 | ![](screenshots/delete3.png) 412 | 413 | ## Division by zero 414 | 415 | ### Division by zero in simple expressions 416 | #### Input 417 | 418 | SELECT age FROM Student WHERE age / 0 + 7 > sid; 419 | INSERT INTO Student(sid, age) VALUES(6 * 3 / 0, 32 * 3 - 6 / 2); 420 | SELECT age FROM Student WHERE age / 0 + 7 > sid; 421 | 422 | #### Expected Output 423 | 424 | Before insertion, there should be 1 record in the query result. The delete statement should have a feed back: 425 | 426 | line #, Division by zero 427 | 428 | After insertion, the records in the table should not be changed. 
429 | 430 | #### Result: Passed 431 | ![](screenshots/zero1.png) 432 | 433 | ### Division by zero in where clause 434 | #### Input 435 | 436 | INSERT INTO Student(sid, age) VALUES(6 * 3 / 0, 32 * 3 - 6 / 2); 437 | SELECT age FROM Student WHERE age / 0 + 7 > sid; 438 | 439 | #### Expected Output 440 | 441 | The delete statement should have a feed back: 442 | 443 | line #, Division by zero 444 | 445 | #### Result: Passed 446 | ![](screenshots/zero2.png) 447 | 448 | ### Division by zero in row evaluations 449 | #### Input 450 | 451 | INSERT INTO Student(sid, age) VALUES(77, 0); 452 | SELECT * FROM Student; 453 | SELECT * FROM Student WHERE sid / age > sid; 454 | 455 | #### Expected Output 456 | 457 | Before insertion, there should be 2 records in the query result. The query statement should have a feed back: 458 | 459 | line #, Division by zero 460 | 461 | #### Result: Passed 462 | ![](screenshots/zero3.png) 463 | 464 | ## Error Recoveries 465 | 466 | ### Parse error 467 | #### Input 468 | 469 | SELECT / FROM Student; 470 | 471 | #### Expected Output 472 | 473 | line #, column 7: Syntax error 474 | 475 | #### Result: Passed 476 | ![](screenshots/error1.png) 477 | 478 | ### Skip invalid lexeme 479 | #### Input 480 | 481 | SELECT * FROM Student; 482 | INSERT # INTO Student(sid, age) VALUES(55, 5); 483 | SELECT * FROM Student; 484 | 485 | #### Expected Output 486 | 487 | Before insertion, there should be 2 records in the query result. The insert statement should have a feed back: 488 | 489 | line #, column 9: Invalid lexeme # 490 | 491 | But this invalid lexeme will be ignored, followed by one more feedback: 492 | 493 | Inserted 1 rows into table Student 494 | 495 | After insertion, there will be a record with sid = 55, age = 5 in the table. 
496 | 497 | #### Result: Passed 498 | ![](screenshots/error2.png) 499 | -------------------------------------------------------------------------------- /draft/Lexer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace ssql { 8 | 9 | using std::istream; 10 | using std::string; 11 | using std::map; 12 | 13 | #define BUF_SIZE 256 14 | #define MAX_OP_SIZE 2 15 | 16 | class LexError : std::exception { 17 | public: 18 | LexError(string _msg) : msg(_msg) {} 19 | ~LexError() row () {} // Updated 20 | const char *what() const throw() { 21 | return msg.c_str(); 22 | } 23 | private: 24 | string msg; 25 | }; 26 | 27 | class Lexer { 28 | public: 29 | Lexer(istream &s) : stream(s) { 30 | line = 0; 31 | initLookups(); 32 | } 33 | 34 | void initLookups() { 35 | words["create"] = CREATE; 36 | words["table"] = TABLE; 37 | words["int"] = INT; 38 | words["default"] = DEFAULT; 39 | words["primary"] = PRIMARY; 40 | words["key"] = KEY; 41 | words["insert"] = INSERT; 42 | words["into"] = INTO; 43 | words["values"] = VALUES; 44 | words["delete"] = DELETE; 45 | words["from"] = FROM; 46 | words["where"] = WHERE; 47 | words["select"] = SELECT; 48 | 49 | singleOp['+'] = PLUS; 50 | singleOp['-'] = MINUS; 51 | singleOp['*'] = MUL; 52 | singleOp['/'] = DIV; 53 | singleOp['('] = L_PAREN; 54 | singleOp[')'] = R_PAREN; 55 | singleOp[','] = COMMA; 56 | singleOp[';'] = SEMICOLON; 57 | 58 | ops["&&"] = AND; 59 | ops["||"] = OR; 60 | ops["!"] = NOT; 61 | ops["<"] = LT; 62 | ops[">"] = GT; 63 | ops["!="] = NEQ; 64 | ops["=="] = EQ; 65 | ops[">="] = GEQ; 66 | ops["<="] = LEQ; 67 | ops["+"] = PLUS; 68 | ops["-"] = MINUS; 69 | ops["*"] = MUL; 70 | ops["/"] = DIV; 71 | ops["("] = L_PAREN; 72 | ops[")"] = R_PAREN; 73 | ops[","] = COMMA; 74 | ops[";"] = SEMICOLON; 75 | } 76 | 77 | bool isOp (char ch) { 78 | return ch == '&' || ch == '|' || ch == '!' 
|| ch == '<' 79 | || ch == '>' || ch == '=' || ch == '+' || ch == '-' 80 | || ch == '*' || ch == '/' || ch == '(' || ch == ')' 81 | || ch == ',' || ch == ';'; 82 | } 83 | 84 | char read_char() { 85 | stream.get(peek); 86 | col++; 87 | return peek; 88 | } 89 | 90 | // after returen, peek and buffers will retain recent values, 91 | // in particular, peek may contain values put back in the stream during last call 92 | // so next time next() is called, we need to intialize them properly 93 | Token next() { 94 | peek = stream.peek(); 95 | while (peek != EOF) { 96 | read_char(); 97 | if (isdigit(peek)) { // numbers 98 | num_buffer = 0; 99 | do { 100 | num_buffer = num_buffer * 10 + peek - '0'; 101 | read_char(); 102 | } while (!isEnd() && isdigit(peek)); 103 | 104 | if (peek != EOF) { 105 | stream.putback(peek); 106 | } 107 | 108 | return Token(NUM, &num_buffer, sizeof(int)); 109 | } else if (isalpha(peek) || peek == '_') { // keywords or identifilers 110 | memset(buffer, '\0', sizeof(buffer)); 111 | int cur = 0; 112 | do { 113 | buffer[cur++] = peek; 114 | } while (cur < BUF_SIZE && stream.get(peek) && (isalnum(peek) || peek == '_')); 115 | 116 | if (cur == BUF_SIZE) { 117 | throw LexError("Exceed maximun identifier length"); 118 | } 119 | if (peek != EOF) { 120 | stream.putback(peek); 121 | } 122 | 123 | string str(buffer); 124 | for (int i = 0; i < str.size(); ++i) { 125 | str[i] = tolower(str[i]); // case insensitive 126 | } 127 | 128 | if (words.find(str) != words.end()) { // keyword 129 | return Token(words[str]); 130 | } else { 131 | return Token(ID, str.c_str(), str.size()); 132 | } 133 | } else if (isspace(peek)) { // white space 134 | if (peek == '\n') 135 | line++; 136 | } else if (singleOp.find(peek) != singleOp.end()) { 137 | return Token(singleOp[peek]); // deterministic single character operators 138 | } else if (isOp(peek)) { // operators 139 | memset(buffer, '\0', sizeof(buffer)); 140 | int cur = 0; 141 | do { 142 | buffer[cur++] = peek; 143 | } while 
(cur < MAX_OP_SIZE && stream.get(peek) && isOp(peek)); 144 | 145 | if (peek != EOF) { 146 | stream.putback(peek); 147 | } 148 | 149 | string str(buffer); 150 | 151 | while (str.size() != 0 && ops.find(str) == ops.end()) { // too long 152 | char temp = str[str.size() - 1]; // last charactor 153 | str = str.substr(0, str.size() - 1); 154 | stream.putback(temp); 155 | } 156 | 157 | if (str.size() == 0) { 158 | throw LexError("Invalid operator"); 159 | } 160 | 161 | return Token(ops[str]); 162 | } else if (peek == EOF) { // error 163 | return Token(END); // $ 164 | } else { // error 165 | throw LexError("Invalid lexeme"); 166 | } 167 | } 168 | } 169 | 170 | int getLine() { 171 | return line; 172 | } 173 | 174 | map words; 175 | map singleOp; 176 | map ops; 177 | private: 178 | char peek; 179 | char buffer[BUF_SIZE]; 180 | int num_buffer; 181 | int line; // line number 182 | istream &stream; 183 | }; 184 | 185 | } -------------------------------------------------------------------------------- /draft/Parser.cpp: -------------------------------------------------------------------------------- 1 | class Statement { 2 | public: 3 | Statement(string _id) : id(_id) {} 4 | string getId() { return id; } 5 | virtual void execute(); 6 | private: 7 | string id; 8 | }; 9 | 10 | class Create : Statement { 11 | public: 12 | Create(const string _id, const map &_default_spec, 13 | const vector &_keys) : 14 | Statement(_id), 15 | default_spec(_default_spec), 16 | keys(_keys) {} 17 | 18 | const map &getDefaults() const { 19 | return default_spec; 20 | } 21 | 22 | const vector &getKeys() const { 23 | return keys; 24 | } 25 | 26 | private: 27 | string id; 28 | map default_spec; 29 | vector keys; 30 | }; 31 | 32 | class Insert : Statement { 33 | public: 34 | Create(const string _id, const vector &columns, 35 | const vector &values) : 36 | Statement(_id), 37 | default_spec(_default_spec), 38 | keys(_keys) {} 39 | 40 | const vector &getColumns() const { 41 | return columns; 42 | } 43 | 44 | 
const vector &getVales() const { 45 | return values; 46 | } 47 | 48 | private: 49 | string id; 50 | vector columns; 51 | vector values; 52 | }; 53 | 54 | class Delete : Statement { 55 | public: 56 | Delete(const string _id, const Expr &_where) 57 | : id(_id), where(_where) {} 58 | const Expr &getWhere() const { 59 | return where; 60 | } 61 | private: 62 | string id; 63 | Expr where; 64 | }; 65 | 66 | class Query : Statement { 67 | public: 68 | Query(const string _id, const vector _columns, 69 | const Expr &_where) 70 | : id(_id), columns(_columns), where(_where) {} 71 | const vector &getColumns() const { 72 | return columns; 73 | } 74 | const Expr &getWhere() const { 75 | return where; 76 | } 77 | private: 78 | string id; 79 | vector columns; // can contain '*' 80 | Expr where; 81 | }; 82 | 83 | 84 | class Parser { 85 | public: 86 | const Statement &ssql_stmt() { 87 | if (lookahead == CREATE) { 88 | return create_stmt(); 89 | } else if (lookahead == INSERT) { 90 | return insert_stmt(); 91 | } else if (lookahead == DELETE) { 92 | return delete_stmt(); 93 | } else if (lookahead == SELECT) { 94 | return query_stmt(); 95 | } else { 96 | throw ParseError("Syntax error"); 97 | } 98 | } 99 | 100 | const Create &create_stmt() { 101 | if (lookahead == CREATE) { 102 | // create_stmt -> CREATE TABLE id L_PAREN decl_list 103 | // R_PAREN SEMICOLON 104 | match(CREATE); match(TABLE); 105 | string table_id = id(); 106 | match(L_PAREN); 107 | map defaults; 108 | vector keys; 109 | decl_list(defaults, keys); 110 | match(R_PAREN) match(SEMICOLON); 111 | return Create(id, defaults, keys); 112 | } else { 113 | throw ParseError("Syntax error"); 114 | } 115 | } 116 | 117 | Parser &decl_list(map &defaults, vector &keys) { 118 | if (lookahead == NUM || lookahead == ID) { 119 | // decl_list -> decl _decl_list 120 | decl(defaults, keys); 121 | _decl_list(defaults, keys); 122 | } else { 123 | throw ParseError("Syntax error"); 124 | } 125 | } 126 | 127 | Parser &_decl_list(map &defaults, 
vector &keys) { 128 | if (lookahead == COMMA) { 129 | // _decl_list -> COMMA decl _decl_list 130 | match(COMMA); decl(defaults, keys); 131 | _decl_list(defaults, keys); 132 | } else if (lookahead == R_PAREN || lookahead == END) { 133 | ; // _decl_list -> epsilon 134 | } else { 135 | throw ParseError("Syntax error"); 136 | } 137 | } 138 | 139 | Parser &decl(map &defaults, vector &keys) { 140 | if (lookahead == ID) { 141 | // decl -> id INT default_spec 142 | string name = id(); 143 | match(INT); 144 | int num = default_spec(defaults); 145 | defaults[name] = num; 146 | } else if (lookahead == PRIMARY) { 147 | // decl -> PRIMARY KEY L_PAREN column_list R_PAREN 148 | match(PRIMARY); match(KEY); match(L_PAREN); 149 | column_list(keys); 150 | match(R_PAREN); 151 | } else { 152 | throw ParseError("Syntax error"); 153 | } 154 | } 155 | 156 | int default_spec(map &defaults) { 157 | if (lookahead == DEFAULT) { 158 | // default_spec -> DEFAULT ASSIGN num 159 | match(DEFAULT); match(ASSIGN); 160 | return num(); 161 | } else if (lookahead == COMMA || lookahead == R_PAREN || lookahead == END) { 162 | // default_spec -> epsilon 163 | return 0; // if no default, default to zero 164 | } else { 165 | throw ParseError("Syntax error"); 166 | } 167 | } 168 | 169 | Parser &column_list(vector &names) { 170 | if (lookahead == ID) { 171 | // column_list -> id _column_list 172 | names.push_back(id()); 173 | _column_list(names); 174 | } else { 175 | throw ParseError("Syntax error"); 176 | } 177 | } 178 | 179 | Parser &_column_list(vector &names) { 180 | if (lookahead == COMMA) { 181 | // _column_list -> COMMA id _column_list 182 | match(COMMA); 183 | names.push_back(id()); 184 | _column_list(names); 185 | } else if (lookahead == R_PAREN || lookahead == END){ 186 | ; // _column_list -> epsilon 187 | } else { 188 | throw ParseError("Syntax error"); 189 | } 190 | } 191 | 192 | const Insert &insert_stmt() { 193 | if (lookahead == INSERT) { 194 | // insert_stmt -> INSERT INTO id L_PAREN 
column_list R_PAREN 195 | // VALUES L_PAREN value_list R_PAREN SEMICOLON 196 | match(INSERT); match(INTO); 197 | string table_id = id(); 198 | match(L_PAREN); 199 | vector columns; 200 | column_list(columns); 201 | match(R_PAREN); 202 | vector values; 203 | value_list(values); 204 | match(R_PAREN); 205 | match(SEMICOLON); 206 | return Insert(table_id, columns, values); 207 | } else { 208 | throw ParseError("Syntax error"); 209 | } 210 | } 211 | 212 | Parser &value_list(vector values) { 213 | if (lookahead == NUM) { 214 | // value_list -> num _value_list 215 | values.push_back(num()); 216 | _value_list(values); 217 | } else { 218 | throw ParseError("Syntax error"); 219 | } 220 | } 221 | 222 | Parser &_value_list(vector values) { 223 | if (lookahead == COMMA) { 224 | // _value_list -> COMMA num _value_list 225 | match(COMMA); 226 | values.push_back(num()); 227 | _value_list(values); 228 | } else if (lookahead == R_PAREN || lookahead == END){ 229 | ; // _value_list -> epsilon 230 | } else { 231 | throw ParseError("Syntax error"); 232 | } 233 | } 234 | 235 | const Delete &delete_stmt() { 236 | if (lookahead == DELETE) { 237 | // delete_stmt -> DELETE FROM id where_clause SEMICOLON 238 | match(DELETE); match(FROM); 239 | string table_id = id(); 240 | Expr where = where_clause(); 241 | match(SEMICOLON); 242 | return Delete(table_id, where); 243 | } else { 244 | throw ParseError("Syntax error"); 245 | } 246 | } 247 | 248 | Expr where_clause(Expr &where) { 249 | if (lookahead == WHERE) { 250 | // where_clause -> WHERE conjunct_list 251 | match(WHERE); 252 | return conjunct_list(); 253 | } else if (lookahead == SEMICOLON || lookahead == END) { 254 | ; // where_clause -> epsilon 255 | } else { 256 | throw ParseError("Syntax error"); 257 | } 258 | 259 | } 260 | 261 | Expr conjunct_list() { 262 | if (lookahead == NUM || lookahead == ID) { 263 | // conjunct_list -> bool _conjunct_list 264 | Expr temp = boolean(); 265 | Expr test = _conjunct_list(); 266 | if (test.isNull()) { // 
test lack a left node 267 | return temp; 268 | } else { 269 | test.setLeft(temp); 270 | return test; 271 | } 272 | } else { 273 | throw ParseError("Syntax error"); 274 | } 275 | } 276 | 277 | Expr _conjunct_list() { 278 | if (lookahead == AND) { 279 | // _conjunct_list -> AND bool _conjunct_list 280 | match(AND); 281 | Expr root(AND); 282 | 283 | Expr temp = boolean(); 284 | Expr test = _conjunct_list(); 285 | if (test.isNull()) { 286 | root.setRight(temp); 287 | return root; 288 | } else { 289 | test.setLeft(temp); 290 | root.setRight(test); 291 | return root; 292 | } 293 | } else if (lookahead == SEMICOLON || lookahead == END) { 294 | return NULL_EXPR; 295 | } else { 296 | throw ParseError("Syntax error"); 297 | } 298 | } 299 | 300 | Expr boolean() { 301 | if (lookahead == NUM || lookahead == ID) { 302 | Expr temp; 303 | temp.setLeft(operand()); 304 | temp.setType(rop()); 305 | temp.setRight(operand()); 306 | return temp; 307 | } else { 308 | throw ParseError("Syntax error"); 309 | } 310 | } 311 | 312 | Expr operand() { 313 | if (lookahead == NUM) { 314 | int result = num(); 315 | Expr temp(NUM); 316 | temp.setValue(Token(NUM, &result, sizeof(result))); 317 | return temp; 318 | } else if (lookahead == ID) { 319 | string result = id(); 320 | Expr temp(ID); 321 | temp.setValue(Token(ID, result.c_str(), result.size())); 322 | return temp; 323 | } else { 324 | throw ParseError("Syntax error"); 325 | } 326 | } 327 | 328 | Type rop() { 329 | if (lookahead == NEQ || lookahead == EQ 330 | || lookahead == LT || lookahead == GT 331 | || lookahead == LEQ || lookahead == GEQ) { 332 | Type result = lookahead.getType() 333 | match(result); 334 | return result; 335 | } else { 336 | throw ParseError("Syntax error"); 337 | } 338 | } 339 | 340 | const Query &query_stmt() { 341 | if (lookahead == SELECT) { 342 | // query_stmt -> SELECT select_list FROM id where_clause SEMICOLON 343 | vector columns; 344 | match(SELECT); select_list(columns); match(FROM); 345 | string table_id = 
id(); 346 | Expr where = where_clause(); 347 | match(SEMICOLON); 348 | return Query(table_id, columns, where); 349 | } else { 350 | throw ParseError("Syntax error"); 351 | } 352 | } 353 | 354 | Parser &select_list(vector &columns) { 355 | if (lookahead == MUL) { 356 | // select_list -> MUL 357 | columns.push_back("*"); 358 | match(MUL); 359 | } else { 360 | // select_list -> column_list 361 | column_list(columns); 362 | } else { 363 | throw ParseError("Syntax error"); 364 | } 365 | } 366 | 367 | string id() { 368 | if (lookahead == ID) { 369 | string result = lookahead.getId(); 370 | lookahead = lexer.next(); 371 | return result; 372 | } else { 373 | throw ParseError("Syntax error"); 374 | } 375 | } 376 | 377 | int &num() { 378 | if (lookahead == NUM) { 379 | int result = lookahead.getNumber(); 380 | lookahead = lexer.next(); 381 | return result; 382 | } else { 383 | throw ParseError("Syntax error"); 384 | } 385 | } 386 | 387 | Parser &match(Type t) { 388 | if (lookahead == t) { 389 | lookahead = lexer.next(); 390 | } else { 391 | throw ParseError("Syntax error"); 392 | } 393 | } 394 | 395 | bool isEnd() { 396 | return lookahead == END; 397 | } 398 | 399 | private: 400 | Token lookahead; 401 | Lexer &lexer; 402 | }; -------------------------------------------------------------------------------- /draft/Token.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | using std::string; 7 | using std::cin; 8 | using std::cout; 9 | 10 | enum Type { 11 | ID, NUM, CREATE, TABLE, INT, DEFAULT, PRIMARY, KEY, INSERT, 12 | INTO, VALUES, DELETE, FROM, WHERE, SELECT, LT, GT, NEQ, EQ, 13 | GEQ, LEQ, PLUS, MINUS, MUL, DIV, AND, OR, NOT, L_PAREN, R_PAREN, 14 | COMMA, SEMICOLON, END 15 | }; 16 | 17 | class TokenError : std::exception { 18 | public: 19 | TokenError(string _msg) : msg(_msg) {} 20 | ~TokenError() throw () {} // Updated 21 | const char* what() const throw() { return msg.c_str(); } 
22 | private: 23 | string msg; 24 | }; 25 | 26 | // avoid RTTI since Token's value will be frequently needed 27 | class Token { 28 | public: 29 | Token(Type _type, const void *raw=NULL, const int size=0) { 30 | if (isValue(_type) && (raw == NULL) || size == 0) { 31 | throw TokenError("Expected non-keyword"); 32 | } 33 | 34 | int real_size = size; 35 | if (_type == ID) { 36 | real_size++; // '\0' 37 | } 38 | 39 | if (isValue(_type)) { 40 | data = (char *)malloc(real_size); 41 | memcpy(data, (char *)raw, real_size / sizeof(char)); 42 | } 43 | type = _type; 44 | } 45 | 46 | bool isValue(Type t) const { // number or identifier 47 | return t == NUM || t == ID; 48 | } 49 | 50 | void initNameMap() { 51 | name[ID] = "ID"; 52 | name[NUM] = "NUM"; 53 | name[CREATE] = "CREATE"; 54 | name[TABLE] = "TABLE"; 55 | name[INT] = "INT"; 56 | name[DEFAULT] = "DEFAULT"; 57 | name[PRIMARY] = "PRIMARY"; 58 | name[KEY] = "KEY"; 59 | name[INSERT] = "INSERT"; 60 | name[INTO] = "INTO"; 61 | name[VALUES] = "VALUES"; 62 | name[DELETE] = "DELETE"; 63 | name[FROM] = "FROM"; 64 | name[WHERE] = "WHERE"; 65 | name[SELECT] = "SELECT"; 66 | name[LT] = "LT"; 67 | name[GT] = "GT"; 68 | name[NEQ] = "NEQ"; 69 | name[EQ] = "EQ"; 70 | name[GEQ] = "GEQ"; 71 | name[LEQ] = "LEQ"; 72 | name[PLUS] = "PLUS"; 73 | name[MINUS] = "MINUS"; 74 | name[MUL] = "MUL"; 75 | name[DIV] = "DIV"; 76 | name[AND] = "AND"; 77 | name[OR] = "OR"; 78 | name[NOT] = "NOT"; 79 | name[L_PAREN] = "L_PAREN"; 80 | name[R_PAREN] = "R_PAREN"; 81 | name[COMMA] = "COMMA"; 82 | name[SEMICOLON] = "SEMICOLON"; 83 | name[END] = "END"; 84 | } 85 | 86 | // identifiers 87 | string getId() const { 88 | if (type != ID){ 89 | throw TokenError("Expected identifer, get otherwise"); 90 | } else { 91 | return string(data); 92 | } 93 | } 94 | 95 | // keywords 96 | Type getKeyword() const { 97 | if (isValue(type)) { 98 | throw TokenError("Expected keywords, get otherwise"); 99 | } else { 100 | return type; 101 | } 102 | } 103 | 104 | // numbers 105 | int 
getNumber() const { 106 | if (type != NUM) { 107 | throw TokenError("Expected number, get otherwise"); 108 | } else { 109 | int result = 0; 110 | memcpy(&result, data, sizeof(int)); 111 | return result; 112 | } 113 | } 114 | 115 | bool operator== (Type rhs) { 116 | return type == rhs; 117 | } 118 | 119 | friend ostream &operator<<(ostream &s, const Token &token) { 120 | s << name[type] << ": "; 121 | if (type == ID) { 122 | s << "(" << name[type] << ", " << getId() << " ) "; 123 | } else if (type == NUM) { 124 | s << "(" << name[type] << ", " << getNumber() << " ) "; 125 | } else { 126 | s << name[type] << " "; 127 | } 128 | } 129 | 130 | Type getType() const { 131 | return type; 132 | } 133 | 134 | static map name; 135 | private: 136 | Type type; 137 | char *data; 138 | }; 139 | 140 | /* 141 | int main(void) { 142 | // usage: 143 | int v = 15; 144 | string s("hey!"); 145 | Type id = SELECT; 146 | const char *a = s.c_str(); 147 | 148 | try { 149 | Token token(NUM, &v, sizeof(int)); 150 | int value = token.getNumber(); 151 | cout << value << '\n'; 152 | cout << s.size() << '\n'; 153 | 154 | Token token2(ID, s.c_str(), s.size()); 155 | string str = token2.getId(); 156 | cout << str << '\n'; 157 | 158 | Token token3(SELECT, s.c_str(), s.size()); 159 | cout << (token3.getKeyword() == SELECT) << '\n'; 160 | 161 | Token token4(SELECT, s.c_str(), s.size()); 162 | string str2 = token3.getId(); // throw expection 163 | cout << str2 << '\n'; 164 | 165 | } catch(TokenError e) { 166 | cout << e.what() << '\n'; 167 | } 168 | 169 | 170 | return 0; 171 | } 172 | */ 173 | -------------------------------------------------------------------------------- /draft/draft.txt: -------------------------------------------------------------------------------- 1 | enum Word { 2 | CREATE, 3 | TABLE, 4 | INT, 5 | DEFAULT, 6 | PRIMARY, 7 | KEY, 8 | INSERT, 9 | INTO, 10 | VALUES, 11 | DELETE, 12 | FROM, 13 | WHERE, 14 | SELECT 15 | } 16 | 17 | keyword = { 18 | "create", 19 | "table", 20 | "int", 
21 | "default", 22 | "primary", 23 | "key", 24 | "insert", 25 | "into", 26 | "values", 27 | "delete", 28 | "from", 29 | "where", 30 | "select" 31 | } 32 | 33 | enum Arith { 34 | PLUS, 35 | MINUS, 36 | MUL, 37 | DIV 38 | } 39 | 40 | arith_op = { 41 | "+", 42 | "-", 43 | "*", 44 | "/" 45 | } 46 | 47 | enum Rel { 48 | LT, 49 | GT, 50 | NEQ, 51 | EQ, 52 | GEQ, 53 | LEQ 54 | } 55 | 56 | rel_op = { 57 | "<", 58 | ">", 59 | "<>", 60 | "==", 61 | ">=", 62 | "<=" 63 | } 64 | 65 | enum Logic { 66 | AND, 67 | OR, 68 | NOT 69 | } 70 | 71 | 72 | logic_op = { 73 | "&&", 74 | "||", 75 | "!" 76 | } 77 | 78 | enum Sep { 79 | 80 | } 81 | L = 5 82 | 83 | other = { 84 | "(", 85 | ")", 86 | ",", 87 | ";" 88 | } 89 | 90 | 91 | SINGLEOP = { 92 | "+", 93 | "-", 94 | "*", 95 | "/", 96 | "!", 97 | "(", 98 | ")", 99 | ",", 100 | ";" 101 | } 102 | 103 | DOUBLEOP = { 104 | "&&", 105 | "||", 106 | "==" 107 | } 108 | 109 | VAROP = { 110 | "<", 111 | ">", 112 | "<>", 113 | ">=", 114 | "<=" 115 | } 116 | 117 | map keywords to Word 118 | ===================== 119 | 120 | Lexer: 121 | 122 | 123 | ssql_stmt -> create_stmt 124 | | insert_stmt 125 | | delete_stmt 126 | | query_stmt 127 | 128 | ===================CREATE================= 129 | create_stmt -> CREATE TABLE id L_PAREN decl_list R_PAREN SEMICOLON 130 | 131 | #decl_list -> decl | decl_list COMMA decl 132 | decl_list -> decl _decl_list 133 | _decl_list -> COMMA decl _decl_list | epsilon 134 | 135 | decl -> id INT default_spec | PRIMARY KEY L_PAREN column_list R_PAREN 136 | 137 | # !!! 
138 | # default_spec -> DEFAULT EQ num | epsilon 139 | 140 | default_spec -> DEFAULT EQ expr[true] | epsilon 141 | 142 | 143 | #column_list -> id | column_list COMMA id 144 | column_list -> id _column_list 145 | _column_list -> COMMA id _column_list | epsilon 146 | 147 | 148 | ==================INSERT=================== 149 | insert_stmt -> INSERT INTO id L_PAREN column_list R_PAREN VALUES L_PAREN value_list R_PAREN SEMICOLON 150 | 151 | #value_list -> expr[true] | value_list COMMA expr[true] 152 | 153 | #!!! 154 | value_list -> expr[true] _value_list 155 | _value_list -> COMMA expr[true] _value_list | epsilon 156 | 157 | ==================DELETE================== 158 | 159 | delete_stmt -> DELETE FROM id where_clause SEMICOLON 160 | 161 | #!!! 162 | # where_clause -> WHERE conjunct_list | epsilon 163 | 164 | where_clause -> WHERE disjunct | epsilon 165 | 166 | #conjunct_list -> bool | conjunct_list AND bool 167 | #conjunct_list -> bool _conjunct_list 168 | #_conjunct_list -> AND bool _conjunct_list | epsilon 169 | 170 | disjunct -> conjunct _disjunct 171 | _disjunct -> OR conjunct _disjunct | epsilon 172 | 173 | conjunct -> bool _conjunct 174 | _conjunct -> AND bool _conjunct | epsilon 175 | 176 | bool -> L_PAREN disjunct R_PAREN | NOT bool | comp 177 | comp -> expr[false] rop expr[false] 178 | 179 | expr[] -> term[] _expr[] 180 | _expr[] -> PLUS term[] _expr[] | MINUS term[] _expr[] | epsilon 181 | term[] -> unary[] _term[] 182 | _term[] -> MUL unary[] _term[] | DIV unary[] _term[] | epsilon 183 | unary[] -> PLUS unary[] | MINUS unary[] | num 184 | unary[simple=true] -> L_PAREN expr[simple=true] R_PAREN 185 | unary[simple=false] -> id 186 | 187 | # bool -> operand rop operand 188 | # operand -> num | id 189 | rop -> NEQ | EQ | LT | GT | LEQ | GEQ 190 | 191 | ====================SELECT================= 192 | 193 | query_stmt -> SELECT select_list FROM id where_clause SEMICOLON 194 | select_list -> column_list | MUL 195 | 196 | 
====================SUMMARY=============== 197 | 198 | terminals: 199 | id, num, CREATE, TABLE, INT, DEFAULT, PRIMARY, KEY, INSERT, INTO, VALUES, DELETE, FROM, WHERE, SELECT, LT, GT, NEQ, EQ, GEQ, LEQ, PLUS, MINUS, MUL, DIV, AND, OR, NOT, L_PAREN, R_PAREN, COMMA, SEMICOLON 200 | 201 | nonterminals: 202 | create_stmt, 203 | decl_list, 204 | _decl_list, 205 | decl, 206 | default_spec, 207 | column_list, 208 | _column_list, 209 | insert_stmt, 210 | value_list, 211 | _value_list, 212 | delete_stmt, 213 | where_clause, 214 | conjunct_list, 215 | _conjunct_list, 216 | bool, 217 | operand, 218 | rop, 219 | query_stmt, 220 | select_list 221 | -------------------------------------------------------------------------------- /draft/engine.cpp: -------------------------------------------------------------------------------- 1 | class DataBaseError : std::exception { 2 | DataBaseError(string _msg) : msg(_msg) {} 3 | ~DataBaseError() throw () {} // Updated 4 | const char *what() const throw() { 5 | return msg.c_str(); 6 | } 7 | private: 8 | string msg; 9 | }; 10 | 11 | #define MAX_COL 100 12 | #define MAX_KEY 100 13 | 14 | struct Scheme { 15 | string name; 16 | int def; 17 | bool is_key; 18 | Scheme(string _name, int def, bool _is_key) 19 | : name(_name), def(_def), is_key(_is_key) {} 20 | }; 21 | 22 | 23 | int main(int argc, char const *argv[]) { 24 | Lexer *lexptr; 25 | ofstream out; 26 | ifstream in; // must be in the scope until stop scanning 27 | 28 | if (argc > 1) { 29 | in.open(argv[1], ifstream::in); 30 | if (in.is_open()) { 31 | lexptr = new Lexer(in); 32 | } else { 33 | cout << "Fail to open " << argv[1] << '\n'; 34 | exit(1); 35 | } 36 | } else { 37 | lexptr = new Lexer(cin); 38 | } 39 | 40 | Parser parser(*lexptr); 41 | Engine engine; 42 | 43 | while (!parser.isEnd()) { 44 | try { 45 | Type next = parser.next_stmt_type(); 46 | if (next == CREATE) { 47 | Create create_stmt = parser.create_stmt(); 48 | engine.create(create_stmt); 49 | cout << "Created table " << 
create_stmt.getId() << "\n"; 50 | } else if (next == INSERT) { 51 | Insert insert_stmt = parser.insert_stmt(); 52 | int number = engine.insert(insert_stmt); 53 | cout << "Inserted " << number << " rows into table "; 54 | cout << insert_stmt.getId() << "\n"; 55 | } else if (next == DELETE) { 56 | Delete delete_stmt = parser.delete_stmt(); 57 | int number = engine.del(insert_stmt); 58 | cout << "Deleted " << number << " rows from table "; 59 | cout << delete_stmt.getId() << "\n"; 60 | } else if (next == SELECT) { 61 | Query query_stmt = parser.query_stmt(); 62 | string table_id = query_stmt.getId(); 63 | 64 | vector > results; 65 | vector names = query_stmt.getColumns(); 66 | 67 | int number = engine.query(query_stmt, results); 68 | if (number > 0) { 69 | if (std::find(name.begin(), names.end(), "*") != names.end()) { 70 | names = engine.getColumns(table_id); 71 | } 72 | 73 | for (auto name : names) { 74 | cout << name << '\t'; 75 | } 76 | cout << '\n'; 77 | 78 | for (auto record : results) { 79 | for (auto col : record) { 80 | cout << col << '\t' 81 | } 82 | cout << '\n'; 83 | } 84 | cout << number <<< " matching rows in " << query_stmt.getId() << "\n"; 85 | } else { 86 | cout << "No matching rows in " << query_stmt.getId() << "\n"; 87 | } 88 | } 89 | } catch (LexError e) { 90 | cout << lexptr->getLine() << ": " << e.what() << '\n'; 91 | } catch (ParseError e) { 92 | cout << lexptr->getLine() << ": " << e.what() << '\n'; 93 | } catch(DataBaseError e) { 94 | cout << lexptr->getLine() << ": " << e.what() << '\n'; 95 | } 96 | } 97 | 98 | return 0; 99 | } 100 | 101 | 102 | class Engine { 103 | public: 104 | bool create(const Create &create_stmt) { 105 | // check the table is not created before 106 | string table_id = create_stmt.getId(); 107 | if (tables.find(table_id) != tables.end()) { 108 | throw DataBaseError(string("Table ") + table_id + string(" already exists")); 109 | } 110 | 111 | // check no multiple primary keys 112 | const vector > &keys = create.getKeys(); 
113 | if (keys.size() > 1) { 114 | throw DataBaseError("Multiple primary key definitions")); 115 | } 116 | 117 | // check no duplicate column definitions 118 | const multimap &defs = create.getDefaults(); 119 | map unique_defs; 120 | for(auto it = defs.begin(); it != defs.end(); it = defs.upper_bound(it->first)) { 121 | if (defaults.count(it->first) > 1) { 122 | throw DataBaseError(string("Multiple definitions for ") + it->first); 123 | } else { 124 | unique_defs[it->first] = it->second; 125 | } 126 | } 127 | 128 | // check all primary keys have definitions 129 | const vector &primary = keys[0]; 130 | for (auto key: primary) { 131 | if (defs.find(key) == defs.end()) { 132 | throw DataBaseError(string("Undefined key ") + key); 133 | } 134 | } 135 | 136 | if (unique_defs.size() > MAX_COL) { 137 | throw DataBaseError(string("Number of columns should be no more than ") 138 | + to_string(MAX_COL)); 139 | } 140 | 141 | if (primary.size() > MAX_KEY) { 142 | throw DataBaseError(string("Number of keys should be no more than ") 143 | + to_string(MAX_KEY)); 144 | } 145 | 146 | Table new_table = Table(table_id, unique_defs, primary); 147 | tables[create_stmt.id] = new_table; 148 | return true; 149 | } 150 | 151 | // insert the values 152 | bool insert(const Insert &insert_stmt) { 153 | // the table should exist 154 | string table_id = insert_stmt.getId(); 155 | auto it = tables.find(table_id); 156 | if (it == tables.end()) { 157 | throw DataBaseError(string("Cannot find table ") + table_id); 158 | } 159 | 160 | set checked; 161 | const vector &columns = insert_stmt.getColumns(); 162 | const vector &values = insert_stmt.getValues(); 163 | 164 | // # of columns should equal to # of values 165 | if (columns.size() != values.size()) { 166 | throw DataBaseError("Numbers of columns and values do not match"); 167 | } 168 | 169 | // no duplicate columns 170 | for (auto col : columns) { 171 | if (checked.find(col) != checked.end()) { 172 | throw DataBaseError(string("Duplicate column 
") + col); 173 | } else { 174 | checked.insert(col); 175 | } 176 | } 177 | 178 | // no primary key constraint violation(all have/already in) 179 | // all columns should be in the schema of the table 180 | return it->insert(columns, values); 181 | } 182 | 183 | // delete the records 184 | int del(const Delete &delete_stmt) { 185 | // the table should exist 186 | string table_id = delete_stmt.getId(); 187 | auto it = tables.find(table_id); 188 | if (it == tables.end()) { 189 | throw DataBaseError(string("Cannot find table ") + table_id); 190 | } 191 | 192 | // columns occurring in the where clause (if any) 193 | // should be in the schema of the table 194 | return it->del(delete_stmt.getWhere()); 195 | } 196 | 197 | // query the records 198 | int query(const Insert &query_stmt, vector > &results) const { 199 | // the table should exist 200 | string table_id = query_stmt.getId(); 201 | auto it = tables.find(table_id); 202 | if (it == tables.end()) { 203 | throw DataBaseError(string("Cannot find table ") + table_id); 204 | } 205 | 206 | // all columns (except *) in the select list should be in 207 | // the schema of the table 208 | // columns occurring in the where clause (if any) 209 | // should be in the schema of the table 210 | return it->query(query_stmt.getColumns(), query_stmt.getWhere(), results); 211 | } 212 | 213 | const vector &getColumns(string table_id) { 214 | auto it = tables.find(table_id); 215 | if (it == tables.end()) { 216 | throw DataBaseError(string("Cannot find table ") + table_id); 217 | } 218 | 219 | return it->getColumns(); 220 | } 221 | 222 | private: 223 | map tables; 224 | }; 225 | 226 | class Table { 227 | public: 228 | Table(const string table_id, const map defs, 229 | const vector primary) 230 | : id(table_id), keys(primary.begin(), primary.end()) { 231 | int counter = 0; 232 | for (auto it = defs.begin(); it != defs.end(); ++it) { 233 | Scheme new_scheme = Scheme(it->first, it->second, 234 | keys.find(it->first) != keys.end()); 235 | 
indexes[it->first] = counter++; 236 | schema.push_back(new_scheme); 237 | columns.push_back(it->first); 238 | } 239 | 240 | } 241 | 242 | Table &insert(const vector cols, const vector values) { 243 | 244 | // check no primary key constraint violation 245 | set colset(cols.begin(), cols.end()); 246 | for (auto key : keys) { 247 | if (colset.find(key) == colset.end()) { 248 | throw DataBaseError(string("Key ") + key + string(" not found")); 249 | } 250 | } 251 | 252 | // all columns should be in the schema of the table 253 | for(col : colset) { 254 | if (std::find(columns.begin(), columns.end(), col) == columns.end()) { 255 | throw DataBaseError(string("Column ") + col + string(" is not in the schema")); 256 | } 257 | } 258 | 259 | vector new_record(columns.size()); 260 | // fill in default data 261 | for (int i = 0; i < columns.size(); ++i) { 262 | new_record[i] = schema[i].def; 263 | } 264 | 265 | // fill in available data 266 | for (int i = 0; i < cols.size(); ++i) { 267 | int index = indexes[cols[i]]; 268 | new_record[index] = values[i]; 269 | } 270 | 271 | // check not already exists 272 | for (auto record : data) { 273 | if (conflict(record, new_record)) { 274 | throw DataBaseError(string("Record already exists")); 275 | } 276 | } 277 | 278 | data.push_back(new_record); 279 | } 280 | 281 | int del(const Expr expr) { 282 | auto it = data.begin(); 283 | int count = 0; 284 | 285 | // columns occurring in the where clause (if any) 286 | // should be in the schema of the table 287 | while (it != data.end()) { 288 | if (expr.eval(*it, indexes)) { 289 | data.erase(it++); 290 | count++; 291 | } else { 292 | it++; 293 | } 294 | } 295 | 296 | return count; 297 | } 298 | 299 | int query(const vector &names, const Expr expr, 300 | vector > &results) const { 301 | int count = 0; 302 | 303 | // get queried columns 304 | if (std::find(names.begin(), names.end(), "*")) { // all 305 | for (auto record : data) { 306 | if (expr.eval(record, indexes)) { 307 | // columns occurring 
in the where clause (if any) 308 | // should be in the schema of the table 309 | results.push_back(record); 310 | count++; 311 | } 312 | } 313 | return count; 314 | } else { // selected columns 315 | // all columns (except *) in the select list should be in 316 | // the schema of the table 317 | for(name : names) { 318 | if (std::find(columns.begin(), columns.end(), name) == columns.end()) { 319 | throw DataBaseError(string("Column ") + name + string(" is not in the schema")); 320 | } 321 | } 322 | 323 | // use int index for reordering 324 | vector query_indexes; 325 | for (auto name : names) { 326 | query_indexes.push_back(indexes[name]); 327 | } 328 | 329 | for (auto record : data) { 330 | if (expr.eval(record, indexes)) { 331 | // columns occurring in the where clause (if any) 332 | // should be in the schema of the table 333 | vector reordered; 334 | for (idx : query_indexes) { 335 | reordered.push_back(record[idx]); 336 | } 337 | results.push_back(reordered); 338 | count++; 339 | } 340 | } 341 | 342 | return count; 343 | } 344 | 345 | } 346 | 347 | const vector &getColumns() const { 348 | return columns; 349 | } 350 | 351 | bool conflict(const vector old_record, const vector new_record) const { 352 | for (key: keys) { 353 | int idx = indexes[key]; 354 | if (old_record[idx] != new_record[idx]) { 355 | return false; 356 | } 357 | } 358 | return true; 359 | } 360 | 361 | 362 | private: 363 | string id; 364 | 365 | set keys; 366 | map indexes; 367 | 368 | // same order 369 | vector schema; 370 | vector columns; 371 | vector > data; 372 | }; 373 | -------------------------------------------------------------------------------- /draft/first-follow.txt: -------------------------------------------------------------------------------- 1 | first 2 | # RULES: 3 | # 1. right is terminal X, put X in first 4 | # 2. right is epsilon, put epsilon in first 5 | # 3. right is Y1Y2Y3..,put first(Y1) in first, 6 | # then if Y1 can derive epsilon, put first(Y2)... 
and so forth 7 | 8 | 9 | #ssql_stmt -> create_stmt | insert_stmt | delete_stmt | query_stmt 10 | ssql_stmt = {CREATE, INSERT, DELETE, SELECT} 11 | #create_stmt -> CREATE TABLE id L_PAREN decl_list R_PAREN SEMICOLON 12 | create_stmt = {CREATE} 13 | # decl_list -> decl _decl_list 14 | decl_list = {id, PRIMARY} 15 | #_decl_list -> COMMA decl _decl_list 16 | #_decl_list -> epsilon 17 | _decl_list = {COMMA, epsilon} 18 | #decl -> id INT default_spec 19 | #decl -> PRIMARY KEY L_PAREN column_list R_PAREN 20 | decl = {id, PRIMARY} 21 | 22 | #default_spec -> DEFAULT EQ expr[true] 23 | #default_spec -> epsilon 24 | default_spec = {DEFAULT, epsilon} 25 | 26 | # expr[] -> term[] _expr[] 27 | # first(term) 28 | expr[simple=true] = {PLUS, MINUS, num, L_PAREN} 29 | expr[simple=false] = {PLUS, MINUS, num, id} 30 | 31 | # _expr[] -> PLUS term[] _expr[] 32 | # _expr[] -> MINUS term[] _expr[] 33 | # _expr[] -> epsilon 34 | _expr[] = {PLUS, MINUS, epsilon} 35 | 36 | #term[] -> unary[] _term[] 37 | #first(unary) 38 | term[simple=true] = {PLUS, MINUS, num, L_PAREN} 39 | term[simple=false] = {PLUS, MINUS, num, id} 40 | 41 | # _term[] -> MUL unary[] _term[] 42 | # _term[] -> DIV unary[] _term[] 43 | # _term[] -> epsilon 44 | _term[] = {MUL, DIV, epsilon} 45 | 46 | # unary[] -> PLUS unary[] | MINUS unary[] 47 | # unary[] -> num 48 | # unary[simple=true] -> L_PAREN expr[true] R_PAREN 49 | # unary[simple=false] -> id 50 | unary[simple=true] = {PLUS, MINUS, num, L_PAREN} 51 | unary[simple=false] = {PLUS, MINUS, num, id} 52 | 53 | #column_list -> id _column_list 54 | column_list = {id} 55 | #_column_list -> COMMA id _column_list 56 | #_column_list -> epsilon 57 | _column_list = {COMMA, epsilon} 58 | # INSERT INTO id L_PAREN column_list R_PAREN VALUES L_PAREN value_list R_PAREN SEMICOLON 59 | insert_stmt = {INSERT} 60 | 61 | #value_list -> expr[true] _value_list 62 | # first(expr[true]) 63 | value_list = {PLUS, MINUS, num, L_PAREN} 64 | #_value_list -> COMMA expr[true] _value_list 65 | 
#_value_list -> epsilon 66 | _value_list = {COMMA, epsilon} 67 | 68 | #delete_stmt -> DELETE FROM id where_clause SEMICOLON 69 | delete_stmt = {DELETE} 70 | 71 | #where_clause -> WHERE disjunct 72 | #where_clause -> epsilon 73 | where_clause = {WHERE, epsilon} 74 | 75 | # disjunct -> conjunct _disjunct 76 | # first(conjunct) 77 | disjunct = {L_PAREN, NOT, PLUS, MINUS, num, id} 78 | 79 | # _disjunct -> OR conjunct _disjunct | epsilon 80 | _disjunct = {OR, epsilon} 81 | 82 | # conjunct -> bool _conjunct 83 | # first(bool) 84 | conjunct = {L_PAREN, NOT, PLUS, MINUS, num, id} 85 | 86 | # _conjunct -> AND bool _conjunct | epsilon 87 | _conjunct = {AND, epsilon} 88 | 89 | # bool -> L_PAREN disjunct R_PAREN | NOT bool | comp 90 | # first(comp) 91 | bool = {L_PAREN, NOT, PLUS, MINUS, num, id} 92 | # comp -> expr[false] rop expr[false] 93 | # first(expr[false]) 94 | comp = {PLUS, MINUS, num, id} 95 | 96 | #rop -> NEQ | EQ | LT | GT | LEQ | GEQ 97 | rop = {NEQ, EQ, LT, GT, LEQ, GEQ} 98 | 99 | #query_stmt -> SELECT select_list FROM id where_clause SEMICOLON 100 | query_stmt = {SELECT} 101 | #select_list -> MUL 102 | #select_list -> column_list 103 | #first column_list 104 | select_list = {MUL, id} 105 | 106 | ============================== 107 | follow 108 | # RULES: 109 | # 1. put $ in follow(start symbol) 110 | # 2. find all productions where the left symbol is in the body 111 | # 3. If A -> aXb, 112 | # put everything except epsilon in first(b) in follow(X). 113 | # if first(b) contains epsilon(b can derive epsilon), 114 | # put everything in follow(A) into follow(X) 115 | # 4.
If A -> aX, put everything in follow(A) into follow(X) 116 | 117 | ssql_stmt = {$} 118 | create_stmt = {$} 119 | #create_stmt -> CREATE TABLE id L_PAREN decl_list R_PAREN SEMICOLON 120 | decl_list = {R_PAREN} 121 | #decl_list -> decl _decl_list 122 | _decl_list = {R_PAREN} 123 | #decl_list -> decl _decl_list 124 | #_decl_list -> COMMA decl _decl_list 125 | #first(_decl_list) + follow(decl_list) + follow(_decl_list) 126 | decl = {COMMA, R_PAREN} 127 | #decl -> id INT default_spec 128 | # follow(decl) 129 | default_spec = {COMMA, R_PAREN} 130 | 131 | # default_spec -> DEFAULT EQ expr[true] 132 | # value_list -> expr[true] 133 | # _value_list -> COMMA expr[true] _value_list 134 | # unary[simple=true] -> L_PAREN expr[true] R_PAREN 135 | # follow(default_spec) + follow(value_list) 136 | # + first(_value_list) + follow(_value_list) 137 | expr[true] = {COMMA, R_PAREN} 138 | 139 | # comp -> expr[false] rop expr[false] 140 | # first(rop) + follow(comp) 141 | expr[false] = {NEQ, EQ, LT, GT, LEQ, GEQ, AND, OR, SEMICOLON, R_PAREN} 142 | 143 | # expr[] -> term[] _expr[] 144 | # _expr[] -> PLUS term[] _expr[] 145 | # _expr[] -> MINUS term[] _expr[] 146 | # follow(expr[true]) 147 | _expr[true] = {COMMA, R_PAREN} 148 | # follow(expr[false]) 149 | _expr[false] = {NEQ, EQ, LT, GT, LEQ, GEQ, AND, OR, SEMICOLON, R_PAREN} 150 | 151 | # expr[] -> term[] _expr[] 152 | # _expr[] -> PLUS term[] _expr[] 153 | # _expr[] -> MINUS term[] _expr[] 154 | # first(_expr[]) + follow(expr[]) + follow(_expr[]) 155 | term[true] = {PLUS, MINUS, COMMA, R_PAREN} 156 | term[false] = {PLUS, MINUS, NEQ, EQ, LT, GT, LEQ, GEQ, AND, OR, SEMICOLON, R_PAREN} 157 | 158 | # term[] -> unary[] _term[] 159 | # _term[] -> MUL unary[] _term[] 160 | # _term[] -> DIV unary[] _term[] 161 | # follow(term) 162 | _term[true] = {PLUS, MINUS, COMMA, R_PAREN} 163 | _term[false] = {PLUS, MINUS, NEQ, EQ, LT, GT, LEQ, GEQ, AND, OR, SEMICOLON, R_PAREN} 164 | 165 | # term[] -> unary[] _term[] 166 | # _term[] -> MUL unary[] _term[] 
167 | # _term[] -> DIV unary[] _term[] 168 | # unary[] -> PLUS unary[] 169 | # unary[] -> MINUS unary[] 170 | # first(_term[]) + follow(term[]) + follow(_term[]) 171 | unary[true] = {MUL, DIV, PLUS, MINUS, COMMA, R_PAREN} 172 | unary[false] = {MUL, DIV, PLUS, MINUS, NEQ, EQ, LT, GT, LEQ, GEQ, AND, OR, SEMICOLON, R_PAREN} 173 | 174 | #decl -> PRIMARY KEY L_PAREN column_list R_PAREN 175 | #insert_stmt -> INSERT INTO id L_PAREN column_list R_PAREN VALUES L_PAREN value_list R_PAREN SEMICOLON 176 | #select_list -> column_list 177 | # follow(select_list) 178 | column_list = {FROM, R_PAREN} 179 | #column_list -> id _column_list 180 | _column_list = {FROM, R_PAREN} 181 | 182 | insert_stmt = {$} 183 | #insert_stmt -> INSERT INTO id L_PAREN column_list R_PAREN VALUES L_PAREN value_list R_PAREN SEMICOLON 184 | value_list = {R_PAREN} 185 | # value_list -> expr[true] _value_list 186 | # _value_list -> COMMA expr[true] _value_list 187 | # follow(value_list) 188 | _value_list = {R_PAREN} 189 | 190 | delete_stmt = {$} 191 | #delete_stmt -> DELETE FROM id where_clause SEMICOLON 192 | #query_stmt -> SELECT select_list FROM id where_clause SEMICOLON 193 | where_clause = {SEMICOLON} 194 | 195 | # where_clause -> WHERE disjunct 196 | # bool -> L_PAREN disjunct R_PAREN 197 | # follow(where_clause) + R_PAREN 198 | disjunct = {SEMICOLON, R_PAREN} 199 | 200 | #disjunct -> conjunct _disjunct 201 | #_disjunct -> OR conjunct _disjunct 202 | #follow(disjunct) 203 | _disjunct = {SEMICOLON, R_PAREN} 204 | 205 | # disjunct -> conjunct _disjunct 206 | # _disjunct -> OR conjunct _disjunct 207 | # first(_disjunct) + follow(disjunct) + follow(_disjunct) 208 | conjunct = {OR, SEMICOLON, R_PAREN} 209 | 210 | # conjunct -> bool _conjunct 211 | # _conjunct -> AND bool _conjunct 212 | # follow(conjunct) 213 | _conjunct = {OR, SEMICOLON, R_PAREN} 214 | 215 | # conjunct -> bool _conjunct 216 | # _conjunct -> AND bool _conjunct 217 | # bool -> NOT bool 218 | # first(_conjunct) + follow(conjunct) + follow(_conjunct) 219 | bool
= {AND, OR, SEMICOLON, R_PAREN} 220 | 221 | # bool -> comp 222 | # follow(bool) 223 | comp = {AND, OR, SEMICOLON, R_PAREN} 224 | 225 | # comp -> expr[false] rop expr[false] 226 | # first(expr[false]) 227 | rop = {PLUS, MINUS, num, id} 228 | 229 | query_stmt = {$} 230 | #query_stmt -> SELECT select_list FROM id where_clause SEMICOLON 231 | select_list = {FROM} 232 | 233 | -------------------------------------------------------------------------------- /draft/main.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <fstream> 3 | 4 | #include "Token.h" 5 | #include "Lexer.h" 6 | #include "Parser.h" 7 | #include "Engine.h" 8 | 9 | using std::cin; 10 | using std::cout; 11 | using std::ifstream; 12 | using std::ofstream; 13 | using namespace ssql; 14 | // Entry point: runs SSQL statements from the file named by argv[1], or from stdin when no argument is given. 15 | int main(int argc, char const *argv[]) { 16 | Lexer *lexptr; 17 | ofstream out; 18 | ifstream in; // must be in the scope until stop scanning 19 | 20 | if (argc > 1) { 21 | in.open(argv[1], ifstream::in); 22 | if (in.is_open()) { 23 | lexptr = new Lexer(in); 24 | } else { 25 | cout << "Fail to open " << argv[1] << '\n'; 26 | exit(1); 27 | } 28 | } else { 29 | lexptr = new Lexer(cin); 30 | } 31 | 32 | Parser parser(*lexptr); 33 | Engine engine; 34 | // One statement per iteration; lexer, parser and database errors are printed with the lexer's line number and the loop goes on. 35 | while (!parser.isEnd()) { 36 | try { 37 | Type next = parser.next_stmt_type(); 38 | if (next == CREATE) { 39 | Create create_stmt = parser.create_stmt(); 40 | engine.create(create_stmt); 41 | cout << "Created table " << create_stmt.getId() << "\n"; 42 | } else if (next == INSERT) { 43 | Insert insert_stmt = parser.insert_stmt(); 44 | int number = engine.insert(insert_stmt); 45 | cout << "Inserted " << number << " rows into table "; 46 | cout << insert_stmt.getId() << "\n"; 47 | } else if (next == DELETE) { 48 | Delete delete_stmt = parser.delete_stmt(); 49 | int number = engine.del(delete_stmt); 50 | cout << "Deleted " << number << " rows from table "; 51 | cout << delete_stmt.getId() << "\n"; 52 | } else if (next ==
SELECT) { 53 | Query query_stmt = parser.query_stmt(); 54 | string table_id = query_stmt.getId(); 55 | 56 | vector<vector<int> > results; 57 | vector<string> names = query_stmt.getColumns(); 58 | 59 | int number = engine.query(query_stmt, results); 60 | if (number > 0) { 61 | if (std::find(names.begin(), names.end(), "*") 62 | != names.end()) { 63 | names = engine.getColumns(table_id); 64 | } 65 | 66 | for (auto name : names) { 67 | cout << name << '\t'; 68 | } 69 | cout << '\n'; 70 | 71 | for (auto record : results) { 72 | for (auto col : record) { 73 | cout << col << '\t'; 74 | } 75 | cout << '\n'; 76 | } 77 | cout << number << " matching rows in "; 78 | cout << query_stmt.getId() << "\n"; 79 | } else { 80 | cout << "No matching rows in "; 81 | cout << query_stmt.getId() << "\n"; 82 | } 83 | } 84 | } catch (LexError e) { 85 | cout << lexptr->getLine() << ": " << e.what() << '\n'; 86 | } catch (ParseError e) { 87 | cout << lexptr->getLine() << ": " << e.what() << '\n'; 88 | } catch(DataBaseError e) { 89 | cout << lexptr->getLine() << ": " << e.what() << '\n'; 90 | } 91 | } 92 | 93 | delete lexptr; 94 | return 0; 95 | } -------------------------------------------------------------------------------- /draft/parse.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joyeecheung/simple-sql-parser/febd5b8eea4e8e5ae378624fe2dac986dd4b5798/draft/parse.xlsx -------------------------------------------------------------------------------- /draft/parsing-table: -------------------------------------------------------------------------------- 1 | # RULES 2 | # for each production A -> alpha 3 | # for a in first(A), put A -> alpha in [A, a] 4 | # if epsilon in first(A), for b in follow(A), put A -> alpha in [A, b] 5 | # if epsilon in first(A) and $ in follow(A), add A -> alpha to [A, $] 6 | 7 | 8 | ssql\_stmt -> create\_stmt | insert\_stmt | delete\_stmt | query\_stmt 9 | {CREATE, INSERT, DELETE, SELECT}, {$} 10 | ----------- 11 |
CREATE: create\_stmt -> CREATE TABLE id L\_PAREN decl\_list R\_PAREN SEMICOLON 12 | INSERT: insert\_stmt -> INSERT INTO id L\_PAREN column\_list R\_PAREN VALUES L\_PAREN value\_list R\_PAREN SEMICOLON 13 | DELETE: delete\_stmt -> DELETE FROM id where\_clause SEMICOLON 14 | SELECT: query\_stmt -> SELECT select\_list FROM id where\_clause SEMICOLON 15 | 16 | 17 | ----------------------- 18 | create\_stmt 19 | -> CREATE TABLE id L\_PAREN decl\_list R\_PAREN SEMICOLON 20 | ------------ 21 | {CREATE}, {$} 22 | ------------ 23 | CREATE: 24 | create\_stmt -> CREATE TABLE id L\_PAREN decl\_list R\_PAREN SEMICOLON 25 | 26 | 27 | ------------------------ 28 | decl\_list 29 | -> decl \_decl\_list 30 | ------------ 31 | {id, PRIMARY}, {R\_PAREN} 32 | ------------ 33 | id: 34 | decl\_list -> decl \_decl\_list 35 | PRIMARY: 36 | decl\_list -> decl \_decl\_list 37 | 38 | ------------------------ 39 | \_decl\_list 40 | -> COMMA decl \_decl\_list 41 | -> epsilon 42 | ------------ 43 | {COMMA, epsilon}, {R\_PAREN} 44 | ------------ 45 | COMMA: 46 | \_decl\_list -> COMMA decl \_decl\_list 47 | R\_PAREN: 48 | \_decl\_list -> epsilon 49 | 50 | ------------------------ 51 | decl 52 | -> id INT default\_spec 53 | -> PRIMARY KEY L\_PAREN column\_list R\_PAREN 54 | ------------ 55 | {id, PRIMARY}, {COMMA, R\_PAREN} 56 | ------------ 57 | id: 58 | decl -> id INT default\_spec 59 | PRIMARY: 60 | decl -> PRIMARY KEY L\_PAREN column\_list R\_PAREN 61 | 62 | ------------------------ 63 | default\_spec 64 | -> DEFAULT ASSIGN expr[true] 65 | -> epsilon 66 | ------------ 67 | {DEFAULT, epsilon}, {COMMA, R\_PAREN} 68 | ------------ 69 | DEFAULT: 70 | default\_spec -> DEFAULT ASSIGN expr[true] 71 | COMMA: 72 | default\_spec -> epsilon 73 | R\_PAREN: 74 | default\_spec -> epsilon 75 | 76 | ----------------------- 77 | column\_list 78 | -> id \_column\_list 79 | ------------ 80 | {id}, {FROM, R\_PAREN} 81 | ------------ 82 | id: 83 | column\_list -> id \_column\_list 84 | 85 | ----------------------- 86 |
\_column\_list 87 | -> COMMA id \_column\_list 88 | -> epsilon 89 | ------------ 90 | {COMMA, epsilon}, {FROM, R\_PAREN} 91 | ------------ 92 | COMMA: 93 | \_column\_list -> COMMA id \_column\_list 94 | FROM: 95 | \_column\_list -> epsilon 96 | R\_PAREN: 97 | \_column\_list -> epsilon 98 | 99 | 100 | ----------------------- 101 | insert\_stmt 102 | ------------ 103 | {INSERT}, {$} 104 | ------------ 105 | INSERT: 106 | insert\_stmt -> INSERT INTO id L\_PAREN column\_list R\_PAREN VALUES L\_PAREN value\_list R\_PAREN SEMICOLON 107 | 108 | 109 | ----------------------- 110 | value\_list 111 | -> expr[true] \_value\_list 112 | ------------ 113 | {PLUS, MINUS, num, L\_PAREN}, {R\_PAREN} 114 | ------------ 115 | PLUS: 116 | value\_list -> expr[true] \_value\_list 117 | MINUS: 118 | value\_list -> expr[true] \_value\_list 119 | num: 120 | value\_list -> expr[true] \_value\_list 121 | L\_PAREN: 122 | value\_list -> expr[true] \_value\_list 123 | 124 | 125 | ----------------------- 126 | \_value\_list 127 | -> COMMA expr[true] \_value\_list 128 | -> epsilon 129 | ------------ 130 | {COMMA, epsilon}, {R\_PAREN} 131 | ------------ 132 | COMMA: 133 | \_value\_list -> COMMA expr[true] \_value\_list 134 | R\_PAREN: 135 | \_value\_list -> epsilon 136 | 137 | 138 | ----------------------- 139 | delete\_stmt 140 | ------------ 141 | {DELETE}, {$} 142 | ------------ 143 | DELETE: 144 | delete\_stmt -> DELETE FROM id where\_clause SEMICOLON 145 | 146 | 147 | ----------------------- 148 | where\_clause 149 | -> WHERE disjunct 150 | -> epsilon 151 | ------------ 152 | {WHERE, epsilon}, {SEMICOLON} 153 | ------------ 154 | WHERE: 155 | where\_clause -> WHERE disjunct 156 | SEMICOLON: 157 | where\_clause -> epsilon 158 | 159 | 160 | ----------------------- 161 | disjunct 162 | -> conjunct \_disjunct 163 | ------------ 164 | {L\_PAREN, NOT, PLUS, MINUS, num, id}, {SEMICOLON, R\_PAREN} 165 | ------------ 166 | L\_PAREN: 167 | disjunct -> conjunct \_disjunct 168 | NOT: 169 | disjunct -> 
conjunct \_disjunct 170 | PLUS: 171 | disjunct -> conjunct \_disjunct 172 | MINUS: 173 | disjunct -> conjunct \_disjunct 174 | num: 175 | disjunct -> conjunct \_disjunct 176 | id: 177 | disjunct -> conjunct \_disjunct 178 | 179 | 180 | ----------------------- 181 | \_disjunct 182 | -> OR conjunct \_disjunct 183 | -> epsilon 184 | ------------ 185 | {OR, epsilon}, {SEMICOLON, R\_PAREN} 186 | ------------ 187 | OR: 188 | \_disjunct -> OR conjunct \_disjunct 189 | SEMICOLON: 190 | \_disjunct -> epsilon 191 | R\_PAREN: 192 | \_disjunct -> epsilon 193 | 194 | 195 | ----------------------- 196 | conjunct 197 | -> bool \_conjunct 198 | ------------ 199 | {L\_PAREN, NOT, PLUS, MINUS, num, id}, {OR, SEMICOLON, R\_PAREN} 200 | ------------ 201 | L\_PAREN: 202 | conjunct -> bool \_conjunct 203 | NOT: 204 | conjunct -> bool \_conjunct 205 | PLUS: 206 | conjunct -> bool \_conjunct 207 | MINUS: 208 | conjunct -> bool \_conjunct 209 | num: 210 | conjunct -> bool \_conjunct 211 | id: 212 | conjunct -> bool \_conjunct 213 | 214 | 215 | ----------------------- 216 | \_conjunct 217 | -> AND bool \_conjunct 218 | -> epsilon 219 | ------------ 220 | {AND, epsilon}, {OR, SEMICOLON, R\_PAREN} 221 | ------------ 222 | AND: 223 | \_conjunct -> AND bool \_conjunct 224 | OR: 225 | \_conjunct -> epsilon 226 | SEMICOLON: 227 | \_conjunct -> epsilon 228 | R\_PAREN: 229 | \_conjunct -> epsilon 230 | 231 | 232 | ----------------------- 233 | bool 234 | -> L\_PAREN disjunct R\_PAREN 235 | -> NOT bool 236 | -> comp 237 | ------------ 238 | {L\_PAREN, NOT, PLUS, MINUS, num, id}, {AND, OR, SEMICOLON, R\_PAREN} 239 | ------------ 240 | L\_PAREN: 241 | bool -> L\_PAREN disjunct R\_PAREN 242 | NOT: 243 | bool -> NOT bool 244 | PLUS: 245 | bool -> comp 246 | MINUS: 247 | bool -> comp 248 | num: 249 | bool -> comp 250 | id: 251 | bool -> comp 252 | 253 | 254 | ----------------------- 255 | comp 256 | -> expr[false] rop expr[false] 257 | ------------ 258 | {PLUS, MINUS, num, id}, {AND, OR, SEMICOLON,
R\_PAREN} 259 | ------------ 260 | PLUS: 261 | comp -> expr[false] rop expr[false] 262 | MINUS: 263 | comp -> expr[false] rop expr[false] 264 | num: 265 | comp -> expr[false] rop expr[false] 266 | id: 267 | comp -> expr[false] rop expr[false] 268 | 269 | 270 | ----------------------- 271 | expr[] 272 | -> term[] \_expr[] 273 | ------------ 274 | {PLUS, MINUS, num} true{L\_PAREN} false{id} 275 | true{COMMA, R\_PAREN} false{NEQ, EQ, LT, GT, LEQ, GEQ, AND, OR, SEMICOLON, R\_PAREN} 276 | ------------ 277 | PLUS: 278 | expr[] -> term[] \_expr[] 279 | MINUS: 280 | expr[] -> term[] \_expr[] 281 | num: 282 | expr[] -> term[] \_expr[] 283 | L\_PAREN && simple == true: 284 | expr[true] -> term[true] \_expr[true] 285 | id && simple == false: 286 | expr[false] -> term[false] \_expr[false] 287 | 288 | 289 | ----------------------- 290 | \_expr[] 291 | -> PLUS term[] \_expr[] 292 | -> MINUS term[] \_expr[] 293 | -> epsilon 294 | ------------ 295 | {PLUS, MINUS, epsilon}, 296 | true{COMMA, R\_PAREN} 297 | false{NEQ, EQ, LT, GT, LEQ, GEQ, AND, OR, SEMICOLON, R\_PAREN} 298 | ------------ 299 | PLUS: 300 | \_expr[] -> PLUS term[] \_expr[] 301 | MINUS: 302 | \_expr[] -> MINUS term[] \_expr[] 303 | true: 304 | COMMA || R\_PAREN: 305 | \_expr[] -> epsilon 306 | false: 307 | NEQ || EQ || LT || GT || LEQ || GEQ || AND || OR || SEMICOLON || R\_PAREN 308 | \_expr[] -> epsilon 309 | 310 | 311 | ----------------------- 312 | term[] 313 | -> unary[] \_term[] 314 | ------------ 315 | {PLUS, MINUS, num} true{L\_PAREN} false{id} 316 | true{PLUS, MINUS, COMMA, R\_PAREN} 317 | false{PLUS, MINUS, NEQ, EQ, LT, GT, LEQ, GEQ, AND, OR, SEMICOLON, R\_PAREN} 318 | ------------ 319 | PLUS: 320 | term[] -> unary[] \_term[] 321 | MINUS: 322 | term[] -> unary[] \_term[] 323 | num: 324 | term[] -> unary[] \_term[] 325 | L\_PAREN && simple == true: 326 | term[true] -> unary[true] \_term[true] 327 | id && simple == false: 328 | term[false] -> unary[false] \_term[false] 329 | 330 | 331 | ----------------------- 
332 | \_term[] 333 | -> MUL unary[] \_term[] 334 | -> DIV unary[] \_term[] 335 | -> epsilon 336 | ------------ 337 | {MUL, DIV, epsilon}, 338 | true{PLUS, MINUS, COMMA, R\_PAREN} 339 | false{PLUS, MINUS, NEQ, EQ, LT, GT, LEQ, GEQ, AND, OR, SEMICOLON, R\_PAREN} 340 | ------------ 341 | MUL: 342 | \_term[] -> MUL unary[] \_term[] 343 | DIV: 344 | \_term[] -> DIV unary[] \_term[] 345 | PLUS || MINUS: 346 | \_term[] -> epsilon 347 | true: 348 | COMMA || R\_PAREN: 349 | \_term[] -> epsilon 350 | false: 351 | NEQ || EQ || LT || GT || LEQ || GEQ || AND || OR || SEMICOLON || R\_PAREN 352 | \_term[] -> epsilon 353 | 354 | 355 | 356 | ----------------------- 357 | unary[] 358 | -> PLUS unary[] 359 | -> MINUS unary[] 360 | -> num 361 | [true] -> L\_PAREN expr[true] R\_PAREN 362 | [false] -> id 363 | ------------ 364 | {PLUS, MINUS, num} true{L\_PAREN} false{id} 365 | true {MUL, DIV, PLUS, MINUS, COMMA, R\_PAREN} 366 | false {MUL, DIV, PLUS, MINUS, NEQ, EQ, LT, GT, LEQ, GEQ, AND, OR, SEMICOLON, R\_PAREN} 367 | ------------ 368 | PLUS: 369 | unary[] -> PLUS unary[] 370 | MINUS: 371 | unary[] -> MINUS unary[] 372 | num: 373 | unary[] -> num 374 | L\_PAREN && simple == true: 375 | unary[true] -> L\_PAREN expr[true] R\_PAREN 376 | id && simple == false: 377 | unary[false] -> id 378 | 379 | 380 | ----------------------- 381 | rop 382 | ------------ 383 | {NEQ, EQ, LT, GT, LEQ, GEQ}, {PLUS, MINUS, num, id} 384 | ------------ 385 | NEQ: 386 | rop -> NEQ 387 | EQ: 388 | rop -> EQ 389 | LT: 390 | rop -> LT 391 | GT: 392 | rop -> GT 393 | LEQ: 394 | rop -> LEQ 395 | GEQ: 396 | rop -> GEQ 397 | 398 | ----------------------- 399 | query\_stmt 400 | ------------ 401 | {SELECT}, {$} 402 | ------------ 403 | SELECT: 404 | query\_stmt -> SELECT select\_list FROM id where\_clause SEMICOLON 405 | 406 | ----------------------- 407 | select\_list 408 | ------------ 409 | {MUL, id}, {FROM} 410 | ------------ 411 | MUL: 412 | select\_list -> MUL 413 | id: 414 | select\_list -> column\_list 415 |
-------------------------------------------------------------------------------- /src/Engine.cpp: -------------------------------------------------------------------------------- 1 | #include "Engine.h" 2 | 3 | namespace ssql { 4 | 5 | bool Engine::create(const Create &create_stmt) { 6 | // check the table is not created before 7 | string table_id = create_stmt.getId(); 8 | if (tables.find(table_id) != tables.end()) { 9 | throw DataBaseError(string("Table ") + table_id 10 | + string(" already exists")); 11 | } 12 | 13 | // check no multiple primary keys 14 | const vector > &keys = create_stmt.getKeys(); 15 | if (keys.size() > 1) { 16 | throw DataBaseError("Multiple primary key definitions"); 17 | } 18 | 19 | // check no duplicate column definitions 20 | const multimap &defs = create_stmt.getDefaults(); 21 | map unique_defs; 22 | for(auto it = defs.begin(); it != defs.end(); 23 | it = defs.upper_bound(it->first)) { 24 | if (defs.count(it->first) > 1) { 25 | throw DataBaseError(string("Multiple definitions for ") 26 | + it->first); 27 | } else { 28 | unique_defs[it->first] = it->second; 29 | } 30 | } 31 | 32 | // check all primary keys have definitions 33 | vector primary; 34 | if (keys.size() > 0) { 35 | primary = keys[0]; 36 | for (const auto &key: primary) { 37 | if (defs.find(key) == defs.end()) { 38 | throw DataBaseError(string("Undefined key ") + key); 39 | } 40 | } 41 | } else { // no keys 42 | for (const auto &kv : defs) { 43 | primary.push_back(kv.first); 44 | } 45 | } 46 | 47 | 48 | if (unique_defs.size() > MAX_COL) { 49 | throw DataBaseError(string("Number of columns should be no more than ") 50 | + to_string(MAX_COL)); 51 | } 52 | 53 | if (primary.size() > MAX_KEY) { 54 | throw DataBaseError(string("Number of keys should be no more than ") 55 | + to_string(MAX_KEY)); 56 | } 57 | 58 | Table new_table = Table(table_id, unique_defs, primary); 59 | tables[table_id] = new_table; 60 | return true; 61 | } 62 | 63 | // insert the values 64 | bool 
Engine::insert(const Insert &insert_stmt) { 65 | // the table should exist 66 | string table_id = insert_stmt.getId(); 67 | auto it = tables.find(table_id); 68 | if (it == tables.end()) { 69 | throw DataBaseError(string("Cannot find table ") + table_id); 70 | } 71 | 72 | set checked; 73 | const vector &columns = insert_stmt.getColumns(); 74 | const vector &values = insert_stmt.getValues(); 75 | 76 | // # of columns should equal to # of values 77 | if (columns.size() != values.size()) { 78 | throw DataBaseError("Numbers of columns and values do not match"); 79 | } 80 | 81 | // no duplicate columns 82 | for (const auto &col : columns) { 83 | if (checked.find(col) != checked.end()) { 84 | throw DataBaseError(string("Duplicate column ") + col); 85 | } else { 86 | checked.insert(col); 87 | } 88 | } 89 | 90 | // no primary key constraint violation(all have/already in) 91 | // all columns should be in the schema of the table 92 | it->second.insert(columns, values); 93 | return true; 94 | } 95 | 96 | // delete the records 97 | int Engine::del(const Delete &delete_stmt) { 98 | // the table should exist 99 | string table_id = delete_stmt.getId(); 100 | auto it = tables.find(table_id); 101 | if (it == tables.end()) { 102 | throw DataBaseError(string("Cannot find table ") + table_id); 103 | } 104 | 105 | // columns occurring in the where clause (if any) 106 | // should be in the schema of the table 107 | return it->second.del(delete_stmt.getWhere()); 108 | } 109 | 110 | // query the records 111 | int Engine::query(const Query &query_stmt, 112 | vector > &results) const { 113 | // the table should exist 114 | string table_id = query_stmt.getId(); 115 | auto it = tables.find(table_id); 116 | if (it == tables.end()) { 117 | throw DataBaseError(string("Cannot find table ") + table_id); 118 | } 119 | 120 | // all columns (except *) in the select list should be in 121 | // the schema of the table 122 | // columns occurring in the where clause (if any) 123 | // should be in the 
schema of the table 124 | return it->second.query(query_stmt.getColumns(), query_stmt.getWhere(), results); 125 | } 126 | 127 | const vector &Engine::getColumns(string table_id) const { 128 | auto it = tables.find(table_id); 129 | if (it == tables.end()) { 130 | throw DataBaseError(string("Cannot find table ") + table_id); 131 | } 132 | 133 | return it->second.getColumns(); 134 | } 135 | 136 | } -------------------------------------------------------------------------------- /src/Engine.h: -------------------------------------------------------------------------------- 1 | #include "Table.h" 2 | #include "Statements.h" 3 | 4 | #ifndef ENGINE_H 5 | #define ENGINE_H 6 | 7 | #define MAX_COL 100 8 | #define MAX_KEY 100 9 | 10 | namespace ssql { 11 | 12 | /** 13 | * Responsible for semantics analysis 14 | * (those that doesn't need the knowledge of schema) 15 | * and keeping track of tables in memory. 16 | * Also responsible for extracting clean components inside statments, 17 | * and pass them down to `Table`. 
18 | */ 19 | class Engine { 20 | public: 21 | Engine() {} 22 | bool create(const Create &create_stmt); 23 | // insert the values 24 | bool insert(const Insert &insert_stmt); 25 | // delete the records 26 | int del(const Delete &delete_stmt); 27 | // query the records 28 | int query(const Query &query_stmt, vector > &results) const; 29 | // get column names of the table with the given id 30 | const vector &getColumns(string table_id) const; 31 | ~Engine() {} 32 | private: 33 | map tables; 34 | }; 35 | 36 | } 37 | 38 | #endif -------------------------------------------------------------------------------- /src/Expr.cpp: -------------------------------------------------------------------------------- 1 | #include "Expr.h" 2 | 3 | namespace ssql { 4 | 5 | Expr::Expr(const Expr &other) { 6 | value = other.value; 7 | type = other.type; 8 | left = right = NULL; 9 | 10 | if (other.left != NULL) { // other has a left 11 | setLeft(*(other.left)); 12 | } else { // others doesn't have left 13 | left = NULL; 14 | } 15 | 16 | if (other.right != NULL) { 17 | setRight((*other.right)); 18 | } else { 19 | right = NULL; 20 | } 21 | } 22 | 23 | void Expr::setLeft(const Expr &other) { 24 | if (left != NULL) { 25 | delete left; 26 | left = NULL; 27 | } 28 | 29 | left = new Expr(other.type); 30 | *left = other; 31 | } 32 | 33 | void Expr::setRight(const Expr &other) { 34 | if (right != NULL) { 35 | delete right; 36 | right = NULL; 37 | } 38 | 39 | right = new Expr(other.type); 40 | *right = other; 41 | } 42 | 43 | void Expr::setLeftMost(const Expr &other) { 44 | Expr *cur = this; 45 | while(cur->hasLeft()) { 46 | cur = cur->left; 47 | } 48 | cur->setLeft(other); 49 | } 50 | 51 | Expr &Expr::operator=(const Expr &rhs) { 52 | if (rhs.left != NULL) { // other has a left 53 | setLeft(*(rhs.left)); 54 | } else { // others doesn't have left 55 | if (left != NULL) { 56 | delete left; 57 | left = NULL; 58 | } 59 | } 60 | 61 | if (rhs.right != NULL) { 62 | setRight((*rhs.right)); 63 | } else { 64 
| if (right != NULL) { 65 | delete right; 66 | right = NULL; 67 | } 68 | } 69 | 70 | value = rhs.value; 71 | type = rhs.type; 72 | return *this; 73 | } 74 | 75 | 76 | ostream &operator<<(ostream &s, const Expr &expr) { 77 | if (expr.isValue()) { // id or number 78 | s << expr.getValue(); 79 | return s; 80 | } 81 | 82 | // real expression 83 | s << "("; 84 | if (expr.hasLeft()) { 85 | s << expr.getLeft(); 86 | } 87 | 88 | if (!expr.isNull()) { // operator 89 | s << " " << Token(expr.getType()) << " "; 90 | } 91 | 92 | if (expr.hasRight()) { 93 | s << expr.getRight(); 94 | } 95 | 96 | s << ")"; 97 | return s; 98 | } 99 | 100 | int Expr::eval(vector record, map indexes) const { 101 | if (type == NUM) { 102 | return value.getNumber(); 103 | } else if (type == ID) { 104 | string id = value.getId(); 105 | auto it = indexes.find(id); 106 | if (it == indexes.end()) { 107 | throw RuntimeError(id + " not found in the scheme"); 108 | } 109 | return record[it->second]; 110 | } else if (type == AND) { 111 | return left->eval(record, indexes) && right->eval(record, indexes); 112 | } else if (type == EQ) { 113 | return left->eval(record, indexes) == right->eval(record, indexes); 114 | } else if (type == NEQ) { 115 | return left->eval(record, indexes) != right->eval(record, indexes); 116 | } else if (type == LT) { 117 | return left->eval(record, indexes) < right->eval(record, indexes); 118 | } else if (type == GT) { 119 | return left->eval(record, indexes) > right->eval(record, indexes); 120 | } else if (type == LEQ) { 121 | return left->eval(record, indexes) <= right->eval(record, indexes); 122 | } else if (type == GEQ) { 123 | return left->eval(record, indexes) >= right->eval(record, indexes); 124 | } else if (type == PLUS) { 125 | if (hasLeft()) { 126 | return left->eval(record, indexes) + right->eval(record, indexes); 127 | } else { 128 | return right->eval(record, indexes); 129 | } 130 | } else if (type == MINUS) { 131 | if (hasLeft()) { 132 | return left->eval(record, 
indexes) - right->eval(record, indexes); 133 | } else { 134 | return 0 - right->eval(record, indexes); 135 | } 136 | } else if (type == MUL) { 137 | return left->eval(record, indexes) * right->eval(record, indexes); 138 | } else if (type == DIV) { 139 | int left_result = left->eval(record, indexes); 140 | int right_result = right->eval(record, indexes); 141 | if (right_result == 0) { 142 | throw RuntimeError("Division by zero"); 143 | } 144 | return left_result / right_result; 145 | } else if (type == OR) { 146 | return left->eval(record, indexes) || right->eval(record, indexes); 147 | } else if (type == NOT) { 148 | return ! right->eval(record, indexes); 149 | } else if (type == NONE) { 150 | return true; // for empty where 151 | } 152 | return 0; 153 | } 154 | 155 | int Expr::eval() const { 156 | vector record; 157 | map indexes; 158 | return eval(record, indexes); // empty 159 | } 160 | 161 | Expr::~Expr() { 162 | if (left != NULL) { 163 | delete left; 164 | left = NULL; 165 | } 166 | 167 | if (right != NULL) { 168 | delete right; 169 | right = NULL; 170 | } 171 | } 172 | 173 | } -------------------------------------------------------------------------------- /src/Expr.h: -------------------------------------------------------------------------------- 1 | #include "Token.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifndef EXPR_H 8 | #define EXPR_H 9 | 10 | namespace ssql { 11 | 12 | using std::vector; 13 | using std::string; 14 | using std::map; 15 | 16 | 17 | class RuntimeError : std::exception { 18 | public: 19 | RuntimeError(string _msg) : msg(_msg) {} 20 | ~RuntimeError() throw () {} // Updated 21 | const char *what() const throw() { 22 | return msg.c_str(); 23 | } 24 | private: 25 | string msg; 26 | }; 27 | 28 | /* Expression class with a tree-like structure. 29 | * 30 | * There are three type of expressions: 31 | * 1. Binary expression. 
`left` and `right` are pointers to 32 | * left and right expressions, `type` is the operator, 33 | * `value` is a token with type `NONE`. 34 | * 2. Unary expression. `left` is NULL, `right` is the right operand, 35 | * `type` is the unary operator, `value` is a token with type `NONE`. 36 | * 3. Operand. `type`is ID or NUM, `left` and `right` are NULL, 37 | * `value` is the token for the operand. 38 | */ 39 | class Expr { 40 | public: 41 | Expr(Type t = NONE) : left(NULL), right(NULL), value(NONE), type(t) {} 42 | Expr(const Expr &other); 43 | 44 | void setLeft(const Expr &other); 45 | void setRight(const Expr &other); 46 | // put other into the left of the 47 | // leftmost expression in the tree 48 | void setLeftMost(const Expr &other); 49 | 50 | void setValue(Token v) { 51 | value = v; 52 | } 53 | void setType(Type t) { 54 | type = t; 55 | } 56 | bool isNull() const { 57 | return type == NONE; 58 | } 59 | 60 | bool hasLeft() const { 61 | return left != NULL; 62 | } 63 | 64 | bool hasRight() const { 65 | return right != NULL; 66 | } 67 | 68 | const Expr &getRight() const { 69 | return *right; 70 | } 71 | 72 | const Expr &getLeft() const { 73 | return *left; 74 | } 75 | 76 | Type getType() const { 77 | return type; 78 | } 79 | 80 | bool isValue() const { 81 | return type == ID || type == NUM; 82 | } 83 | 84 | Token getValue() const { 85 | return value; 86 | } 87 | 88 | Expr &operator=(const Expr &rhs); 89 | friend ostream &operator<<(ostream &s, const Expr &expr); 90 | 91 | // pass in the row record and id-to-index map 92 | // e.g. eval([1,2,3], {"sid": 0, "age": 1, "name": 2}) 93 | int eval(vector record, map indexes) const; 94 | // for simple expressions i.e. 
no id binding 95 | int eval() const; 96 | 97 | ~Expr(); 98 | private: 99 | Expr *left; 100 | Expr *right; 101 | Token value; 102 | Type type; 103 | }; 104 | 105 | const Expr NULL_EXPR; 106 | 107 | } 108 | 109 | #endif -------------------------------------------------------------------------------- /src/IO.cpp: -------------------------------------------------------------------------------- 1 | #include "IO.h" 2 | 3 | namespace ssql { 4 | 5 | void IO::printLine(int length, char ch) { 6 | for (int i = 0; i < length; ++i) { 7 | cout << ch; 8 | } 9 | cout << '\n'; 10 | } 11 | 12 | void IO::printData(const vector &names, 13 | vector > &results) { 14 | // print header 15 | printLine(names.size() * WIDTH_PER_COL); 16 | for (const auto &name : names) { 17 | cout << "|" << setw(WIDTH_PER_COL - 1) << name; 18 | } 19 | cout << "|\n"; 20 | 21 | // print data 22 | printLine(names.size() * WIDTH_PER_COL); 23 | for (const auto &record : results) { 24 | for (auto col : record) { 25 | cout << "|" << setw(WIDTH_PER_COL - 1) << col; 26 | } 27 | cout << "|\n"; 28 | printLine(names.size() * WIDTH_PER_COL); 29 | } 30 | } 31 | 32 | void IO::loop() { 33 | while (!parser.isEnd()) { 34 | try { 35 | Type next = parser.next_stmt_type(); 36 | execute(next); 37 | } catch (LexError e) { 38 | cout << "line " << lexer.getLine() << ", "; 39 | cout << "column " << lexer.getCol() << ": "; 40 | cout << e.what() << '\n'; 41 | } catch (ParseError e) { 42 | cout << "line " << parser.getLine() << ", "; 43 | cout << "column " << parser.getCol() << ": "; 44 | cout << e.what() << '\n'; 45 | parser.consume_until_start(); 46 | } catch (DataBaseError e) { 47 | cout << "line " << parser.getLine() << ": "; 48 | cout << e.what() << '\n'; 49 | } catch (RuntimeError e) { 50 | cout << "line " << parser.getLine() << ", "; 51 | cout << e.what() << '\n'; 52 | parser.consume_until_start(); 53 | } 54 | } 55 | } 56 | 57 | void IO::execute(Type next) { 58 | if (next == CREATE) { 59 | Create create_stmt = 
parser.create_stmt(); 60 | engine.create(create_stmt); 61 | cout << "Created table " << create_stmt.getId() << "\n"; 62 | } else if (next == INSERT) { 63 | Insert insert_stmt = parser.insert_stmt(); 64 | int number = engine.insert(insert_stmt); 65 | cout << "Inserted " << number << " rows into table "; 66 | cout << insert_stmt.getId() << "\n"; 67 | } else if (next == DELETE) { 68 | Delete delete_stmt = parser.delete_stmt(); 69 | int number = engine.del(delete_stmt); 70 | cout << "Deleted " << number << " rows from table "; 71 | cout << delete_stmt.getId() << "\n"; 72 | } else if (next == SELECT) { 73 | Query query_stmt = parser.query_stmt(); 74 | 75 | string table_id = query_stmt.getId(); 76 | vector > results; 77 | vector names = query_stmt.getColumns(); 78 | 79 | int number = engine.query(query_stmt, results); 80 | if (number > 0) { 81 | // replace * with column names 82 | if (std::find(names.begin(), names.end(), "*") 83 | != names.end()) { 84 | names = engine.getColumns(table_id); 85 | } 86 | // print results 87 | printData(names, results); 88 | 89 | cout << number << " matching rows in "; 90 | cout << table_id << "\n"; 91 | } else { 92 | cout << "No matching rows in "; 93 | cout << table_id << "\n"; 94 | } 95 | } else if (next == END) { 96 | return; 97 | } 98 | } 99 | 100 | } -------------------------------------------------------------------------------- /src/IO.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "Token.h" 5 | #include "Lexer.h" 6 | #include "Parser.h" 7 | #include "Engine.h" 8 | #include "Expr.h" 9 | 10 | #ifndef IO_H 11 | #define IO_H 12 | 13 | namespace ssql { 14 | 15 | #define WIDTH_PER_COL 15 16 | using std::cout; 17 | using std::setw; 18 | 19 | class IO { 20 | public: 21 | IO(Parser &p, Lexer &l, Engine &e) : parser(p), lexer(l), engine(e) {} 22 | // print a line of ch with given length 23 | void printLine(int length, char ch = '-'); 24 | // print the data inside 
given results and column names 25 | void printData(const vector &names, 26 | vector > &results); 27 | // loop to execute statements 28 | void loop(); 29 | // execute statements, output error prompts 30 | void execute(Type next); 31 | private: 32 | Parser &parser; 33 | Lexer &lexer; 34 | Engine &engine; 35 | }; 36 | 37 | } 38 | 39 | #endif -------------------------------------------------------------------------------- /src/Lexer.cpp: -------------------------------------------------------------------------------- 1 | #include "Lexer.h" 2 | 3 | namespace ssql { 4 | 5 | using std::size_t; 6 | 7 | map Lexer::words; 8 | map Lexer::singleOp; 9 | map Lexer::ops; 10 | 11 | void Lexer::initLookups() { 12 | words["create"] = CREATE; 13 | words["table"] = TABLE; 14 | words["int"] = INT; 15 | words["default"] = DEFAULT; 16 | words["primary"] = PRIMARY; 17 | words["key"] = KEY; 18 | words["insert"] = INSERT; 19 | words["into"] = INTO; 20 | words["values"] = VALUES; 21 | words["delete"] = DELETE; 22 | words["from"] = FROM; 23 | words["where"] = WHERE; 24 | words["select"] = SELECT; 25 | 26 | singleOp['+'] = PLUS; 27 | singleOp['-'] = MINUS; 28 | singleOp['*'] = MUL; 29 | singleOp['/'] = DIV; 30 | singleOp['('] = L_PAREN; 31 | singleOp[')'] = R_PAREN; 32 | singleOp[','] = COMMA; 33 | singleOp[';'] = SEMICOLON; 34 | 35 | ops["&&"] = AND; 36 | ops["||"] = OR; 37 | ops["!"] = NOT; 38 | ops["<"] = LT; 39 | ops[">"] = GT; 40 | ops["<>"] = NEQ; 41 | ops["="] = ASSIGN; 42 | ops["=="] = EQ; 43 | ops[">="] = GEQ; 44 | ops["<="] = LEQ; 45 | ops["+"] = PLUS; 46 | ops["-"] = MINUS; 47 | ops["*"] = MUL; 48 | ops["/"] = DIV; 49 | ops["("] = L_PAREN; 50 | ops[")"] = R_PAREN; 51 | ops[","] = COMMA; 52 | ops[";"] = SEMICOLON; 53 | } 54 | 55 | bool Lexer::isOp (char ch) const { 56 | return ch == '&' || ch == '|' || ch == '!' 
|| ch == '<' 57 | || ch == '>' || ch == '=' || ch == '+' || ch == '-' 58 | || ch == '*' || ch == '/' || ch == '(' || ch == ')' 59 | || ch == ',' || ch == ';'; 60 | } 61 | 62 | Token Lexer::next() { 63 | while(!isEnd()) { 64 | if (isdigit(peek)) { // numbers 65 | num_buffer = 0; 66 | do { 67 | num_buffer = num_buffer * 10 + advance() - '0'; 68 | } while (isdigit(peek)); 69 | 70 | return Token(NUM, &num_buffer, sizeof(int)); 71 | } else if (isalpha(peek) || peek == '_') { // keywords or identifilers 72 | memset(buffer, '\0', sizeof(buffer)); 73 | int count = 0; 74 | do { 75 | buffer[count++] = advance(); 76 | } while (count < BUF_SIZE && (isalnum(peek) || peek == '_')); 77 | 78 | if (count == BUF_SIZE) { 79 | throw LexError("Exceed maximun identifier length"); 80 | } 81 | 82 | string str(buffer); 83 | for (size_t i = 0; i < str.size(); ++i) { 84 | str[i] = tolower(str[i]); // case insensitive 85 | } 86 | 87 | if (words.find(str) != words.end()) { // keyword 88 | return Token(words[str]); 89 | } else { 90 | str = buffer; // case sensitive for identifier 91 | return Token(ID, str.c_str(), str.size()); 92 | } 93 | } else if (isspace(peek)) { // white space 94 | if (advance() == '\n') { 95 | line++; 96 | col = 1; 97 | } 98 | } else if (singleOp.find(peek) != singleOp.end()) { 99 | // deterministic single character operators 100 | return Token(singleOp[advance()]); 101 | } else if (isOp(peek)) { // operators 102 | memset(buffer, '\0', sizeof(buffer)); 103 | int count = 0; 104 | do { 105 | buffer[count++] = advance(); 106 | } while (isOp(peek) && count << MAX_OP_SIZE); 107 | 108 | string str(buffer); 109 | 110 | while (str.size() != 0 && ops.find(str) == ops.end()) { // too long 111 | char temp = str[str.size() - 1]; // last charactor 112 | str = str.substr(0, str.size() - 1); 113 | retreat(temp); 114 | } 115 | 116 | if (str.size() == 0) { 117 | throw LexError("Invalid operator"); 118 | } 119 | 120 | return Token(ops[str]); 121 | } else { // error 122 | string msg = 
"Invalid lexeme "; 123 | msg.push_back(advance()); 124 | throw LexError(msg); 125 | } 126 | } 127 | 128 | // peek == EOF 129 | advance(); 130 | return Token(END); // $ 131 | } 132 | 133 | } -------------------------------------------------------------------------------- /src/Lexer.h: -------------------------------------------------------------------------------- 1 | #include "Token.h" 2 | 3 | #include 4 | #include 5 | 6 | #ifndef LEXER_H 7 | #define LEXER_H 8 | 9 | namespace ssql { 10 | 11 | using std::istream; 12 | using std::string; 13 | using std::map; 14 | 15 | #define BUF_SIZE 256 16 | #define MAX_OP_SIZE 2 17 | 18 | class LexError : std::exception { 19 | public: 20 | LexError(string _msg) : msg(_msg) {} 21 | ~LexError() throw () {} // Updated 22 | const char* what() const throw() { return msg.c_str(); } 23 | private: 24 | string msg; 25 | }; 26 | 27 | class Lexer { 28 | public: 29 | Lexer(istream &s) : stream(s) { 30 | line = 1; 31 | col = 1; 32 | initLookups(); 33 | } 34 | 35 | bool isOp (char ch) const; 36 | 37 | Token next(); 38 | 39 | int getCol() const { 40 | return col; 41 | } 42 | 43 | int getLine() const { 44 | return line; 45 | } 46 | 47 | // can't be read anymore i.e. peek == EOF 48 | bool isEnd() { 49 | peek = stream.peek(); 50 | return peek == EOF; 51 | } 52 | 53 | // advance in the stream, return the read character 54 | char advance() { 55 | char last = stream.get(); 56 | peek = stream.peek(); 57 | col++; // record column 58 | return last; 59 | } 60 | 61 | void retreat(char ch) { // put ch back to stream 62 | stream.putback(ch); 63 | col--; // record column 64 | peek = ch; 65 | } 66 | 67 | static map words; // keywords 68 | // operators that can be determined by one character 69 | static map singleOp; 70 | static map ops; // other operators 71 | private: 72 | static void initLookups(); // init the maps 73 | 74 | char peek; // next character to read by advance() 75 | char buffer[BUF_SIZE]; // for keywords, operators etc. 
76 | int num_buffer; // for accumulating numbers 77 | int line; // line number 78 | int col; // column number 79 | istream &stream; // stream for lexer 80 | }; 81 | 82 | } 83 | 84 | #endif -------------------------------------------------------------------------------- /src/Parser.cpp: -------------------------------------------------------------------------------- 1 | #include "Parser.h" 2 | 3 | namespace ssql { 4 | 5 | using std::pair; 6 | 7 | Type Parser::next_stmt_type() { 8 | if (match(CREATE) || match(INSERT) 9 | || match(DELETE) || match(SELECT)) { 10 | return lookahead.getType(); 11 | } else if (match(END)) { 12 | return END; 13 | } else { 14 | throw ParseError(string("Syntax error: statments should start with") 15 | + string(" CREATE, INSERT, DELETE or SELECT")); 16 | } 17 | } 18 | 19 | Create Parser::create_stmt() { 20 | if (match(CREATE)) { 21 | // create_stmt -> CREATE TABLE id L_PAREN decl_list 22 | // R_PAREN SEMICOLON 23 | cosume(CREATE); 24 | cosume(TABLE); 25 | string table_id = id(); 26 | cosume(L_PAREN); 27 | multimap defaults; 28 | vector > keys; 29 | decl_list(defaults, keys); 30 | cosume(R_PAREN); 31 | cosume(SEMICOLON); 32 | return Create(table_id, defaults, keys); 33 | } else { 34 | throw ParseError("Syntax error"); 35 | } 36 | } 37 | 38 | Parser &Parser::decl_list(multimap &defaults, 39 | vector > &keys) { 40 | if (match(ID) || match(PRIMARY)) { 41 | // decl_list -> decl _decl_list 42 | decl(defaults, keys); 43 | _decl_list(defaults, keys); 44 | } else { 45 | throw ParseError("Syntax error"); 46 | } 47 | return *this; 48 | } 49 | 50 | Parser &Parser::_decl_list(multimap &defaults, 51 | vector > &keys) { 52 | if (match(COMMA)) { 53 | // _decl_list -> COMMA decl _decl_list 54 | cosume(COMMA); 55 | decl(defaults, keys); 56 | _decl_list(defaults, keys); 57 | } else if (match(R_PAREN)) { 58 | ; // _decl_list -> epsilon 59 | } else { 60 | throw ParseError("Syntax error"); 61 | } 62 | return *this; 63 | } 64 | 65 | Parser &Parser::decl(multimap 
&defaults, 66 | vector > &keys) { 67 | if (match(ID)) { 68 | // decl -> id INT default_spec 69 | string name = id(); 70 | cosume(INT); 71 | int number = default_spec(); 72 | defaults.insert(std::pair(name, number)); 73 | } else if (match(PRIMARY)) { 74 | // decl -> PRIMARY KEY L_PAREN column_list R_PAREN 75 | cosume(PRIMARY); 76 | cosume(KEY); 77 | cosume(L_PAREN); 78 | vector columns; 79 | column_list(columns); 80 | keys.push_back(columns); 81 | cosume(R_PAREN); 82 | } else { 83 | throw ParseError("Syntax error"); 84 | } 85 | return *this; 86 | } 87 | 88 | int Parser::default_spec() { 89 | if (match(DEFAULT)) { 90 | // default_spec -> DEFAULT ASSIGN expr[true] 91 | cosume(DEFAULT); 92 | cosume(ASSIGN); 93 | Expr spec = expr(true); 94 | #ifdef TRACK 95 | std::cout << '\n' << spec << '\n'; 96 | #endif 97 | return spec.eval(); 98 | } else if (match(COMMA) || match(R_PAREN)) { 99 | // default_spec -> epsilon 100 | return 0; // if no default, default to zero 101 | } else { 102 | throw ParseError("Syntax error"); 103 | } 104 | } 105 | 106 | Parser &Parser::column_list(vector &names) { 107 | if (match(ID)) { 108 | // column_list -> id _column_list 109 | names.push_back(id()); 110 | _column_list(names); 111 | } else { 112 | throw ParseError("Syntax error"); 113 | } 114 | return *this; 115 | } 116 | 117 | Parser &Parser::_column_list(vector &names) { 118 | if (match(COMMA)) { 119 | // _column_list -> COMMA id _column_list 120 | cosume(COMMA); 121 | names.push_back(id()); 122 | _column_list(names); 123 | } else if (match(FROM) || match(R_PAREN)) { 124 | ; // _column_list -> epsilon 125 | } else { 126 | throw ParseError("Syntax error"); 127 | } 128 | return *this; 129 | } 130 | 131 | Insert Parser::insert_stmt() { 132 | if (match(INSERT)) { 133 | // insert_stmt -> INSERT INTO id L_PAREN column_list R_PAREN 134 | // VALUES L_PAREN value_list R_PAREN SEMICOLON 135 | cosume(INSERT); 136 | cosume(INTO); 137 | string table_id = id(); 138 | cosume(L_PAREN); 139 | vector columns; 
140 | column_list(columns); 141 | cosume(R_PAREN); 142 | cosume(VALUES); 143 | cosume(L_PAREN); 144 | vector values; 145 | value_list(values); 146 | cosume(R_PAREN); 147 | cosume(SEMICOLON); 148 | return Insert(table_id, columns, values); 149 | } else { 150 | throw ParseError("Syntax error"); 151 | } 152 | } 153 | 154 | Parser &Parser::value_list(vector &values) { 155 | if (match(PLUS) || match(MINUS) 156 | || match(NUM) || match(L_PAREN)) { 157 | // value_list -> expr[true] _value_list 158 | Expr value = expr(true); 159 | #ifdef TRACK 160 | std::cout << '\n' << value << '\n'; 161 | #endif 162 | values.push_back(value.eval()); 163 | _value_list(values); 164 | } else { 165 | throw ParseError("Syntax error"); 166 | } 167 | return *this; 168 | } 169 | 170 | Parser &Parser::_value_list(vector &values) { 171 | if (match(COMMA)) { 172 | // _value_list -> COMMA expr[true] _value_list 173 | cosume(COMMA); 174 | Expr value = expr(true); 175 | #ifdef TRACK 176 | std::cout << '\n' << value << '\n'; 177 | #endif 178 | values.push_back(value.eval()); 179 | _value_list(values); 180 | } else if (match(R_PAREN)) { 181 | ; // _value_list -> epsilon 182 | } else { 183 | throw ParseError("Syntax error"); 184 | } 185 | return *this; 186 | } 187 | 188 | Delete Parser::delete_stmt() { 189 | if (match(DELETE)) { 190 | // delete_stmt -> DELETE FROM id where_clause SEMICOLON 191 | cosume(DELETE); 192 | cosume(FROM); 193 | string table_id = id(); 194 | Expr where = where_clause(); 195 | cosume(SEMICOLON); 196 | return Delete(table_id, where); 197 | } else { 198 | throw ParseError("Syntax error"); 199 | } 200 | } 201 | 202 | Expr Parser::where_clause() { 203 | if (match(WHERE)) { 204 | // where_clause -> WHERE disjunct 205 | cosume(WHERE); 206 | return disjunct(); 207 | } else if (match(SEMICOLON)) { 208 | return NULL_EXPR; // where_clause -> epsilon 209 | } else { 210 | throw ParseError("Syntax error"); 211 | } 212 | } 213 | 214 | Expr Parser::disjunct() { 215 | if (match(L_PAREN) || 
match(NOT) 216 | || match(PLUS) || match(MINUS) 217 | || match(NUM) || match(ID)) { 218 | // disjunct -> conjunct _disjunct 219 | Expr temp = conjunct(); 220 | Expr test = _disjunct(); 221 | if (test.isNull()) { // test lack a left node 222 | return temp; 223 | } else { 224 | test.setLeftMost(temp); 225 | return test; 226 | } 227 | } else { 228 | throw ParseError("Syntax error when maching disjunct"); 229 | } 230 | } 231 | 232 | 233 | Expr Parser::_disjunct() { 234 | if (match(OR)) { 235 | // _disjunct -> OR conjunct _disjunct 236 | cosume(OR); 237 | Expr root(OR); 238 | 239 | Expr temp = conjunct(); 240 | Expr test = _disjunct(); 241 | if (test.isNull()) { 242 | root.setRight(temp); 243 | return root; 244 | } else { 245 | root.setRight(temp); 246 | test.setLeft(root); 247 | return test; 248 | } 249 | } else if (match(SEMICOLON) || match(R_PAREN)) { 250 | return NULL_EXPR; // _disjunct-> epsilon 251 | } else { 252 | throw ParseError("Syntax error when matching disjunct"); 253 | } 254 | } 255 | 256 | 257 | Expr Parser::conjunct() { 258 | if (match(L_PAREN) || match(NOT) 259 | || match(PLUS) || match(MINUS) 260 | || match(NUM) || match(ID)) { 261 | // conjunct -> bool _conjunct 262 | Expr temp = boolean(); 263 | Expr test = _conjunct(); 264 | if (test.isNull()) { // test lack a left node 265 | return temp; 266 | } else { 267 | test.setLeftMost(temp); 268 | return test; 269 | } 270 | } else { 271 | throw ParseError("Syntax error when matching conjunct"); 272 | } 273 | } 274 | 275 | Expr Parser::_conjunct() { 276 | if (match(AND)) { 277 | // _conjunct -> AND bool _conjunct 278 | cosume(AND); 279 | Expr root(AND); 280 | 281 | Expr temp = boolean(); 282 | Expr test = _conjunct(); 283 | 284 | if (test.isNull()) { 285 | root.setRight(temp); 286 | return root; 287 | } else { 288 | root.setRight(temp); 289 | test.setLeft(root); 290 | return test; 291 | } 292 | } else if (match(OR) || match(SEMICOLON) 293 | || match(R_PAREN)) { 294 | return NULL_EXPR; // _conjunct -> epsilon 
295 | } else { 296 | throw ParseError("Syntax error when matching conjunct"); 297 | } 298 | } 299 | 300 | Expr Parser::boolean() { 301 | if (match(NUM) || match(ID) 302 | || match(PLUS) || match(MINUS)) { 303 | // bool -> comp 304 | return comp(); 305 | } else if (match(L_PAREN)) { 306 | // bool -> L_PAREN disjunct R_PAREN 307 | cosume(L_PAREN); 308 | Expr temp = disjunct(); 309 | cosume(R_PAREN); 310 | return temp; 311 | } else if (match(NOT)) { 312 | // bool -> NOT bool 313 | cosume(NOT); 314 | Expr temp(NOT); 315 | temp.setRight(boolean()); 316 | return temp; 317 | } else { 318 | throw ParseError("Syntax error when matching boolean"); 319 | } 320 | } 321 | 322 | 323 | Expr Parser::comp() { 324 | if (match(NUM) || match(ID) 325 | || match(PLUS) || match(MINUS)) { 326 | Expr temp; 327 | temp.setLeft(expr(false)); 328 | temp.setType(rop()); 329 | temp.setRight(expr(false)); 330 | return temp; 331 | } else { 332 | throw ParseError("Syntax error when maching comparisons"); 333 | } 334 | } 335 | 336 | Expr Parser::expr(bool simple) { 337 | if (match(NUM) || match(PLUS) || match(MINUS) 338 | || (simple && match(L_PAREN)) || (!simple && match(ID))) { 339 | // expr[] -> term[] _expr[] 340 | Expr temp = term(simple); 341 | Expr test = _expr(simple); 342 | if (test.isNull()) { // test lack a left node 343 | return temp; 344 | } else { 345 | test.setLeftMost(temp); 346 | return test; 347 | } 348 | } else { 349 | throw ParseError("Syntax error when matching expressions"); 350 | } 351 | } 352 | 353 | Expr Parser::_expr(bool simple) { 354 | if (match(PLUS) || match(MINUS)) { 355 | // _expr[] -> PLUS term[] _expr[] 356 | // _expr[] -> MINUS term[] _expr[] 357 | Type op = lookahead.getType(); 358 | cosume(op); 359 | Expr root(op); 360 | 361 | Expr temp = term(simple); 362 | Expr test = _expr(simple); 363 | if (test.isNull()) { 364 | root.setRight(temp); 365 | return root; 366 | } else { 367 | root.setRight(temp); 368 | test.setLeft(root); 369 | return test; 370 | } 371 | } else 
if (simple && (match(COMMA) || match(R_PAREN))) { 372 | return NULL_EXPR; // _expr[] -> epsilon 373 | } else if (!simple && (match(NEQ) || match(EQ) 374 | || match(LT) || match(GT) || match(LEQ) 375 | || match(GEQ) || match(AND) || match(OR) 376 | || match(SEMICOLON) || match(R_PAREN))) { 377 | return NULL_EXPR; // _expr[] -> epsilon 378 | } else { 379 | throw ParseError("Syntax error when matching expressions"); 380 | } 381 | } 382 | 383 | Expr Parser::term(bool simple) { 384 | if (match(NUM) || match(PLUS) || match(MINUS) 385 | || (simple && match(L_PAREN)) || (!simple && match(ID))) { 386 | // term[] -> unary[] _term[] 387 | Expr temp = unary(simple); 388 | Expr test = _term(simple); 389 | if (test.isNull()) { // test lack a left node 390 | return temp; 391 | } else { 392 | test.setLeftMost(temp); 393 | return test; 394 | } 395 | } else { 396 | throw ParseError("Syntax error when matching terms"); 397 | } 398 | } 399 | 400 | 401 | Expr Parser::_term(bool simple) { 402 | if (match(MUL) || match(DIV)) { 403 | // _term[] -> MUL unary[] _term[] 404 | // _term[] -> DIV unary[] _term[] 405 | Type op = lookahead.getType(); 406 | cosume(op); 407 | Expr root(op); 408 | 409 | Expr temp = unary(simple); 410 | Expr test = _term(simple); 411 | if (test.isNull()) { 412 | root.setRight(temp); 413 | return root; 414 | } else { 415 | root.setRight(temp); 416 | test.setLeft(root); 417 | return test; 418 | } 419 | } else if (match(PLUS) || match(MINUS)) { 420 | return NULL_EXPR; // _term[] -> epsilon 421 | } else if (simple && (match(COMMA) || match(R_PAREN))) { 422 | return NULL_EXPR; // _term[] -> epsilon 423 | } else if (!simple && (match(NEQ) || match(EQ) 424 | || match(LT) || match(GT) || match(LEQ) 425 | || match(GEQ) || match(AND) || match(OR) 426 | || match(SEMICOLON) || match(R_PAREN))) { 427 | return NULL_EXPR; // _term[]-> epsilon 428 | } else { 429 | throw ParseError("Syntax error when matching terms"); 430 | } 431 | } 432 | 433 | Expr Parser::unary(bool simple) { 434 
| if (match(PLUS) || match(MINUS)) { 435 | // unary[] -> PLUS unary[] 436 | // unary[] -> MINUS unary[] 437 | Type op = lookahead.getType(); 438 | cosume(op); 439 | Expr root(op); 440 | root.setRight(unary(simple)); 441 | return root; 442 | } else if (match(NUM)) { 443 | // unary[] -> num 444 | int result = num(); 445 | Expr temp(NUM); 446 | temp.setValue(Token(NUM, &result, sizeof(result))); 447 | return temp; 448 | } else if (!simple && match(ID)) { 449 | // unary[false] -> id 450 | string result = id(); 451 | Expr temp(ID); 452 | temp.setValue(Token(ID, result.c_str(), result.size())); 453 | return temp; 454 | } else if (simple && match(L_PAREN)) { 455 | // unary[true] -> L_PAREN expr[true] R_PAREN 456 | cosume(L_PAREN); 457 | Expr temp = expr(true); 458 | cosume(R_PAREN); 459 | return temp; 460 | } else { 461 | throw ParseError("Syntax error when matching unary"); 462 | } 463 | } 464 | 465 | Type Parser::rop() { 466 | if (match(NEQ) || match(EQ) 467 | || match(LT) || match(GT) 468 | || match(LEQ) || match(GEQ)) { 469 | Type result = lookahead.getType(); 470 | cosume(result); 471 | return result; 472 | } else { 473 | throw ParseError("Syntax error when matching relational operators"); 474 | } 475 | } 476 | 477 | Query Parser::query_stmt() { 478 | if (match(SELECT)) { 479 | // query_stmt -> SELECT select_list FROM id where_clause SEMICOLON 480 | cosume(SELECT); 481 | vector columns; 482 | select_list(columns); 483 | cosume(FROM); 484 | string table_id = id(); 485 | Expr where = where_clause(); 486 | cosume(SEMICOLON); 487 | return Query(table_id, columns, where); 488 | } else { 489 | throw ParseError("Syntax error"); 490 | } 491 | } 492 | 493 | Parser &Parser::select_list(vector &columns) { 494 | if (match(MUL)) { 495 | // select_list -> MUL 496 | columns.push_back("*"); 497 | cosume(MUL); 498 | } else if (match(ID)) { 499 | // select_list -> column_list 500 | column_list(columns); 501 | } else { 502 | throw ParseError("Syntax error"); 503 | } 504 | return *this; 
505 | } 506 | 507 | string Parser::id() { 508 | if (match(ID)) { 509 | string result = lookahead.getId(); 510 | advance(); 511 | return result; 512 | } else { 513 | throw ParseError("Syntax error: Expected ID"); 514 | } 515 | } 516 | 517 | int Parser::num() { 518 | if (match(NUM)) { 519 | int result = lookahead.getNumber(); 520 | advance(); 521 | return result; 522 | } else { 523 | throw ParseError("Syntax error: Expected number"); 524 | } 525 | } 526 | 527 | Parser &Parser::cosume(Type t) { 528 | if (match(t)) { 529 | advance(); 530 | } else { 531 | throw ParseError("Syntax error"); 532 | } 533 | return *this; 534 | } 535 | 536 | void Parser::advance() { 537 | #ifdef TRACK 538 | std::cout << lookahead << ' '; 539 | if (lookahead == SEMICOLON) 540 | std::cout << '\n'; 541 | #endif 542 | if (lookahead == SEMICOLON && !delay) { 543 | delay = true; 544 | } else { 545 | col = lexer.getCol(); 546 | line = lexer.getLine(); 547 | lookahead = lexer.next(); 548 | delay = false; 549 | } 550 | } 551 | 552 | } -------------------------------------------------------------------------------- /src/Parser.h: -------------------------------------------------------------------------------- 1 | #include "Lexer.h" 2 | #include "Expr.h" 3 | #include "Statements.h" 4 | 5 | #ifndef PARSER_H 6 | #define PARSER_H 7 | 8 | namespace ssql { 9 | 10 | class ParseError : std::exception { 11 | public: 12 | ParseError(string _msg) : msg(_msg) {} 13 | ~ParseError() throw () {} // Updated 14 | const char *what() const throw() { 15 | return msg.c_str(); 16 | } 17 | private: 18 | string msg; 19 | }; 20 | 21 | /* Predictive parser. 22 | * 23 | * Use RVO instead of move semantics for performance. 24 | * Usage: 25 | * Parser parser(lexer); // get a lexer for it first. 26 | * // type of next statement. 
CREATE/DELETE/INSERT/SELECT 27 | * Type next_type = parser.next_stmt_type(); 28 | * if (next_type == CREATE) { // check the type first 29 | * Create stmt = parser.create_stmt(); // get the statment 30 | * } // and so forth... 31 | */ 32 | class Parser { 33 | public: 34 | Parser(Lexer &l) : lexer(l) { 35 | col = lexer.getCol(); 36 | line = lexer.getLine(); 37 | lookahead = lexer.next(); 38 | delay = false; 39 | } 40 | 41 | Type next_stmt_type(); // head of next statement 42 | 43 | Create create_stmt(); 44 | Parser &decl_list(multimap &defaults, vector > &keys); 45 | Parser &_decl_list(multimap &defaults, vector > &keys); 46 | Parser &decl(multimap &defaults, vector > &keys); 47 | int default_spec(); 48 | Parser &column_list(vector &names); 49 | Parser &_column_list(vector &names); 50 | 51 | Insert insert_stmt(); 52 | Parser &value_list(vector &values); 53 | Parser &_value_list(vector &values); 54 | 55 | Delete delete_stmt(); 56 | // RVO is heavily used here 57 | Expr where_clause(); 58 | Expr disjunct(); 59 | Expr _disjunct(); 60 | Expr conjunct(); 61 | Expr _conjunct(); 62 | Expr boolean(); 63 | Expr comp(); 64 | Expr expr(bool simple); 65 | Expr _expr(bool simple); 66 | Expr term(bool simple); 67 | Expr _term(bool simple); 68 | Expr unary(bool simple); 69 | 70 | Type rop(); 71 | 72 | Query query_stmt(); 73 | Parser &select_list(vector &columns); 74 | 75 | // terminals 76 | string id(); 77 | int num(); 78 | Parser &cosume(Type t); // cosume a keyword/operator 79 | 80 | // Use this to move the lookahead and record the col/line. 81 | // Don't call lexer.next() by hand. 
82 | void advance(); 83 | 84 | // next lookahead is END 85 | bool isEnd() const { 86 | return lookahead == END; 87 | } 88 | 89 | int getCol() const { 90 | return col; 91 | } 92 | 93 | int getLine() const { 94 | return line; 95 | } 96 | 97 | bool match(Type t) { 98 | if (delay) { 99 | advance(); 100 | } 101 | return lookahead == t; 102 | } 103 | 104 | Parser &consume_until_start() { 105 | while (!match(CREATE) && !match(INSERT) && !match(DELETE) 106 | && !match(SELECT) && !match(END)) { 107 | advance(); 108 | } 109 | return *this; 110 | } 111 | 112 | ~Parser() {} 113 | 114 | private: 115 | Token lookahead; 116 | Lexer &lexer; 117 | int col; // column number 118 | int line; // line number 119 | bool delay; 120 | }; 121 | 122 | } 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /src/Statements.cpp: -------------------------------------------------------------------------------- 1 | #include "Statements.h" -------------------------------------------------------------------------------- /src/Statements.h: -------------------------------------------------------------------------------- 1 | #include "Expr.h" 2 | 3 | #include 4 | using std::multimap; 5 | using std::multiset; 6 | 7 | #ifndef STATEMENTS_H 8 | #define STATEMENTS_H 9 | 10 | namespace ssql{ 11 | 12 | class Statement { 13 | public: 14 | Statement(string _id) : id(_id) {} 15 | string getId() const { return id; } 16 | private: 17 | string id; 18 | }; 19 | 20 | /** 21 | * Create statement. Consists of id, defaults and keys. 
22 | */ 23 | class Create : public Statement { 24 | public: 25 | Create(const string _id, const multimap &_default_spec, 26 | const vector > &_keys) : 27 | Statement(_id), 28 | default_spec(_default_spec), 29 | keys(_keys) {} 30 | 31 | const multimap &getDefaults() const { 32 | return default_spec; 33 | } 34 | 35 | const vector > &getKeys() const { 36 | return keys; 37 | } 38 | 39 | private: 40 | multimap default_spec; 41 | vector > keys; 42 | }; 43 | 44 | /** 45 | * Insert statement. Consists of id, columns and values(paired with index). 46 | */ 47 | class Insert : public Statement { 48 | public: 49 | Insert(const string _id, const vector &_columns, 50 | const vector &_values) : 51 | Statement(_id), 52 | columns(_columns), 53 | values(_values) {} 54 | 55 | const vector &getColumns() const { 56 | return columns; 57 | } 58 | 59 | const vector &getValues() const { 60 | return values; 61 | } 62 | 63 | private: 64 | vector columns; 65 | vector values; 66 | }; 67 | 68 | /** 69 | * Delete statement. Consists of id and a where expression. 70 | */ 71 | class Delete : public Statement { 72 | public: 73 | Delete(const string _id, const Expr &_where) 74 | : Statement(_id), where(_where) {} 75 | const Expr &getWhere() const { 76 | return where; 77 | } 78 | private: 79 | Expr where; 80 | }; 81 | 82 | /** 83 | * Query statement. Consists of id, columns, and a where expression. 
84 | */ 85 | class Query : public Statement { 86 | public: 87 | Query(const string _id, const vector _columns, 88 | const Expr &_where) 89 | : Statement(_id), columns(_columns), where(_where) {} 90 | const vector &getColumns() const { 91 | return columns; 92 | } 93 | const Expr &getWhere() const { 94 | return where; 95 | } 96 | private: 97 | vector columns; // can contain '*' 98 | Expr where; 99 | }; 100 | 101 | } 102 | #endif -------------------------------------------------------------------------------- /src/Table.cpp: -------------------------------------------------------------------------------- 1 | #include "Table.h" 2 | 3 | namespace ssql { 4 | 5 | using std::size_t; 6 | 7 | Table::Table(const string table_id, const map defs, 8 | const vector primary) 9 | : id(table_id), keys(primary.begin(), primary.end()) { 10 | int counter = 0; 11 | for (auto it = defs.begin(); it != defs.end(); ++it) { 12 | Scheme new_scheme = Scheme(it->first, it->second, 13 | keys.find(it->first) != keys.end()); 14 | indexes[it->first] = counter++; 15 | schema.push_back(new_scheme); 16 | columns.push_back(it->first); 17 | } 18 | } 19 | 20 | Table &Table::insert(const vector cols, 21 | const vector values) { 22 | 23 | // check no primary key constraint violation 24 | set colset(cols.begin(), cols.end()); 25 | for (const auto &key : keys) { 26 | if (colset.find(key) == colset.end()) { 27 | throw DataBaseError(string("Key ") + key + string(" not found")); 28 | } 29 | } 30 | 31 | // all columns should be in the schema of the table 32 | for(auto col : colset) { 33 | if (std::find(columns.begin(), columns.end(), col) == columns.end()) { 34 | throw DataBaseError(string("Column ") + col 35 | + string(" is not in the schema")); 36 | } 37 | } 38 | 39 | vector new_record(columns.size()); 40 | // fill in default data 41 | for (size_t i = 0; i < columns.size(); ++i) { 42 | new_record[i] = schema[i].def; 43 | } 44 | 45 | // fill in available data 46 | for (size_t i = 0; i < cols.size(); ++i) { 47 | 
int index = indexes[cols[i]]; 48 | new_record[index] = values[i]; 49 | } 50 | 51 | // check not already exists 52 | for (const auto &record : data) { 53 | if (conflict(record, new_record)) { 54 | throw DataBaseError(string("Record already exists")); 55 | } 56 | } 57 | 58 | data.push_back(new_record); 59 | return *this; 60 | } 61 | 62 | int Table::del(const Expr expr) { 63 | if (data.size() <= 0) { 64 | return 0; 65 | } 66 | 67 | auto it = data.begin(); 68 | int count = 0; 69 | 70 | // columns occurring in the where clause (if any) 71 | // should be in the schema of the table 72 | while (it != data.end()) { 73 | if (expr.eval(*it, indexes)) { 74 | it = data.erase(it); 75 | count++; 76 | } else { 77 | it++; 78 | } 79 | } 80 | 81 | return count; 82 | } 83 | 84 | int Table::query(const vector &names, const Expr expr, 85 | vector > &results) const { 86 | int count = 0; 87 | if (data.size() <= 0) { 88 | return 0; 89 | } 90 | 91 | // get queried columns 92 | if (std::find(names.begin(), names.end(), "*") != names.end()) { // all 93 | for (const auto &record : data) { 94 | if (expr.eval(record, indexes)) { 95 | // columns occurring in the where clause (if any) 96 | // should be in the schema of the table 97 | results.push_back(record); 98 | count++; 99 | } 100 | } 101 | return count; 102 | } else { // selected columns 103 | // all columns (except *) in the select list should be in 104 | // the schema of the table 105 | for(const auto &name : names) { 106 | if (std::find(columns.begin(), columns.end(), name) 107 | == columns.end()) { 108 | throw DataBaseError(string("Column ") + name 109 | + string(" is not in the schema")); 110 | } 111 | } 112 | 113 | // use int index for reordering 114 | vector query_indexes; 115 | for (const auto &name : names) { 116 | auto it = indexes.find(name); 117 | query_indexes.push_back(it->second); 118 | } 119 | 120 | for (const auto &record : data) { 121 | if (expr.eval(record, indexes)) { 122 | // columns occurring in the where clause (if any) 
123 | // should be in the schema of the table 124 | vector reordered; 125 | for (const auto &idx : query_indexes) { 126 | reordered.push_back(record[idx]); 127 | } 128 | results.push_back(reordered); 129 | count++; 130 | } 131 | } 132 | 133 | return count; 134 | } 135 | 136 | } 137 | 138 | bool Table::conflict(const vector old_record, 139 | const vector new_record) const { 140 | for (const auto &key: keys) { 141 | auto it = indexes.find(key); 142 | int idx = it->second; 143 | if (old_record[idx] != new_record[idx]) { 144 | return false; 145 | } 146 | } 147 | return true; 148 | } 149 | 150 | } -------------------------------------------------------------------------------- /src/Table.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "Expr.h" 7 | 8 | #ifndef TABLE_H 9 | #define TABLE_H 10 | 11 | namespace ssql { 12 | 13 | using std::set; 14 | using std::map; 15 | using std::string; 16 | using std::vector; 17 | using std::to_string; 18 | 19 | class DataBaseError : std::exception { 20 | public: 21 | DataBaseError(string _msg) : msg(_msg) {} 22 | ~DataBaseError() throw () {} // Updated 23 | const char *what() const throw() { 24 | return msg.c_str(); 25 | } 26 | private: 27 | string msg; 28 | }; 29 | 30 | /** 31 | * Unit of schema. 32 | */ 33 | struct Scheme { 34 | string name; 35 | int def; 36 | bool is_key; 37 | Scheme(string _name, int _def, bool _is_key) 38 | : name(_name), def(_def), is_key(_is_key) {} 39 | }; 40 | 41 | /** 42 | * Responsible for transactions. 43 | * Perform semantics analysis that needs the knowledge of its schema. 
44 | */ 45 | class Table { 46 | public: 47 | Table() {} 48 | // create table with id, default mappins, primary keys 49 | Table(const string table_id, const map defs, 50 | const vector primary); 51 | // insert values 52 | Table &insert(const vector cols, const vector values); 53 | // delete rows satisfying given expressions 54 | int del(const Expr expr); 55 | // query rows satisfying given expressions 56 | int query(const vector &names, const Expr expr, 57 | vector > &results) const; 58 | // get column names of this table 59 | const vector &getColumns() const { 60 | return columns; 61 | } 62 | // check key conflicts for two records 63 | bool conflict(const vector old_record, 64 | const vector new_record) const; 65 | 66 | ~Table() {} 67 | private: 68 | string id; 69 | 70 | set keys; // primary keys 71 | map indexes; // string names to indexes 72 | 73 | // with same order 74 | vector schema; 75 | vector columns; // column names 76 | vector > data; 77 | }; 78 | 79 | } 80 | 81 | #endif -------------------------------------------------------------------------------- /src/Token.cpp: -------------------------------------------------------------------------------- 1 | #include "Token.h" 2 | 3 | namespace ssql { 4 | 5 | map Token::name; 6 | 7 | Token::Token(Type _type, const void *raw, const int size) { 8 | if (isValue(_type)) { // number or identifier 9 | if (raw == NULL || size == 0) { 10 | throw TokenError("Expected non-keyword"); 11 | } else { 12 | type = _type; 13 | 14 | real_size = size / sizeof(char); 15 | if (type == ID) { 16 | real_size++; // '\0' 17 | } 18 | 19 | data = new char[real_size]; 20 | memcpy(data, (char *)raw, real_size); 21 | } 22 | } else { // keyword or operator 23 | type = _type; 24 | data = NULL; 25 | real_size = 0; 26 | } 27 | } 28 | 29 | Token::Token(const Token &other) { 30 | type = other.type; 31 | if (isValue(type)) { // number or identifier 32 | real_size = other.real_size; 33 | data = new char[real_size]; 34 | memcpy(data, other.data, 
real_size); 35 | } else { // keyword or operator 36 | real_size = 0; 37 | data = NULL; 38 | } 39 | } 40 | 41 | void Token::initNameMap() { 42 | name[ID] = "ID"; 43 | name[NUM] = "NUM"; 44 | name[CREATE] = "CREATE"; 45 | name[TABLE] = "TABLE"; 46 | name[INT] = "INT"; 47 | name[DEFAULT] = "DEFAULT"; 48 | name[PRIMARY] = "PRIMARY"; 49 | name[KEY] = "KEY"; 50 | name[INSERT] = "INSERT"; 51 | name[INTO] = "INTO"; 52 | name[VALUES] = "VALUES"; 53 | name[DELETE] = "DELETE"; 54 | name[FROM] = "FROM"; 55 | name[WHERE] = "WHERE"; 56 | name[SELECT] = "SELECT"; 57 | name[ASSIGN] = "ASSIGN"; 58 | name[LT] = "LT"; 59 | name[GT] = "GT"; 60 | name[NEQ] = "NEQ"; 61 | name[EQ] = "EQ"; 62 | name[GEQ] = "GEQ"; 63 | name[LEQ] = "LEQ"; 64 | name[PLUS] = "PLUS"; 65 | name[MINUS] = "MINUS"; 66 | name[MUL] = "MUL"; 67 | name[DIV] = "DIV"; 68 | name[AND] = "AND"; 69 | name[OR] = "OR"; 70 | name[NOT] = "NOT"; 71 | name[L_PAREN] = "L_PAREN"; 72 | name[R_PAREN] = "R_PAREN"; 73 | name[COMMA] = "COMMA"; 74 | name[SEMICOLON] = "SEMICOLON"; 75 | name[END] = "END"; 76 | name[NONE] = "NONE"; 77 | } 78 | 79 | // identifiers 80 | string Token::getId() const { 81 | if (type != ID) { 82 | throw TokenError("Expected identifer, get otherwise"); 83 | } else { 84 | return string(data); 85 | } 86 | } 87 | 88 | // keywords 89 | Type Token::getKeyword() const { 90 | if (isValue(type)) { 91 | throw TokenError("Expected keywords, get otherwise"); 92 | } else { 93 | return type; 94 | } 95 | } 96 | 97 | // numbers 98 | int Token::getNumber() const { 99 | if (type != NUM) { 100 | throw TokenError("Expected number, get otherwise"); 101 | } else { 102 | int result = 0; 103 | memcpy(&result, data, sizeof(int)); 104 | return result; 105 | } 106 | } 107 | 108 | ostream &operator<<(ostream &s, const Token &token) { 109 | if (Token::name.size() == 0) { 110 | Token::initNameMap(); 111 | } 112 | 113 | Type type = token.type; 114 | string token_name = Token::name[type]; 115 | 116 | if (type == ID) { 117 | s << "(" << 
token_name << ", " << token.getId() << ")"; 118 | } else if (type == NUM) { 119 | s << "(" << token_name << ", " << token.getNumber() << ")"; 120 | } else { 121 | s << token_name; 122 | } 123 | return s; 124 | } 125 | 126 | Token &Token::operator=(const Token &rhs) { 127 | type = rhs.type; 128 | 129 | // release old data 130 | if (data != NULL) { 131 | delete [] data; 132 | data = NULL; 133 | real_size = 0; 134 | } 135 | 136 | if (isValue(type)) { // number or identifier 137 | real_size = rhs.real_size; 138 | data = new char[real_size]; 139 | memcpy(data, rhs.data, real_size); 140 | } else { // keyword or operator 141 | real_size = 0; 142 | data = NULL; 143 | } 144 | return *this; 145 | } 146 | 147 | Token::~Token() { 148 | if (data != NULL) { 149 | delete [] data; 150 | data = NULL; 151 | } 152 | } 153 | 154 | } -------------------------------------------------------------------------------- /src/Token.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifndef TOKEN_H 8 | #define TOKEN_H 9 | 10 | namespace ssql { 11 | 12 | using std::string; 13 | using std::ostream; 14 | using std::map; 15 | 16 | // keywords and operators 17 | enum Type { 18 | ID, NUM, CREATE, TABLE, INT, DEFAULT, PRIMARY, KEY, INSERT, 19 | INTO, VALUES, DELETE, FROM, WHERE, SELECT, ASSIGN, LT, GT, NEQ, EQ, 20 | GEQ, LEQ, PLUS, MINUS, MUL, DIV, AND, OR, NOT, L_PAREN, R_PAREN, 21 | COMMA, SEMICOLON, END, NONE 22 | }; 23 | 24 | class TokenError : std::exception { 25 | public: 26 | TokenError(string _msg) : msg(_msg) {} 27 | ~TokenError() throw () {} // Updated 28 | const char* what() const throw() { return msg.c_str(); } 29 | private: 30 | string msg; 31 | }; 32 | 33 | /* Tokens (for terminals). 34 | * 35 | * Avoid RTTI since Token's value will be frequently needed. 36 | * There are Four type of tokens: 37 | * 1. Keyword. `type` is its type (e.g. CREATE), `data`=NULL, 38 | * `real_size`=0. 
Use `Type getKeyword()` to get it. 39 | * 2. Operator. `type` is its type (e.g. CREATE), `data`=NULL, 40 | * `real_size`=0. Use `Type getKeyword()` to get it. 41 | * 3. Identifier. `type`is ID, `data` contains 42 | * the raw string, `real_size` is string length + 1(for '\0'). 43 | * use `string getId()` to get it. 44 | * 4. Number. `type` is NUM, `data` contains an int, 45 | * `real_size` is the size of the int. 46 | * use `int getNumber()` to get it. 47 | */ 48 | class Token { 49 | public: 50 | Token(Type _type=NONE, const void *raw=NULL, const int size=0); 51 | Token(const Token &other); 52 | 53 | // number or identifier 54 | bool isValue(Type t) const { // number or identifier 55 | return t == NUM || t == ID; 56 | } 57 | 58 | Type getType() const { 59 | return type; 60 | } 61 | 62 | // For identifiers. Need to guarded by getType() == ID 63 | string getId() const; 64 | // For keywords or operators. Need to guarded by !isValue(getType()) 65 | Type getKeyword() const; 66 | // For numbers. 
Need to guarded by getType() == NUM 67 | int getNumber() const; 68 | 69 | // Check type 70 | bool operator== (Type rhs) const { 71 | return type == rhs; 72 | } 73 | bool operator!= (Type rhs) const { 74 | return !(*this == rhs); 75 | } 76 | 77 | Token &operator=(const Token &rhs); 78 | friend ostream &operator<<(ostream &s, const Token &token); 79 | 80 | ~Token(); 81 | 82 | static map name; // uppercase name of types 83 | 84 | private: 85 | static void initNameMap(); // statically init names 86 | 87 | Type type; // type of the token 88 | char *data; // raw data 89 | int real_size; // size of data[] 90 | }; 91 | 92 | } 93 | 94 | #endif -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "IO.h" 5 | 6 | using std::cin; 7 | using std::ifstream; 8 | using std::ofstream; 9 | 10 | using namespace ssql; 11 | 12 | int main(int argc, char const *argv[]) { 13 | Lexer *lexptr; 14 | ofstream out; 15 | ifstream in; // must be in the scope until stop scanning 16 | 17 | if (argc > 1) { 18 | in.open(argv[1], ifstream::in); 19 | if (in.is_open()) { 20 | lexptr = new Lexer(in); 21 | } else { 22 | cout << "Fail to open " << argv[1] << '\n'; 23 | exit(1); 24 | } 25 | } else { 26 | lexptr = new Lexer(cin); 27 | } 28 | 29 | Parser parser(*lexptr); 30 | Engine engine; 31 | IO io(parser, *lexptr, engine); 32 | 33 | // try-catch in case the first lexeme is invalid 34 | try { 35 | io.loop(); 36 | } catch (LexError e) { 37 | cout << "line " << lexptr->getLine() << ", "; 38 | cout << "column " << lexptr->getCol() << ": "; 39 | cout << e.what() << '\n'; 40 | } catch (ParseError e) { 41 | cout << "line " << parser.getLine() << ", "; 42 | cout << "column " << parser.getCol() << ": "; 43 | cout << e.what() << '\n'; 44 | } 45 | 46 | delete lexptr; 47 | return 0; 48 | } 
-------------------------------------------------------------------------------- /src/test_lexer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "Token.h" 5 | #include "Lexer.h" 6 | 7 | using std::cin; 8 | using std::cout; 9 | using std::ifstream; 10 | using std::ofstream; 11 | 12 | using namespace ssql; 13 | 14 | int main(int argc, char const *argv[]) { 15 | Lexer *lexptr; 16 | ofstream out; 17 | ifstream in; // must be in the scope until stop scanning 18 | 19 | if (argc > 1) { 20 | in.open(argv[1], ifstream::in); 21 | if (in.is_open()) { 22 | lexptr = new Lexer(in); 23 | } else { 24 | cout << "Fail to open " << argv[1] << '\n'; 25 | exit(1); 26 | } 27 | } else { 28 | lexptr = new Lexer(cin); 29 | } 30 | 31 | Lexer &lexer = *lexptr; 32 | Token lookahead = lexer.next(); 33 | while (lookahead != END) { 34 | try { 35 | cout << lookahead << '\n'; 36 | lookahead = lexer.next(); 37 | } catch (LexError e) { 38 | cout << e.what() << '\n'; 39 | } 40 | } 41 | 42 | if (lookahead == END) { 43 | cout << lookahead << '\n'; 44 | } 45 | delete lexptr; 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /src/test_parser.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "Token.h" 5 | #include "Lexer.h" 6 | #include "Parser.h" 7 | #include "Expr.h" 8 | 9 | using std::cin; 10 | using std::cout; 11 | using std::ifstream; 12 | using std::ofstream; 13 | using namespace ssql; 14 | 15 | int main(int argc, char const *argv[]) { 16 | Lexer *lexptr; 17 | ofstream out; 18 | ifstream in; // must be in the scope until stop scanning 19 | 20 | if (argc > 1) { 21 | in.open(argv[1], ifstream::in); 22 | if (in.is_open()) { 23 | lexptr = new Lexer(in); 24 | } else { 25 | cout << "Fail to open " << argv[1] << '\n'; 26 | exit(1); 27 | } 28 | } else { 29 | lexptr = new Lexer(cin); 30 | } 31 
| 32 | Parser parser(*lexptr); 33 | while (!parser.isEnd()) { 34 | try { 35 | Type next = parser.next_stmt_type(); 36 | if (next == CREATE) { 37 | Create create = parser.create_stmt(); 38 | 39 | cout << "Until line: " << parser.getLine(); 40 | cout << ", col " << parser.getCol() << "\n"; 41 | 42 | cout << "create ID: " << create.getId() << '\n'; 43 | 44 | cout << "create defaults" << '\n'; 45 | const multimap &defs = create.getDefaults(); 46 | for (auto it = defs.begin(); it != defs.end(); ++it) { 47 | cout << it->first << ": " << it->second << "\n"; 48 | } 49 | 50 | cout << "create keys:\n"; 51 | const vector > &keys = create.getKeys(); 52 | for (auto key : keys) { 53 | for (auto col: key) { 54 | cout << col << "\n"; 55 | } 56 | } 57 | cout << "\n"; 58 | } else if (next == INSERT) { 59 | Insert insert = parser.insert_stmt(); 60 | 61 | cout << "Until line: " << parser.getLine(); 62 | cout << ", col " << parser.getCol() << "\n"; 63 | 64 | cout << "insert ID: " << insert.getId() << '\n'; 65 | 66 | cout << "insert values" << '\n'; 67 | const vector &columns = insert.getColumns(); 68 | const vector &values = insert.getValues(); 69 | for (int i = 0; i < columns.size(); ++i) { 70 | cout << columns[i] << ": " << values[i] << '\n'; 71 | } 72 | cout << "\n"; 73 | } else if (next == DELETE) { 74 | Delete del = parser.delete_stmt(); 75 | 76 | cout << "Until line: " << parser.getLine(); 77 | cout << ", col " << parser.getCol() << "\n"; 78 | 79 | cout << "delete ID: " << del.getId() << '\n'; 80 | cout << "delete clause" << '\n'; 81 | cout << del.getWhere(); 82 | cout << "\n\n"; 83 | } else if (next == SELECT) { 84 | Query query = parser.query_stmt(); 85 | 86 | cout << "Until line: " << parser.getLine(); 87 | cout << ", col " << parser.getCol() << "\n"; 88 | 89 | cout << "query ID: " << query.getId() << '\n'; 90 | 91 | cout << "query columns: "; 92 | const vector &columns = query.getColumns(); 93 | for (int i = 0; i < columns.size(); ++i) { 94 | cout << columns[i] << ' '; 95 | } 
96 | 97 | cout << "\nquery clause" << '\n'; 98 | cout << query.getWhere(); 99 | cout << "\n\n"; 100 | } 101 | } catch (LexError e) { 102 | cout << lexptr->getLine() << ", " << lexptr->getCol() << ": "; 103 | cout << e.what() << '\n'; 104 | exit(1); 105 | } catch (ParseError e) { 106 | cout << parser.getLine() << ", " << parser.getCol() << ": "; 107 | cout << e.what() << '\n'; 108 | exit(1); 109 | } 110 | } 111 | 112 | delete lexptr; 113 | return 0; 114 | } 115 | -------------------------------------------------------------------------------- /test/all.good: -------------------------------------------------------------------------------- 1 | Created table Student 2 | line 8: Multiple definitions for age 3 | line 13: Multiple primary key definitions 4 | line 17: Undefined key height 5 | line 21: Table Student already exists 6 | Inserted 1 rows into table Student 7 | line 24: Duplicate column age 8 | line 25: Column height is not in the schema 9 | line 26: Numbers of columns and values do not match 10 | line 27: Record already exists 11 | line 28: Cannot find table Student5 12 | line 29: Key sid not found 13 | Inserted 1 rows into table Student 14 | ------------------------------ 15 | | age| sid| 16 | ------------------------------ 17 | | 18| 1111| 18 | ------------------------------ 19 | | 18| 20| 20 | ------------------------------ 21 | 2 matching rows in Student 22 | ------------------------------ 23 | | sid| age| 24 | ------------------------------ 25 | | 1111| 18| 26 | ------------------------------ 27 | | 20| 18| 28 | ------------------------------ 29 | 2 matching rows in Student 30 | --------------- 31 | | age| 32 | --------------- 33 | | 18| 34 | --------------- 35 | | 18| 36 | --------------- 37 | 2 matching rows in Student 38 | No matching rows in Student 39 | ------------------------------ 40 | | sid| age| 41 | ------------------------------ 42 | | 1111| 18| 43 | ------------------------------ 44 | | 20| 18| 45 | ------------------------------ 46 | 2 
matching rows in Student 47 | line 37: Column height is not in the schema 48 | line 38, height not found in the scheme 49 | Inserted 1 rows into table Student 50 | ------------------------------ 51 | | age| sid| 52 | ------------------------------ 53 | | 25| 90| 54 | ------------------------------ 55 | 1 matching rows in Student 56 | ------------------------------ 57 | | sid| age| 58 | ------------------------------ 59 | | 20| 18| 60 | ------------------------------ 61 | | 90| 25| 62 | ------------------------------ 63 | 2 matching rows in Student 64 | Inserted 1 rows into table Student 65 | ------------------------------ 66 | | age| sid| 67 | ------------------------------ 68 | | 18| 1111| 69 | ------------------------------ 70 | | 18| 20| 71 | ------------------------------ 72 | | 25| 90| 73 | ------------------------------ 74 | | 93| 5| 75 | ------------------------------ 76 | 4 matching rows in Student 77 | Created table Student2 78 | Inserted 1 rows into table Student2 79 | ------------------------------ 80 | | age| sid| 81 | ------------------------------ 82 | | 93| 0| 83 | ------------------------------ 84 | 1 matching rows in Student2 85 | ------------------------------ 86 | | age| sid| 87 | ------------------------------ 88 | | 18| 1111| 89 | ------------------------------ 90 | | 18| 20| 91 | ------------------------------ 92 | | 25| 90| 93 | ------------------------------ 94 | | 93| 5| 95 | ------------------------------ 96 | 4 matching rows in Student 97 | Deleted 2 rows from table Student 98 | ------------------------------ 99 | | age| sid| 100 | ------------------------------ 101 | | 18| 1111| 102 | ------------------------------ 103 | | 18| 20| 104 | ------------------------------ 105 | 2 matching rows in Student 106 | Inserted 1 rows into table Student 107 | ------------------------------ 108 | | age| sid| 109 | ------------------------------ 110 | | 18| 1111| 111 | ------------------------------ 112 | | 18| 20| 113 | ------------------------------ 
114 | | 17| 25| 115 | ------------------------------ 116 | 3 matching rows in Student 117 | Deleted 1 rows from table Student 118 | ------------------------------ 119 | | age| sid| 120 | ------------------------------ 121 | | 18| 1111| 122 | ------------------------------ 123 | | 18| 20| 124 | ------------------------------ 125 | 2 matching rows in Student 126 | Deleted 2 rows from table Student 127 | No matching rows in Student 128 | Inserted 1 rows into table Student 129 | ------------------------------ 130 | | age| sid| 131 | ------------------------------ 132 | | 10| 1| 133 | ------------------------------ 134 | 1 matching rows in Student 135 | line 67, height not found in the scheme 136 | ------------------------------ 137 | | age| sid| 138 | ------------------------------ 139 | | 10| 1| 140 | ------------------------------ 141 | 1 matching rows in Student 142 | line 70, Division by zero 143 | line 71, Division by zero 144 | Inserted 1 rows into table Student 145 | ------------------------------ 146 | | age| sid| 147 | ------------------------------ 148 | | 10| 1| 149 | ------------------------------ 150 | | 0| 77| 151 | ------------------------------ 152 | 2 matching rows in Student 153 | line 75, Division by zero 154 | line 77, column 7: Syntax error 155 | ------------------------------ 156 | | age| sid| 157 | ------------------------------ 158 | | 10| 1| 159 | ------------------------------ 160 | | 0| 77| 161 | ------------------------------ 162 | 2 matching rows in Student 163 | line 79, column 9: Invalid lexeme # 164 | Inserted 1 rows into table Student 165 | ------------------------------ 166 | | age| sid| 167 | ------------------------------ 168 | | 10| 1| 169 | ------------------------------ 170 | | 0| 77| 171 | ------------------------------ 172 | | 5| 55| 173 | ------------------------------ 174 | 3 matching rows in Student 175 | -------------------------------------------------------------------------------- /test/all.in: 
-------------------------------------------------------------------------------- 1 | CREATE TABLE Student(sid INT, 2 | age INT DEFAULT = 18, 3 | PRIMARY KEY (sid)); 4 | 5 | CREATE TABLE Student2(sid INT, 6 | age INT DEFAULT = 18, 7 | PRIMARY KEY (sid), 8 | age INT); 9 | 10 | CREATE TABLE Student3(sid INT, 11 | PRIMARY KEY (sid), 12 | age INT DEFAULT = 18, 13 | PRIMARY KEY (age)); 14 | 15 | CREATE TABLE Student4(sid INT, 16 | PRIMARY KEY (sid, height), 17 | age INT DEFAULT = 18); 18 | 19 | CREATE TABLE Student(sid INT, 20 | age INT DEFAULT = 18, 21 | PRIMARY KEY (sid)); 22 | 23 | INSERT INTO Student(sid, age) VALUES(1111, 18); 24 | INSERT INTO Student(sid, age, age) VALUES(1111, 18, 19); 25 | INSERT INTO Student(sid, height) VALUES(1111, 18); 26 | INSERT INTO Student(sid, age) VALUES(1111, 18, 19); 27 | INSERT INTO Student(sid, age) VALUES(1111, 18); 28 | INSERT INTO Student5(sid, age) VALUES(1111, 18); 29 | INSERT INTO Student(age) VALUES(20); 30 | INSERT INTO Student(sid) VALUES(20); 31 | 32 | SELECT * FROM Student; 33 | SELECT sid, age FROM Student; 34 | SELECT age FROM Student; 35 | SELECT sid, age FROM Student WHERE age < 18; 36 | SELECT sid, age FROM Student WHERE age < sid; 37 | SELECT sid, height FROM Student WHERE age < 18; 38 | SELECT sid, age FROM Student WHERE height < 180; 39 | 40 | INSERT INTO Student(sid, age) VALUES(90, 25); 41 | SELECT * FROM Student 42 | WHERE age + 7 > 19 + 6 && sid <> 6 / 3 - 2; 43 | SELECT sid, age FROM Student WHERE age + 1 / 2 >= 18 && sid <> 1111; 44 | INSERT INTO Student(sid, age) VALUES(6 * 3 / 4 + 1, 32 * 3 - 6 / 2); 45 | SELECT * FROM Student; 46 | 47 | CREATE TABLE Student2(sid INT DEFAULT = 6 * 3 / 4 + 1, 48 | age INT DEFAULT = 32 * 3 - 6 / 2, PRIMARY KEY (sid)); 49 | INSERT INTO Student2(sid) VALUES(0); 50 | SELECT * FROM Student2; 51 | 52 | SELECT * FROM Student; 53 | DELETE FROM Student 54 | WHERE age + 7 > 19 + 6 && sid <> 6 / 3 - 2; 55 | SELECT * FROM Student; 56 | 57 | INSERT INTO Student(age, sid) VALUES(17, 25); 
58 | SELECT * FROM Student; 59 | DELETE FROM Student WHERE age < 18 && age > 14; 60 | SELECT * FROM Student; 61 | 62 | DELETE FROM Student; 63 | SELECT * FROM Student; 64 | 65 | INSERT INTO Student(sid, age) VALUES(1, 10); 66 | SELECT * FROM Student; 67 | DELETE FROM Student WHERE age < 18 && height > 180; 68 | SELECT * FROM Student; 69 | 70 | INSERT INTO Student(sid, age) VALUES(6 * 3 / 0, 32 * 3 - 6 / 2); 71 | SELECT age FROM Student WHERE age / 0 + 7 > sid; 72 | 73 | INSERT INTO Student(sid, age) VALUES(77, 0); 74 | SELECT * FROM Student; 75 | SELECT * FROM Student WHERE sid / age > sid; 76 | 77 | SELECT / FROM Student; 78 | SELECT * FROM Student; 79 | INSERT # INTO Student(sid, age) VALUES(55, 5); 80 | SELECT * FROM Student; -------------------------------------------------------------------------------- /test/lexer.good: -------------------------------------------------------------------------------- 1 | DELETE 2 | FROM 3 | (ID, Student) 4 | WHERE 5 | (ID, age) 6 | PLUS 7 | (NUM, 7) 8 | LT 9 | (NUM, 19) 10 | PLUS 11 | (NUM, 6) 12 | AND 13 | (ID, sid) 14 | NEQ 15 | (NUM, 6) 16 | DIV 17 | (NUM, 3) 18 | MUL 19 | (NUM, 5) 20 | MINUS 21 | (NUM, 2) 22 | SEMICOLON 23 | CREATE 24 | TABLE 25 | (ID, Student) 26 | L_PAREN 27 | (ID, sid) 28 | INT 29 | COMMA 30 | (ID, age) 31 | INT 32 | DEFAULT 33 | ASSIGN 34 | (NUM, 18) 35 | COMMA 36 | PRIMARY 37 | KEY 38 | L_PAREN 39 | (ID, sid) 40 | R_PAREN 41 | R_PAREN 42 | SEMICOLON 43 | SELECT 44 | (ID, sid) 45 | COMMA 46 | (ID, age) 47 | FROM 48 | (ID, Student) 49 | WHERE 50 | (ID, age) 51 | GEQ 52 | (NUM, 15) 53 | AND 54 | NOT 55 | (ID, age) 56 | LT 57 | (NUM, 18) 58 | SEMICOLON 59 | INSERT 60 | INTO 61 | (ID, Student) 62 | L_PAREN 63 | (ID, sid) 64 | COMMA 65 | (ID, age) 66 | R_PAREN 67 | VALUES 68 | L_PAREN 69 | (NUM, 1111) 70 | COMMA 71 | (NUM, 18) 72 | R_PAREN 73 | SEMICOLON 74 | DELETE 75 | FROM 76 | (ID, Student) 77 | WHERE 78 | (ID, age) 79 | LEQ 80 | (NUM, 18) 81 | AND 82 | (ID, age) 83 | GT 84 | (NUM, 14) 85 | SEMICOLON 86 
| DELETE 87 | FROM 88 | (ID, Student) 89 | SEMICOLON 90 | SELECT 91 | MUL 92 | FROM 93 | (ID, Student) 94 | WHERE 95 | (ID, age) 96 | PLUS 97 | (NUM, 7) 98 | GT 99 | (NUM, 19) 100 | PLUS 101 | (NUM, 6) 102 | OR 103 | (ID, sid) 104 | EQ 105 | (NUM, 6) 106 | DIV 107 | (NUM, 3) 108 | MINUS 109 | (NUM, 2) 110 | SEMICOLON 111 | CREATE 112 | TABLE 113 | (ID, Student) 114 | L_PAREN 115 | (ID, sid) 116 | INT 117 | DEFAULT 118 | ASSIGN 119 | (NUM, 6) 120 | MUL 121 | (NUM, 3) 122 | DIV 123 | MINUS 124 | (NUM, 4) 125 | PLUS 126 | (NUM, 1) 127 | COMMA 128 | (ID, age) 129 | INT 130 | DEFAULT 131 | ASSIGN 132 | (NUM, 32) 133 | MUL 134 | (NUM, 3) 135 | MINUS 136 | (NUM, 6) 137 | DIV 138 | (NUM, 2) 139 | R_PAREN 140 | SEMICOLON 141 | INSERT 142 | INTO 143 | (ID, Student) 144 | L_PAREN 145 | (ID, sid) 146 | COMMA 147 | (ID, _age) 148 | R_PAREN 149 | VALUES 150 | L_PAREN 151 | (NUM, 6) 152 | MUL 153 | (NUM, 3) 154 | DIV 155 | (NUM, 4) 156 | PLUS 157 | (NUM, 1) 158 | COMMA 159 | (NUM, 2332) 160 | MUL 161 | (NUM, 3) 162 | MINUS 163 | (NUM, 6) 164 | DIV 165 | (NUM, 2) 166 | R_PAREN 167 | SEMICOLON 168 | END 169 | -------------------------------------------------------------------------------- /test/lexer.in: -------------------------------------------------------------------------------- 1 | DELETE FROM Student WHERE 2 | age + 7 < 19 + 6 3 | && 4 | sid <> 6 / 3 * 5 - 2; 5 | 6 | CREATE TABLE Student 7 | (sid INT, 8 | age INT DEFAULT = 18, 9 | PRIMARY KEY (sid)); 10 | 11 | SELECT sid, age FROM Student 12 | WHERE age >= 15 && ! 
age < 18; 13 | 14 | INSERT INTO Student(sid, age) 15 | VALUES(1111, 18); 16 | 17 | DELETE FROM Student WHERE 18 | age <= 18 && age > 14; 19 | 20 | DELETE FROM Student; 21 | 22 | SELECT * FROM Student WHERE 23 | age + 7 > 19 + 6 24 | || sid == 6 / 3 - 2; 25 | 26 | CREATE TABLE Student 27 | (sid INT DEFAULT = 6 * 3 / -4 + 1, 28 | age INT DEFAULT = 32 * 3 - 6 / 2); 29 | 30 | INSERT INTO Student(sid, _age) 31 | VALUES(6 * 3 / 4 + 1, 2332 * 3 - 6 / 2); 32 | -------------------------------------------------------------------------------- /test/parser.good: -------------------------------------------------------------------------------- 1 | Until line: 1, col 70 2 | insert ID: Student 3 | insert values 4 | sid: -3 5 | age: 93 6 | 7 | Until line: 2, col 91 8 | create ID: Student 9 | create defaults 10 | age: 93 11 | sid: 5 12 | create keys: 13 | 14 | Until line: 3, col 67 15 | delete ID: Student 16 | delete clause 17 | ((((ID, age) PLUS (NUM, 7)) GT ((NUM, 19) PLUS (NUM, 6))) AND ((ID, sid) NEQ ((((NUM, 6) DIV (NUM, 3)) MUL (NUM, 5)) MINUS (NUM, 2)))) 18 | 19 | Until line: 4, col 71 20 | create ID: Student 21 | create defaults 22 | age: 18 23 | sid: 0 24 | create keys: 25 | sid 26 | 27 | Until line: 5, col 47 28 | insert ID: Student 29 | insert values 30 | sid: 1111 31 | age: 18 32 | 33 | Until line: 6, col 47 34 | delete ID: Student 35 | delete clause 36 | (((ID, age) LT (NUM, 18)) AND ((ID, age) GT (NUM, 14))) 37 | 38 | Until line: 7, col 20 39 | delete ID: Student 40 | delete clause 41 | () 42 | 43 | Until line: 8, col 65 44 | query ID: Student 45 | query columns: * 46 | query clause 47 | ((((ID, age) PLUS (NUM, 7)) GT ((NUM, 19) PLUS (NUM, 6))) AND ((ID, sid) NEQ (((NUM, 6) DIV (NUM, 3)) MINUS (NUM, 2)))) 48 | 49 | Until line: 9, col 69 50 | query ID: Student 51 | query columns: sid age 52 | query clause 53 | (((( MINUS (ID, age)) GT ( PLUS (NUM, 15))) OR ((ID, age) LT (NUM, 18))) OR ((ID, sid) GT (NUM, 5))) 54 | 55 | Until line: 10, col 89 56 | create ID: Student 57 | 
create defaults 58 | age: 18 59 | sid: 0 60 | create keys: 61 | sid 62 | sid 63 | 64 | Until line: 11, col 96 65 | create ID: Student 66 | create defaults 67 | age: 18 68 | age: 7 69 | sid: 0 70 | create keys: 71 | sid 72 | 73 | -------------------------------------------------------------------------------- /test/parser.in: -------------------------------------------------------------------------------- 1 | INSERT INTO Student(sid, age) VALUES(-6 * 3 / 4 + 1, +32 * 3 - 6 / 2); 2 | CREATE TABLE Student(sid INT DEFAULT = 6 * -3 / -4 + 1, age INT DEFAULT = +32 * 3 - 6 / 2); 3 | DELETE FROM Student WHERE age + 7 > 19 + 6 && sid <> 6 / 3 * 5 - 2; 4 | CREATE TABLE Student(sid INT, age INT DEFAULT = 18, PRIMARY KEY (sid)); 5 | INSERT INTO Student(sid, age) VALUES(1111, 18); 6 | DELETE FROM Student WHERE age < 18 && age > 14; 7 | DELETE FROM Student; 8 | SELECT * FROM Student WHERE age + 7 > 19 + 6 && sid <> 6 / 3 - 2; 9 | SELECT sid, age FROM Student WHERE -age > +15 || age < 18 || sid > 5; 10 | CREATE TABLE Student(sid INT, age INT DEFAULT = 18, PRIMARY KEY (sid), PRIMARY KEY(sid)); 11 | CREATE TABLE Student(sid INT, age INT DEFAULT = 18, PRIMARY KEY (sid), age INT DEFAULT = 5 + 2); 12 | --------------------------------------------------------------------------------