├── src ├── encryption │ └── mod.rs ├── manager │ ├── mod.rs │ └── pool.rs ├── storage │ ├── format.rs │ ├── io.rs │ ├── mod.rs │ ├── page.rs │ ├── bytescoder.rs │ ├── index.rs │ └── diskinterface.rs ├── index │ ├── mod.rs │ ├── tree.rs │ └── btree.rs ├── connection │ ├── mod.rs │ ├── response.rs │ ├── message.rs │ └── request.rs ├── component │ ├── mod.rs │ ├── datatype.rs │ ├── field.rs │ ├── database.rs │ └── table.rs ├── sql │ ├── mod.rs │ ├── query.rs │ ├── symbol.rs │ ├── worker.rs │ ├── lexer.rs │ └── parser.rs └── main.rs ├── .rustfmt.toml ├── .gitignore ├── logo ├── logo.png ├── logo.psd └── logo_organization.psd ├── test_data └── 1.in ├── .env ├── cli.yml ├── Cargo.toml ├── LICENSE ├── .travis.yml ├── README.md └── client └── client.py /src/encryption/mod.rs: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 120 2 | -------------------------------------------------------------------------------- /src/manager/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod pool; 2 | -------------------------------------------------------------------------------- /src/storage/format.rs: -------------------------------------------------------------------------------- 1 | // from rows to tsv or bin -------------------------------------------------------------------------------- /src/index/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod btree; 2 | pub mod tree; 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /data* 3 | **/*.rs.bk 4 | *.index 5 | 
-------------------------------------------------------------------------------- /src/storage/io.rs: -------------------------------------------------------------------------------- 1 | // fn write() {} 2 | 3 | // fn read() {} 4 | -------------------------------------------------------------------------------- /logo/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stellarsql/StellarSQL/HEAD/logo/logo.png -------------------------------------------------------------------------------- /logo/logo.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stellarsql/StellarSQL/HEAD/logo/logo.psd -------------------------------------------------------------------------------- /src/connection/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod message; 2 | pub mod request; 3 | pub mod response; 4 | -------------------------------------------------------------------------------- /test_data/1.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stellarsql/StellarSQL/HEAD/test_data/1.in -------------------------------------------------------------------------------- /src/component/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod database; 2 | pub mod datatype; 3 | pub mod field; 4 | pub mod table; 5 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | PORT = 23333 2 | FILE_BASE_PATH = data 3 | POOL_SIZE = 15 4 | ENABLE_TSV = true 5 | PAGE_SIZE = 4096 -------------------------------------------------------------------------------- /logo/logo_organization.psd: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stellarsql/StellarSQL/HEAD/logo/logo_organization.psd -------------------------------------------------------------------------------- /src/sql/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod lexer; 2 | pub mod parser; 3 | pub mod query; 4 | pub mod symbol; 5 | pub mod worker; 6 | -------------------------------------------------------------------------------- /src/storage/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod bytescoder; 2 | pub mod diskinterface; 3 | pub mod file; 4 | pub mod index; 5 | pub mod io; 6 | pub mod page; 7 | -------------------------------------------------------------------------------- /src/connection/response.rs: -------------------------------------------------------------------------------- 1 | pub enum Response { 2 | OK { msg: String }, 3 | Error { msg: String }, 4 | } 5 | 6 | impl Response { 7 | pub fn serialize(&self) -> String { 8 | match *self { 9 | Response::OK { ref msg } => format!("{}\n", msg), 10 | Response::Error { ref msg } => format!("Error: {}\n", msg), 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /cli.yml: -------------------------------------------------------------------------------- 1 | name: StellarSQL 2 | version: "0.0.1" 3 | author: "tigercosmos " 4 | about: A minimal DBMS implemented in Rust 5 | args: 6 | - port: 7 | help: port of the server. 
8 | required: false 9 | takes_value: true 10 | - daemon: 11 | short: x 12 | long: daemon-mode 13 | help: run in daemon mode 14 | required: false -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "stellar_sql" 3 | version = "0.1.0" 4 | authors = ["tigercosmos "] 5 | edition = '2018' 6 | 7 | [dependencies] 8 | clap = {version = "~2.32.0", features = ["yaml"]} 9 | dotenv_codegen = "0.11.0" 10 | tokio = "0.1" 11 | tokio-io = "0.1" 12 | futures = "0.1" 13 | bytes = "0.4" 14 | lazy_static = "1.1.0" 15 | serde = "1.0" 16 | serde_derive = "1.0" 17 | serde_json = "1.0" 18 | log = "0.4" 19 | env_logger = "0.6" 20 | byteorder = "1" 21 | uuid = {version = "0.7", features = ["serde", "v4"]} 22 | regex = "1.1.5" -------------------------------------------------------------------------------- /src/index/tree.rs: -------------------------------------------------------------------------------- 1 | use crate::component::datatype::DataType; 2 | 3 | pub type PagePtr = u32; 4 | pub type PageOffset = usize; 5 | pub type RowPtr = (PagePtr, PageOffset); 6 | 7 | pub trait Tree { 8 | fn new( 9 | pid: u32, 10 | node_type: NodeType, 11 | key_type: DataType, 12 | ptr_size: usize, 13 | key_size: usize, 14 | row_ptr_size: Option, 15 | ) -> Box; 16 | fn height(&self) -> u32; 17 | fn insert(&mut self, val: T); 18 | fn delete(&mut self, val: T); 19 | fn search(&self, val: T) -> RowPtr; 20 | } 21 | 22 | #[derive(PartialEq)] 23 | pub enum NodeType { 24 | Internal, 25 | Leaf, 26 | } 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Liu, An-Chi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/component/datatype.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 2 | pub enum DataType { 3 | Char(u8), 4 | Double, 5 | Float, 6 | Int, 7 | Varchar(u8), 8 | Url, 9 | } 10 | 11 | impl DataType { 12 | pub fn get(s: &str, len: Option) -> Option { 13 | let length = len.unwrap_or(0); 14 | let d = match s { 15 | "char" => DataType::Char(length), 16 | "double" => DataType::Double, 17 | "float" => DataType::Float, 18 | "int" => DataType::Int, 19 | "varchar" => DataType::Varchar(length), 20 | "url" => DataType::Url, 21 | _ => return None, 22 | }; 23 | Some(d) 24 | } 25 | } 26 | 27 | #[cfg(test)] 28 | mod tests { 29 | use super::*; 30 | 31 | #[test] 32 | fn test_datatype() { 33 | assert_eq!(DataType::Double, DataType::get("double", None).unwrap()); 34 | assert_eq!(DataType::Char(8), DataType::get("char", Some(8)).unwrap()); 35 | 
assert_eq!(DataType::Url, DataType::get("url", None).unwrap()); 36 | assert!(DataType::get("date", None).is_none()); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | addons: 4 | apt: 5 | packages: 6 | - libssl-dev 7 | language: rust 8 | rust: 9 | - stable 10 | - nightly 11 | matrix: 12 | allow_failures: 13 | - rust: nightly 14 | 15 | before_script: 16 | - rustup component add rustfmt-preview 17 | 18 | cache: 19 | directories: 20 | - ./target 21 | - $HOME/.cargo/bin 22 | 23 | before_cache: | 24 | if [[ "$TRAVIS_RUST_VERSION" == stable ]]; then 25 | cargo install cargo-tarpaulin -f 26 | fi 27 | 28 | script: 29 | - cargo fmt --all -- --check 30 | - cargo build 31 | - cargo test 32 | - cargo rustdoc -- --document-private-items 33 | 34 | after_success: | 35 | if [[ "$TRAVIS_RUST_VERSION" == stable ]]; then 36 | cargo tarpaulin --ciserver travis-ci --coveralls $TRAVIS_JOB_ID 37 | 38 | cargo tarpaulin --out Xml 39 | bash <(curl -s https://codecov.io/bash) 40 | fi 41 | 42 | deploy: 43 | provider: pages 44 | skip_cleanup: true 45 | github_token: $GH_TOKEN 46 | local_dir: target/doc 47 | target_branch: gh-pages 48 | on: 49 | branch: master 50 | 51 | notifications: 52 | email: 53 | on_success: never 54 | on_failure: never -------------------------------------------------------------------------------- /src/component/field.rs: -------------------------------------------------------------------------------- 1 | use crate::component::datatype::DataType; 2 | use uuid::Uuid; 3 | 4 | #[derive(Debug, Clone, Serialize, Deserialize)] 5 | pub struct Field { 6 | pub name: String, 7 | pub datatype: DataType, 8 | pub not_null: bool, 9 | pub default: Option, 10 | pub check: Checker, 11 | pub encrypt: bool, 12 | uuid: String, 13 | } 14 | 15 | #[derive(Debug, Clone, Serialize, Deserialize)] 16 | pub enum 
Checker { 17 | None, 18 | Some(Operator, String), 19 | } 20 | 21 | #[derive(Debug, Clone, Serialize, Deserialize)] 22 | pub enum Operator { 23 | LT, // < 24 | LE, // <= 25 | EQ, // = 26 | NE, // != 27 | GT, // > 28 | GE, // >= 29 | } 30 | 31 | impl Field { 32 | pub fn new(name: &str, datatype: DataType) -> Field { 33 | Field { 34 | name: name.to_string(), 35 | datatype, 36 | not_null: false, 37 | default: None, 38 | check: Checker::None, 39 | encrypt: false, 40 | uuid: Uuid::new_v4().to_string(), 41 | } 42 | } 43 | 44 | #[allow(dead_code)] 45 | pub fn new_all( 46 | name: &str, 47 | datatype: DataType, 48 | not_null: bool, 49 | default: Option, 50 | check: Checker, 51 | encrypt: bool, 52 | ) -> Field { 53 | Field { 54 | name: name.to_string(), 55 | datatype, 56 | not_null, 57 | default, 58 | check, 59 | encrypt, 60 | uuid: Uuid::new_v4().to_string(), 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/connection/message.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, BufRead}; 2 | use std::mem; 3 | 4 | use futures::{Poll, Stream}; 5 | 6 | use tokio_io::AsyncRead; 7 | 8 | /// Combinator created by the top-level `message` method which is a stream over 9 | /// the message of text on an I/O object. 10 | #[derive(Debug)] 11 | pub struct Message { 12 | io: A, 13 | message: String, 14 | } 15 | 16 | /// Creates a new stream from the I/O object given representing the message of 17 | /// input that are found on `A`. 18 | /// 19 | /// This method takes an asynchronous I/O object, `a`, and returns a `Stream` of 20 | /// message that the object contains. The returned stream will reach its end once 21 | /// `a` reaches EOF. 
22 | pub fn new(a: A) -> Message 23 | where 24 | A: AsyncRead + BufRead, 25 | { 26 | Message { 27 | io: a, 28 | message: String::new(), 29 | } 30 | } 31 | 32 | impl Stream for Message 33 | where 34 | A: AsyncRead + BufRead, 35 | { 36 | type Item = String; 37 | type Error = io::Error; 38 | 39 | fn poll(&mut self) -> Poll, io::Error> { 40 | let n = match self.io.read_line(&mut self.message) { 41 | Ok(t) => t, 42 | Err(ref e) if e.kind() == ::std::io::ErrorKind::WouldBlock => return Ok(::futures::Async::NotReady), 43 | Err(e) => return Err(e.into()), 44 | }; 45 | if n == 0 && self.message.len() == 0 { 46 | return Ok(None.into()); 47 | } 48 | if self.message.ends_with("\n") { 49 | self.message.pop(); 50 | if self.message.ends_with("\r") { 51 | self.message.pop(); 52 | } 53 | } 54 | Ok(Some(mem::replace(&mut self.message, String::new())).into()) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/component/database.rs: -------------------------------------------------------------------------------- 1 | use crate::component::table::Table; 2 | use crate::storage::diskinterface::{DiskError, DiskInterface}; 3 | use std::collections::HashMap; 4 | use std::fmt; 5 | use uuid::Uuid; 6 | 7 | #[derive(Debug, Clone)] 8 | pub struct Database { 9 | pub name: String, 10 | pub tables: HashMap, 11 | 12 | /* storage */ 13 | pub is_dirty: bool, 14 | pub is_delete: bool, 15 | 16 | uuid: String, 17 | } 18 | 19 | #[derive(Debug)] 20 | pub enum DatabaseError { 21 | CausedByFile(DiskError), 22 | } 23 | 24 | impl fmt::Display for DatabaseError { 25 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 26 | match *self { 27 | DatabaseError::CausedByFile(ref e) => write!(f, "error caused by file: {}", e), 28 | } 29 | } 30 | } 31 | 32 | impl Database { 33 | pub fn new(name: &str) -> Database { 34 | Database { 35 | name: name.to_string(), 36 | tables: HashMap::new(), 37 | is_dirty: true, 38 | is_delete: false, 39 | uuid: 
Uuid::new_v4().to_string(), 40 | } 41 | } 42 | 43 | pub fn insert_new_table(&mut self, table: Table) { 44 | self.tables.insert(table.name.to_string(), table); 45 | } 46 | 47 | // load the metadate of the database and its tables 48 | pub fn load_db(username: &str, db_name: &str) -> Result { 49 | let mut db = Database::new(db_name); 50 | db.is_dirty = false; 51 | let metas = 52 | DiskInterface::load_tables_meta(username, db_name, None).map_err(|e| DatabaseError::CausedByFile(e))?; 53 | for meta in metas { 54 | let name = (&meta.name).to_string(); 55 | let mut table = Table::new(&name); 56 | table.format_meta(meta); 57 | db.tables.insert(name, table.into()); 58 | } 59 | Ok(db) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/sql/query.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | 3 | /// Data for `select` 4 | #[derive(Debug)] 5 | pub struct QueryData { 6 | pub fields: Vec, 7 | pub tables: Vec, 8 | pub joins: Vec, 9 | pub predicate: NodePtr, 10 | pub group_fields: Vec, 11 | pub aggregation_fn: Vec, 12 | pub sort_fields: Vec, 13 | pub sort_dir: SortDirection, 14 | pub is_distinct: bool, 15 | pub top: TopType, 16 | } 17 | 18 | impl QueryData { 19 | pub fn new() -> QueryData { 20 | QueryData { 21 | fields: vec![], 22 | tables: vec![], 23 | joins: vec![], 24 | predicate: None, 25 | group_fields: vec![], 26 | aggregation_fn: vec![], 27 | sort_fields: vec![], 28 | sort_dir: SortDirection::None, 29 | is_distinct: false, 30 | top: TopType::None, 31 | } 32 | } 33 | } 34 | 35 | #[derive(Debug, PartialEq)] 36 | #[allow(dead_code)] 37 | pub enum SortDirection { 38 | Asc, 39 | Desc, 40 | None, 41 | } 42 | 43 | #[derive(Debug, PartialEq)] 44 | pub enum TopType { 45 | Percent(f32), 46 | Number(u32), 47 | None, 48 | } 49 | 50 | #[derive(Debug)] 51 | pub struct Join { 52 | pub join_type: JoinType, 53 | pub table: String, 54 | pub condition: NodePtr, 55 | 
} 56 | 57 | impl Join { 58 | pub fn new(name: &str) -> Join { 59 | Join { 60 | join_type: JoinType::get(name).unwrap(), 61 | table: "".to_string(), 62 | condition: None, 63 | } 64 | } 65 | } 66 | 67 | #[derive(Debug, PartialEq)] 68 | pub enum JoinType { 69 | InnerJoin, 70 | FullOuterJoin, 71 | RightJoin, 72 | LeftJoin, 73 | } 74 | 75 | impl JoinType { 76 | fn get(name: &str) -> Option { 77 | let t = match name { 78 | "inner join" => JoinType::InnerJoin, 79 | "full outer join" => JoinType::FullOuterJoin, 80 | "left join" => JoinType::LeftJoin, 81 | "right join" => JoinType::RightJoin, 82 | _ => return None, 83 | }; 84 | Some(t) 85 | } 86 | } 87 | 88 | pub type NodePtr = Option>; 89 | 90 | #[derive(Default, Debug, Clone)] 91 | pub struct Node { 92 | pub root: String, 93 | pub set: HashSet, 94 | pub left: NodePtr, 95 | pub right: NodePtr, 96 | } 97 | 98 | impl Node { 99 | pub fn new(root: String) -> Node { 100 | Node { 101 | root: root, 102 | ..Default::default() 103 | } 104 | } 105 | 106 | pub fn left(mut self, leaf: Node) -> Self { 107 | self.left = Some(Box::new(leaf)); 108 | self 109 | } 110 | 111 | pub fn right(mut self, leaf: Node) -> Self { 112 | self.right = Some(Box::new(leaf)); 113 | self 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # StellarSQL 2 | 3 | [![Build Status](https://travis-ci.org/stellarsql/StellarSQL.svg?branch=master)](https://travis-ci.org/stellarsql/StellarSQL) 4 | [![codecov](https://codecov.io/gh/stellarsql/StellarSQL/branch/master/graph/badge.svg)](https://codecov.io/gh/stellarsql/StellarSQL) 5 | 6 | (WIP) A minimal SQL DBMS written in Rust 7 | 8 | - The document is [here](https://stellarsql.github.io/StellarSQL/stellar_sql/). 9 | - There is a [slide](https://docs.google.com/presentation/d/1rSxFNpN5uzP1cF1olKTnyXgPdj_bcbLvSJhN5T5xn-U/edit?usp=sharing) introduce this project. 
10 | - There is a series of articles introducing about this project: [Let's build a DBMS](https://tigercosmos.xyz/lets-build-dbms/) 11 | 12 | ![logo](https://raw.githubusercontent.com/stellarsql/StellarSQL/master/logo/logo.png) 13 | 14 | ## Setup 15 | 16 | Before you start, you need to have Rust(>=1.31) and Cargo. 17 | 18 | ```bash 19 | curl https://sh.rustup.rs -sSf | sh 20 | ``` 21 | 22 | Then we could get the source code. 23 | 24 | ```bash 25 | git clone https://github.com/tigercosmos/StellarSQL 26 | cd StellarSQL 27 | ``` 28 | 29 | ## Run 30 | 31 | ### Server 32 | 33 | Open the first window and run server: 34 | 35 | ```bash 36 | cargo run [port] 37 | ``` 38 | 39 | the default port is `23333`, and you can either modify `.env` or run by argument `[port]`. 40 | 41 | ### Client 42 | 43 | Open the another window and run the client by `python client/client.py` and connect to the server: 44 | 45 | command: 46 | 47 | ```sql 48 | > create user # key is our feature, put any number for now 49 | > set user # second time log in 50 | > create database # first time create database 51 | > use # second time adopt the database 52 | > # now support simple sql 53 | ``` 54 | 55 | SQL query are not implement very well. A few simple command support for now: 56 | 57 | - create database 58 | - create table 59 | - type: int, float, double, varchar, char, url 60 | - insert into 61 | - select {fields} from {table} where {predicate} 62 | - not yet support join, only a table 63 | - predicate without NULL 64 | 65 | The default `host` and `port` are `127.0.0.1` and `23333` 66 | 67 | ```shell 68 | $ python client/client.py [host] [port] 69 | 70 | Connect to 127.0.0.1:23333 71 | == Welcome to StellarSQL Client! == 72 | StellarSQL> create user Tiger 123 73 | Login OK! 74 | 75 | StellarSQL> create database DB1 76 | Query OK! 77 | 78 | StellarSQL> create table t1 (a1 int, b1 int, c1 float); 79 | Query OK! 
80 | 81 | StellarSQL> insert into t1 (a1, b1, c1) values (1, 2, 1.2), (2, 3, 4.5), (4, 1, 0.3); 82 | Query OK! 83 | 84 | StellarSQL> select a1, b1, c1 from t1 where a1 > 1; 85 | {"fields":["a1","b1","c1"],"rows":[["2","3","4.5"],["4","1","0.3"]]} 86 | 87 | StellarSQL> select a1, b1, c1 from t1 where a1 > 1 and c1 > 2; 88 | {"fields":["a1","b1","c1"],"rows":[["2","3","4.5"]]} 89 | 90 | StellarSQL> select a1, b1, c1 from t1 where not (not a1 < 2 and not (not b1 = 3 or c1 > 1.1)); 91 | {"fields":["a1","b1","c1"],"rows":[["1","2","1.2"],["2","3","4.5"],["4","1","0.3"]]} 92 | ``` 93 | 94 | ## Build 95 | 96 | ```bash 97 | cargo build 98 | ``` 99 | 100 | ## Test 101 | 102 | ## Run all tests 103 | 104 | ```bash 105 | cargo test 106 | ``` 107 | 108 | ## Debug a test 109 | 110 | Add the line at the beginning of the test function. 111 | 112 | ```rust 113 | // init the logger for the test 114 | env_logger::init(); 115 | ``` 116 | 117 | Then run the command to see the debug information: 118 | 119 | ```sh 120 | RUST_LOG=debug cargo test -- --nocapture {test_name} 121 | ``` 122 | 123 | ## Pull Request 124 | 125 | Install [rustfmt](https://github.com/rust-lang/rustfmt), and make sure you could pass: 126 | 127 | ```sh 128 | cargo fmt --all -- --check 129 | cargo build 130 | cargo test 131 | ``` 132 | 133 | ## Document 134 | 135 | Build and open the document at localhost 136 | 137 | ```sh 138 | cargo rustdoc --open -- --document-private-items 139 | ``` 140 | 141 | ## License 142 | 143 | MIT 144 | -------------------------------------------------------------------------------- /client/client.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import sys 3 | import rlcompleter 4 | import readline 5 | 6 | 7 | def run(host, port): 8 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 9 | 10 | try: 11 | s.connect((host, port)) 12 | print('Connect to %s:%d' % (host, port)) 13 | except: 14 | print('Unable to connect %s:%d' % (host, 
port)) 15 | exit(1) 16 | 17 | print('== Welcome to StellarSQL Client! ==') 18 | 19 | client = Client() 20 | 21 | while client.check_live(): 22 | input = raw_input('StellarSQL> ') 23 | message = client.parse(input) 24 | if message is not None: 25 | s.send(message) 26 | else: 27 | continue 28 | data = s.recv(512) 29 | print(data) 30 | 31 | s.close() 32 | 33 | 34 | class Client(): 35 | """ 36 | username||database||query||key 37 | """ 38 | 39 | def __init__(self): 40 | self._user = "" 41 | self._database = "" 42 | self._query = "" 43 | self._key = "" 44 | self._is_live = True 45 | 46 | def _set_user(self, name): 47 | self._user = name 48 | print("user: %s" % self._user) 49 | 50 | def _use_database(self, name): 51 | self._database = name 52 | print("database: %s" % self._database) 53 | 54 | def _create_user(self, name, key): 55 | self._user = name 56 | self._key = key 57 | return ('{0}||||||{1}\n').format(self._user, self._key) 58 | 59 | def _create_database(self, db_name): 60 | self._database = db_name 61 | return ('{0}||||create database {1};\n').format(self._user, self._database) 62 | 63 | def _send_query(self, query): 64 | if self._user == "": 65 | print('Please set or create user!') 66 | return None 67 | if self._database == "": 68 | print('Please use or create database!') 69 | return None 70 | self._query = query 71 | return ('{0}||{1}||{2};\n').format(self._user, self._database, self._query) 72 | 73 | def check_live(self): 74 | return self._is_live 75 | 76 | def parse(self, input): 77 | tokens = input.split() 78 | try: 79 | # create user 80 | # create database 81 | if tokens[0] == 'create': 82 | if tokens[1] == 'user': 83 | return self._create_user(tokens[2], tokens[3]) 84 | elif tokens[1] == 'database': 85 | return self._create_database(tokens[2]) 86 | else: 87 | return self._send_query(input) 88 | 89 | # set user 90 | elif tokens[0] == 'set' and tokens[1] == 'user': 91 | user = tokens[2] 92 | self._set_user(user) 93 | 94 | # use database 95 | elif tokens[0] == 
'use': 96 | db = tokens[1] 97 | return self._use_database(db) 98 | 99 | # quit 100 | elif tokens[0] == 'q' or tokens[0] == 'exit': 101 | self._is_live = False 102 | 103 | elif tokens[0] == 'h' or tokens[0] == 'help': 104 | print('create user ') 105 | print('set ') 106 | print('create database ') 107 | print('use ') 108 | print(' (ex: select a1 from t1)') 109 | 110 | # use database 111 | else: 112 | return self._send_query(input) 113 | 114 | except: 115 | print('Syntax Error! Enter `h` to see commands.') 116 | return None 117 | 118 | return None 119 | 120 | 121 | if __name__ == '__main__': 122 | host = '127.0.0.1' 123 | port = 23333 124 | 125 | readline.parse_and_bind("tab: complete") 126 | 127 | if len(sys.argv) == 3: 128 | host = sys.argv[1] 129 | port = sys.argv[2] 130 | elif len(sys.argv) == 1: 131 | pass 132 | else: 133 | print('run: client.py [host] [port]') 134 | exit(1) 135 | 136 | run(host, port) 137 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | //! # StellarSQL 2 | //! A minimal SQL DBMS written in Rust 3 | //! 
4 | #[macro_use] 5 | extern crate clap; 6 | #[macro_use] 7 | extern crate dotenv_codegen; 8 | #[macro_use] 9 | extern crate lazy_static; 10 | #[macro_use] 11 | extern crate serde_derive; 12 | #[macro_use] 13 | extern crate log; 14 | 15 | mod component; 16 | mod connection; 17 | mod index; 18 | mod manager; 19 | mod sql; 20 | mod storage; 21 | 22 | use clap::App; 23 | use std::io::BufReader; 24 | use tokio::io::write_all; 25 | 26 | use crate::connection::message; 27 | use crate::connection::request::Request; 28 | use crate::connection::response::Response; 29 | use crate::manager::pool::Pool; 30 | use env_logger; 31 | use tokio::net::{TcpListener, TcpStream}; 32 | use tokio::prelude::*; 33 | 34 | use crate::storage::diskinterface::DiskInterface; 35 | use std::path::Path; 36 | use std::sync::{Arc, Mutex}; 37 | 38 | /// The entry of the program 39 | /// 40 | /// Use `Tokio` to handle each TCP connection and spawn a thread to handle the request. 41 | fn main() { 42 | info!("Hello, StellarSQL!"); 43 | 44 | // start logger 45 | env_logger::init(); 46 | 47 | env_init(); 48 | 49 | // Parse arguments 50 | let yml = load_yaml!("../cli.yml"); 51 | let m = App::from_yaml(yml).get_matches(); 52 | 53 | let port = if let Some(port_) = m.value_of("port") { 54 | port_ 55 | } else { 56 | dotenv!("PORT") 57 | }; 58 | 59 | let addr = format!("127.0.0.1:{}", port).parse().unwrap(); 60 | 61 | lazy_static! { 62 | static ref mutex: Arc> = Arc::new(Mutex::new(Pool::new(dotenv!("POOL_SIZE").parse().unwrap()))); 63 | } 64 | // Bind a TCP listener to the socket address. 65 | // Note that this is the Tokio TcpListener, which is fully async. 66 | let listener = TcpListener::bind(&addr).unwrap(); 67 | 68 | // The server task asynchronously iterates over and processes each 69 | // incoming connection. 
70 | let server = listener 71 | .incoming() 72 | .for_each(move |socket| { 73 | let addr = socket.peer_addr().unwrap(); 74 | info!("New Connection: {}", addr); 75 | 76 | // Spawn a task to process the connection 77 | process(socket, &mutex, addr); 78 | 79 | Ok(()) 80 | }) 81 | .map_err(|err| { 82 | error!("accept error = {:?}", err); 83 | }); 84 | 85 | info!("StellarSQL running on {} port", port); 86 | tokio::run(server); 87 | } 88 | 89 | /// initialize the environment 90 | /// 91 | /// Note that any error are not allowed in this step, so panic directly. 92 | fn env_init() { 93 | // check `../.env`: FILE_BASE_PATH, create usernames.json 94 | let path = dotenv!("FILE_BASE_PATH"); 95 | if !Path::new(path).exists() { 96 | match DiskInterface::create_file_base(Some(path)) { 97 | Ok(_) => {} 98 | Err(e) => panic!(e), 99 | } 100 | } 101 | } 102 | 103 | /// Process the TCP socket connection 104 | /// 105 | /// The request message pass to [`Response`](connection/request/index.html) and get [`Response`](connection/response/index.html) 106 | fn process(socket: TcpStream, mutex: &'static Arc>, addr: std::net::SocketAddr) { 107 | let (reader, writer) = socket.split(); 108 | 109 | let messages = message::new(BufReader::new(reader)); 110 | 111 | let mut requests = Request::new(addr.to_string()); 112 | 113 | // note the `move` keyword on the closure here which moves ownership 114 | // of the reference into the closure, which we'll need for spawning the 115 | // client below. 116 | // 117 | // The `map` function here means that we'll run some code for all 118 | // requests (lines) we receive from the client. The actual handling here 119 | // is pretty simple, first we parse the request and if it's valid we 120 | // generate a response. 
121 | let responses = messages.map(move |message| match Request::parse(&message, &mutex, &mut requests) { 122 | Ok(req) => req, 123 | Err(e) => return Response::Error { msg: format!("{}", e) }, 124 | }); 125 | 126 | // At this point `responses` is a stream of `Response` types which we 127 | // now want to write back out to the client. To do that we use 128 | // `Stream::fold` to perform a loop here, serializing each response and 129 | // then writing it out to the client. 130 | let writes = responses.fold(writer, |writer, response| { 131 | let response = response.serialize().into_bytes(); 132 | write_all(writer, response).map(|(w, _)| w) 133 | }); 134 | 135 | // `spawn` this client to ensure it 136 | // runs concurrently with all other clients, for now ignoring any errors 137 | // that we see. 138 | let connection = writes.then(move |_| { 139 | // write back 140 | let mut pool = mutex.lock().unwrap(); 141 | 142 | // TODO: retry if failed once 143 | match pool.write_back(addr.to_string()) { 144 | Ok(_) => {} 145 | // if failed to write back to client, just log error. 146 | Err(e) => error!("{}", e), 147 | } 148 | Ok(()) 149 | }); 150 | 151 | // Spawn the task. Internally, this submits the task to a thread pool. 
        tokio::spawn(connection);
    }
}
// -------------------------------------------------------------------------
// /src/connection/request.rs
// -------------------------------------------------------------------------
use crate::manager::pool::{Pool, PoolError};
use crate::sql::parser::{Parser, ParserError};
use crate::storage::diskinterface::{DiskError, DiskInterface};
use crate::Response;
use std::fmt;

use std::sync::{Arc, Mutex};

/// Per-connection state: the logged-in user name, the peer address
/// (used as the cache key into the SQL object pool), and the client's
/// key received in the login request.
#[derive(Debug)]
pub struct Request {
    pub username: String,
    pub addr: String,
    pub key: i32,
}

/// Errors produced while parsing or serving a client request.
#[derive(Debug)]
pub enum RequestError {
    /// Error bubbled up from the SQL object pool.
    PoolError(PoolError),
    /// Error bubbled up from the SQL parser.
    CauseByParser(ParserError),
    /// Error bubbled up from disk access.
    DiskError(DiskError),
    /// The user name was empty / could not be resolved.
    UserNotExist(String),
    /// A command other than `create database` was issued with no database selected.
    CreateDBBeforeCmd,
    /// The request does not match any supported `||`-delimited format.
    BadRequest,
    /// The login key field is not a valid `i32`.
    InvalidKey,
}

impl fmt::Display for RequestError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            RequestError::PoolError(ref e) => write!(f, "error caused by pool: {}", e),
            RequestError::CauseByParser(ref e) => write!(f, "error caused by parser: {}", e),
            RequestError::DiskError(ref e) => write!(f, "error caused by file: {}", e),
            RequestError::UserNotExist(ref s) => write!(f, "user: {} not found", s),
            RequestError::CreateDBBeforeCmd => write!(f, "please create a database before any other commands"),
            RequestError::BadRequest => write!(f, "BadRequest, invalid request format"),
            RequestError::InvalidKey => write!(f, "invalid key format"),
        }
    }
}

impl Request {
    /// Create an empty request bound to a peer address; `username`/`key`
    /// are filled in by the first (login) message.
    pub fn new(new_addr: String) -> Request {
        Request {
            username: "".to_string(),
            addr: new_addr,
            key: 0,
        }
    }

    /// Parse and execute one `||`-delimited client message.
    ///
    /// The first message on a connection must be the login form; later
    /// messages run SQL through the pooled `SQL` worker for this address.
    /// Returns the response payload or a `RequestError` for the client.
    pub fn parse(input: &str, mutex: &Arc<Mutex<Pool>>, req: &mut Request) -> Result<Response, RequestError> {
        /*
         * request format
         * case0: init (must be first request in each connection)
         *     username||||||key
         * case1: normal
         *     username||dbname||command;
         * case2: user without a database
         *     username||||create database dbname;
         *
         */
        let split_str: Vec<&str> = input.split("||").collect();

        // first connection
        if req.username == "" {
            // Login form splits into exactly 4 fields with empty middle fields.
            if split_str.len() != 4 || split_str[1] != "" || split_str[2] != "" {
                return Err(RequestError::BadRequest);
            }
            let username = split_str[0];

            // type check, save key
            let key: i32 = match split_str[3].parse() {
                Ok(_key) => _key,
                Err(_) => return Err(RequestError::InvalidKey),
            };
            req.key = key;

            // initialize username
            match Request::user_verify(username) {
                Ok(()) => req.username = username.to_string(),
                Err(ret) => return Err(ret),
            }
            return Ok(Response::OK {
                msg: "Login OK!".to_string(),
            });
        }

        if split_str.len() != 3 {
            return Err(RequestError::BadRequest);
        }

        let username = split_str[0];
        let dbname = split_str[1];
        // Re-append the `;` terminator the splitter consumed from the command.
        let cmd = format!("{};", split_str[2]);

        // load sql object from memory pool
        let mut pool = mutex.lock().unwrap();
        let mut sql = match pool.get(username, dbname, req.addr.clone()) {
            Ok(tsql) => tsql,
            Err(ret) => return Err(RequestError::PoolError(ret)),
        };
        // initialize public key
        if sql.user.key == 0 {
            sql.user.key = req.key;
        }
        // check dbname
        if dbname != "" {
            let parser = match Parser::new(&cmd) {
                Ok(_parser) => _parser,
                Err(ret) => return Err(RequestError::CauseByParser(ret)),
            };
            match parser.parse(&mut sql) {
                Err(ret) => return Err(RequestError::CauseByParser(ret)),
                Ok(_) => {}
            }
        } else {
            // check cmd if it is "create database dbname;"
            let mut iter = cmd.split_whitespace();
            if iter.next() != Some("create") || iter.next() != Some("database") {
                return Err(RequestError::CreateDBBeforeCmd);
            }
            let parser = match Parser::new(&cmd) {
                Ok(_parser) => _parser,
                Err(ret) => return Err(RequestError::CauseByParser(ret)),
            };
            match parser.parse(&mut sql) {
                Err(ret) => return Err(RequestError::CauseByParser(ret)),
                Ok(_) => {}
            }
        }
        // A query that produced output leaves JSON in the worker; drain it.
        if !sql.result_json.is_empty() {
            let return_json = sql.result_json.clone();
            sql.result_json.clear();
            return Ok(Response::OK {
                msg: return_json.to_string(),
            });
        }
        Ok(Response::OK {
            msg: "Query OK!".to_string(),
        })
        //Ok(Response::OK { msg: format!("{}, user:{}",input, sql.username) })
    }

    /// Verify the user exists on disk; non-existent users are auto-created.
    /// Only an empty name is rejected.
    fn user_verify(name: &str) -> Result<(), RequestError> {
        // auto create new users for now
        if name == "" {
            return Err(RequestError::UserNotExist(name.to_string()));
        } else {
            let users = match DiskInterface::get_usernames(Some(dotenv!("FILE_BASE_PATH"))) {
                Ok(us) => us,
                Err(ret) => return Err(RequestError::DiskError(ret)),
            };
            if !users.contains(&name.to_string()) {
                match DiskInterface::create_username(name, Some(dotenv!("FILE_BASE_PATH"))) {
                    Ok(_) => {}
                    Err(ret) => return Err(RequestError::DiskError(ret)),
                }
            }
        }
        Ok(())
    }
}
// -------------------------------------------------------------------------
// /src/index/btree.rs
// -------------------------------------------------------------------------
use crate::component::datatype::DataType;
use crate::index::tree::{NodeType, PagePtr, RowPtr, Tree};
use crate::storage::page::{INDEX_INTERNAL_HEADER_SIZE, INDEX_LEAF_HEADER_SIZE};
use std::cmp::PartialOrd;

/// B+ Tree
///
/// Internal Node
/// ```
/// +---------------------------+
/// |P0|K1||P1|K1|| ...
||Pn|Kn| 12 | /// +---------------------------+ 13 | /// K: Key Value 14 | /// P(n-1): Pointer to the page with Kn-1 <= Value < Kn 15 | /// ``` 16 | /// 17 | /// Leaf Node 18 | /// ``` 19 | /// +-----------------------------------+ 20 | /// |P0||R0|K0||R1|K1|| ... ||Rn|Kn||P1| 21 | /// +-----------------------------------+ 22 | /// K: Key Value 23 | /// R: Record pointer 24 | /// P0: Previous page pointer 25 | /// P1: Next page pointer 26 | /// ``` 27 | pub struct BPlusTree { 28 | pid: u32, 29 | node_type: NodeType, 30 | key_type: DataType, 31 | capacity: usize, 32 | ptr_size: usize, 33 | key_size: usize, 34 | row_ptr_size: Option, 35 | ptrs: Vec, 36 | keys: Vec, 37 | rows: Option>, 38 | nodes: Vec>, 39 | height: u32, 40 | } 41 | 42 | impl Tree for BPlusTree { 43 | fn new( 44 | pid: u32, 45 | node_type: NodeType, 46 | key_type: DataType, 47 | ptr_size: usize, 48 | key_size: usize, 49 | row_ptr_size: Option, 50 | ) -> Box { 51 | match node_type { 52 | NodeType::Internal => { 53 | let capacity = get_internal_capacity(ptr_size, key_size); 54 | let ptrs: Vec = Vec::with_capacity(capacity + 1); 55 | let keys: Vec = Vec::with_capacity(capacity); 56 | let rows = None; 57 | let nodes: Vec> = Vec::with_capacity(capacity); 58 | Box::new(Self { 59 | pid, 60 | node_type, 61 | key_type, 62 | capacity, 63 | ptr_size, 64 | key_size, 65 | row_ptr_size, 66 | ptrs, 67 | keys, 68 | rows, 69 | nodes, 70 | height: 1, 71 | }) 72 | } 73 | NodeType::Leaf => { 74 | let capacity = get_leaf_capacity(ptr_size, key_size, row_ptr_size.unwrap()); 75 | let ptrs: Vec = Vec::with_capacity(2); 76 | let keys: Vec = Vec::with_capacity(capacity); 77 | let rows: Option> = Some(Vec::with_capacity(capacity)); 78 | let nodes: Vec> = Vec::with_capacity(2); 79 | Box::new(Self { 80 | pid, 81 | node_type, 82 | key_type, 83 | capacity, 84 | ptr_size, 85 | key_size, 86 | row_ptr_size, 87 | ptrs, 88 | keys, 89 | rows, 90 | nodes, 91 | height: 1, 92 | }) 93 | } 94 | } 95 | } 96 | fn height(&self) -> u32 { 97 | 
self.height 98 | } 99 | fn insert(&mut self, val: T) {} 100 | fn delete(&mut self, val: T) {} 101 | fn search(&self, val: T) -> RowPtr { 102 | (0, 0) 103 | } 104 | } 105 | 106 | impl BPlusTree { 107 | fn node_type(&self) -> &NodeType { 108 | &self.node_type 109 | } 110 | 111 | /// find_ptr: upper-bounded binary searching the key to find the page 112 | /// 113 | /// Internal Node: 114 | /// ``` 115 | /// +-------------+ 116 | /// |P0| Key=2 |P1| 117 | /// +-------------+ 118 | /// ``` 119 | /// - Find Key=1 -> P0 120 | /// - Find Key=2 -> P1 121 | /// - Find Key=3 -> P1 122 | fn find_ptr(arr: &Vec, left: usize, right: usize, val: T) -> usize { 123 | let mut l = left as i32; 124 | let mut r = right as i32; 125 | let mut pos = 0; 126 | while l < r { 127 | let m = l + (r - l) / 2; 128 | if arr[m as usize] > val { 129 | r = m; 130 | pos = r; 131 | } else { 132 | l = m + 1; 133 | pos = l; 134 | } 135 | } 136 | pos as usize 137 | } 138 | } 139 | 140 | fn get_internal_capacity(ptr_size: usize, key_size: usize) -> usize { 141 | let page_size = match dotenv!("PAGE_SIZE").parse::() { 142 | Ok(s) => s, 143 | Err(_) => 4096, 144 | }; 145 | // page_size - header_size > n(key_size) + (n+1)(ptr_size) 146 | (page_size - INDEX_INTERNAL_HEADER_SIZE - ptr_size) / (ptr_size + key_size) 147 | } 148 | 149 | fn get_leaf_capacity(ptr_size: usize, key_size: usize, row_ptr_size: usize) -> usize { 150 | let page_size = match dotenv!("PAGE_SIZE").parse::() { 151 | Ok(s) => s, 152 | Err(_) => 4096, 153 | }; 154 | // page_size - header_size > n(key_size + row_ptr_size) + 2(ptr_size) 155 | (page_size - INDEX_LEAF_HEADER_SIZE - 2 * ptr_size) / (row_ptr_size + key_size) 156 | } 157 | 158 | #[cfg(test)] 159 | mod tests { 160 | use super::*; 161 | 162 | #[test] 163 | fn test_new_b_plus_tree() { 164 | let _internal_tree: Box> = BPlusTree::new(0, NodeType::Internal, DataType::Int, 4, 8, None); 165 | let _leaf_tree: Box> = BPlusTree::new(0, NodeType::Leaf, DataType::Char(10), 4, 128, Some(8)); 166 | } 
167 | 168 | #[test] 169 | fn test_find_ptr() { 170 | let arr = vec![vec![10], vec![10, 20], vec![10, 20, 30]]; 171 | for i in 0..arr.len() { 172 | let pos = BPlusTree::find_ptr(&arr[i], 0, arr[i].len(), 11); 173 | assert_eq!(pos, 1); // greater 174 | let pos = BPlusTree::find_ptr(&arr[i], 0, arr[i].len(), 10); 175 | assert_eq!(pos, 1); // equal 176 | let pos = BPlusTree::find_ptr(&arr[i], 0, arr[i].len(), 1); 177 | assert_eq!(pos, 0); // smaller 178 | } 179 | let arr = vec![1, 3, 5, 7, 9]; 180 | let pos = BPlusTree::find_ptr(&arr, 0, arr.len(), 11); 181 | assert_eq!(pos, 5); // right boundary 182 | let pos = BPlusTree::find_ptr(&arr, 0, arr.len(), 9); 183 | assert_eq!(pos, 5); 184 | let pos = BPlusTree::find_ptr(&arr, 0, arr.len(), 8); 185 | assert_eq!(pos, 4); 186 | let pos = BPlusTree::find_ptr(&arr, 0, arr.len(), 1); 187 | assert_eq!(pos, 1); 188 | let pos = BPlusTree::find_ptr(&arr, 0, arr.len(), 0); 189 | assert_eq!(pos, 0); // left boundary 190 | } 191 | 192 | } 193 | -------------------------------------------------------------------------------- /src/manager/pool.rs: -------------------------------------------------------------------------------- 1 | use crate::component::table::Row; 2 | use crate::sql::worker::{SQLError, SQL}; 3 | use crate::storage::diskinterface::{DiskError, DiskInterface}; 4 | use std::fmt; 5 | 6 | use std::collections::{BTreeMap, VecDeque}; 7 | 8 | /* 9 | * freelist: [recent use ..... 
least recent use] 10 | */ 11 | #[derive(Debug)] 12 | pub struct Pool { 13 | pub max_entry: usize, 14 | pub freelist: VecDeque, 15 | pub cache: BTreeMap, 16 | } 17 | 18 | #[derive(Debug)] 19 | pub enum PoolError { 20 | SQLError(SQLError), 21 | EntryNotExist, 22 | DiskError(DiskError), 23 | } 24 | 25 | impl fmt::Display for PoolError { 26 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 27 | match *self { 28 | PoolError::SQLError(ref e) => write!(f, "error cause by worker: {}", e), 29 | PoolError::EntryNotExist => write!(f, "entry is not existed"), 30 | PoolError::DiskError(ref e) => write!(f, "error cause by file: {}", e), 31 | } 32 | } 33 | } 34 | 35 | impl Pool { 36 | pub fn new(entry_number: usize) -> Pool { 37 | Pool { 38 | max_entry: entry_number, 39 | freelist: VecDeque::new(), 40 | cache: BTreeMap::new(), 41 | } 42 | } 43 | pub fn get(&mut self, username: &str, dbname: &str, addr: String) -> Result<&mut SQL, PoolError> { 44 | // get username entry from cache 45 | 46 | // if entry is not existed, load from disk to cache 47 | if !self.cache.contains_key(&addr) { 48 | let mut sql = SQL::new(username).unwrap(); 49 | if dbname != "" { 50 | match sql.load_database(dbname) { 51 | Ok(_) => {} 52 | Err(ret) => return Err(PoolError::SQLError(ret)), 53 | } 54 | } 55 | match self.insert(sql, addr.clone()) { 56 | Ok(_) => {} 57 | Err(ret) => return Err(ret), 58 | } 59 | } 60 | // if username entry is not in the front(most recent use), move it to the front 61 | if self.freelist[0] != addr { 62 | self.pop_from_freelist(&addr); 63 | let key = addr.clone(); 64 | self.freelist.push_front(key); 65 | } 66 | Ok(self.cache.get_mut(&addr).unwrap()) 67 | } 68 | pub fn insert(&mut self, sql: SQL, addr: String) -> Result<(), PoolError> { 69 | // if current size >= cache max size, pop and write back thr Least Recent Use(LRU) entry 70 | if self.cache.len() >= self.max_entry { 71 | let pop_addr = self.freelist.pop_back().unwrap(); 72 | match self.write_back(pop_addr) { 73 | Ok(_) 
=> {} 74 | Err(ret) => return Err(ret), 75 | } 76 | } 77 | let key = addr.clone(); 78 | self.cache.insert(addr, sql); 79 | self.freelist.push_front(key); 80 | Ok(()) 81 | } 82 | pub fn write_back(&mut self, addr: String) -> Result<(), PoolError> { 83 | // pop username entry, write this entry back to disk 84 | 85 | self.pop_from_freelist(&addr); 86 | 87 | let sql = match self.cache.get(&addr) { 88 | Some(tsql) => tsql, 89 | None => return Err(PoolError::EntryNotExist), 90 | }; 91 | match Pool::hierarchic_check(sql) { 92 | Ok(_) => {} 93 | Err(e) => return Err(e), 94 | } 95 | 96 | // remove from cache 97 | self.cache.remove(&addr); 98 | Ok(()) 99 | } 100 | fn pop_from_freelist(&mut self, addr: &String) { 101 | let l = self.freelist.len(); 102 | for i in 0..l { 103 | if self.freelist[i] == *addr { 104 | self.freelist.remove(i); 105 | break; 106 | } 107 | } 108 | } 109 | fn hierarchic_check(sql: &SQL) -> Result<(), PoolError> { 110 | // 1. check dirty bit of database 111 | if sql.database.is_delete { 112 | match DiskInterface::remove_db(&sql.user.name, &sql.database.name, Some(dotenv!("FILE_BASE_PATH"))) { 113 | Ok(_) => return Ok(()), 114 | Err(e) => return Err(PoolError::DiskError(e)), 115 | } 116 | } 117 | if sql.database.is_dirty { 118 | match DiskInterface::create_db(&sql.user.name, &sql.database.name, Some(dotenv!("FILE_BASE_PATH"))) { 119 | Ok(_) => {} 120 | Err(e) => return Err(PoolError::DiskError(e)), 121 | } 122 | } 123 | // 2. 
check dirty bit of tables 124 | for (name, table) in sql.database.tables.iter() { 125 | if table.is_delete { 126 | match DiskInterface::drop_table( 127 | &sql.user.name, 128 | &sql.database.name, 129 | &name, 130 | Some(dotenv!("FILE_BASE_PATH")), 131 | ) { 132 | Ok(_) => {} 133 | Err(e) => return Err(PoolError::DiskError(e)), 134 | } 135 | continue; 136 | } 137 | if table.is_dirty { 138 | match DiskInterface::create_table( 139 | &sql.user.name, 140 | &sql.database.name, 141 | &table, 142 | Some(dotenv!("FILE_BASE_PATH")), 143 | ) { 144 | Ok(_) => {} 145 | Err(e) => return Err(PoolError::DiskError(e)), 146 | } 147 | } 148 | // 3. check dirty bit of rows 149 | let mut new_row: Vec = table.rows.clone(); 150 | let l = new_row.len(); 151 | for i in 0..l { 152 | if !new_row[i].is_dirty { 153 | // remove rows which are not dirty 154 | new_row.remove(i); 155 | } 156 | } 157 | if !new_row.is_empty() { 158 | match DiskInterface::append_rows( 159 | &sql.user.name, 160 | &sql.database.name, 161 | &name, 162 | &new_row, 163 | Some(dotenv!("FILE_BASE_PATH")), 164 | ) { 165 | Ok(_) => {} 166 | Err(e) => return Err(PoolError::DiskError(e)), 167 | } 168 | } 169 | } 170 | Ok(()) 171 | } 172 | } 173 | 174 | #[cfg(test)] 175 | mod tests { 176 | use super::*; 177 | use std::thread; 178 | 179 | #[test] 180 | fn test_multithread_correctness() {} 181 | 182 | #[test] 183 | fn test_pool_lru_algorithm() {} 184 | 185 | #[test] 186 | fn test_db_writeback() {} 187 | 188 | #[test] 189 | fn test_table_writeback() {} 190 | 191 | #[test] 192 | fn test_create_row_writeback() {} 193 | 194 | #[test] 195 | fn test_pool_error() {} 196 | } 197 | -------------------------------------------------------------------------------- /src/storage/page.rs: -------------------------------------------------------------------------------- 1 | use crate::component::datatype::DataType; 2 | use crate::index::tree::NodeType; 3 | use crate::storage::bytescoder::BytesCoder; 4 | 5 | trait IndexPage { 6 | fn new( 7 | pid: 
u32,
        capacity: usize,
        node_type: NodeType,
        key_type: DataType,
        ptr_size: usize,
        key_size: usize,
        row_ptr_size: Option<usize>,
    ) -> Self;
}

/// Constructor contract for data-file pages.
trait FilePage {
    fn new(pid: u32, block_length: usize) -> Self;
}

/// Serialized internal index node: header + packed keys/pointers.
struct IndexInternalPage {
    header: HeaderBytes,
    content: ContentBytes,
}

/// Serialized leaf index node: header + packed keys/row pointers.
struct IndexLeafPage {
    header: HeaderBytes,
    content: ContentBytes,
}

/// Serialized table-data page: header + fixed-length row blocks.
struct DataFilePage {
    header: HeaderBytes,
    content: ContentBytes,
}

/// Round-trip between a typed header and its on-disk byte form.
trait Header {
    fn to_bytes(&self) -> HeaderBytes;
    fn from_bytes(header_bytes: &HeaderBytes) -> Self;
}

// NOTE(review): the three header sizes are reserved at 20 bytes, but
// `FileHeader::to_bytes` currently emits only 12 (three Int fields) and
// `from_bytes` reads bytes[0..12] — confirm the remaining bytes are
// intentional padding.
pub const FILE_HEADER_SIZE: usize = 20;
struct FileHeader {
    pid: u32,
    capacity: usize,
    block_length: usize,
}

pub const INDEX_INTERNAL_HEADER_SIZE: usize = 20;
struct IndexInternalHeader {
    pid: u32,
    capacity: usize,
    node_type: NodeType,
    key_type: DataType,
    ptr_size: usize,
    key_size: usize,
}

pub const INDEX_LEAF_HEADER_SIZE: usize = 20;
struct IndexLeafHeader {
    pid: u32,
    capacity: usize,
    node_type: NodeType,
    key_type: DataType,
    ptr_size: usize,
    key_size: usize,
    row_ptr_size: usize,
}

type Bytes = Vec<u8>;
struct HeaderBytes(Bytes);
struct ContentBytes(Bytes);

impl IndexPage for IndexInternalPage {
    /// Build an internal page; `_row_ptr_size` is part of the trait signature
    /// but only meaningful for leaves.
    fn new(
        pid: u32,
        capacity: usize,
        node_type: NodeType,
        key_type: DataType,
        ptr_size: usize,
        key_size: usize,
        _row_ptr_size: Option<usize>,
    ) -> Self {
        let header = IndexInternalHeader {
            pid,
            capacity,
            node_type,
            key_type,
            ptr_size,
            key_size,
        };

        // n keys plus n + 1 child pointers.
        let content = Vec::with_capacity(key_size * capacity + ptr_size * (capacity + 1));

        Self {
            header: header.to_bytes(),
            content: ContentBytes(content),
        }
    }
}

impl IndexPage for IndexLeafPage {
    fn new(
        pid: u32,
        capacity: usize,
        node_type: NodeType,
        key_type: DataType,
        ptr_size: usize,
        key_size: usize,
        row_ptr_size: Option<usize>,
    ) -> Self {
        // Leaves must be given a row-pointer size.
        let row_ptr_size = row_ptr_size.unwrap();
        let header = IndexLeafHeader {
            pid,
            capacity,
            node_type,
            key_type,
            ptr_size,
            key_size,
            row_ptr_size,
        };

        // n (key, row-pointer) pairs plus prev/next sibling pointers.
        let content = Vec::with_capacity((key_size + row_ptr_size) * capacity + ptr_size * 2);

        Self {
            header: header.to_bytes(),
            content: ContentBytes(content),
        }
    }
}

impl FilePage for DataFilePage {
    fn new(pid: u32, block_length: usize) -> Self {
        let capacity = get_file_capacity(block_length);

        // `usize` is Copy; no clones needed.
        let header = FileHeader {
            pid,
            capacity,
            block_length,
        };

        let content = Vec::with_capacity(block_length * capacity);

        Self {
            header: header.to_bytes(),
            content: ContentBytes(content),
        }
    }
}

/// How many fixed-length blocks fit in one page after the header.
fn get_file_capacity(block_length: usize) -> usize {
    // PAGE_SIZE comes from .env; fall back to 4096 on a bad value.
    let page_size = dotenv!("PAGE_SIZE").parse::<usize>().unwrap_or(4096);

    (page_size - FILE_HEADER_SIZE) / block_length
}

impl Header for FileHeader {
    fn to_bytes(&self) -> HeaderBytes {
        let mut bytes: Bytes = vec![];
        bytes.extend_from_slice(&BytesCoder::attr_to_bytes(&DataType::Int, &self.pid.to_string()).unwrap());
        bytes.extend_from_slice(&BytesCoder::attr_to_bytes(&DataType::Int, &self.capacity.to_string()).unwrap());
        bytes.extend_from_slice(&BytesCoder::attr_to_bytes(&DataType::Int, &self.block_length.to_string()).unwrap());

        HeaderBytes(bytes)
    }

    fn from_bytes(header_bytes: &HeaderBytes) -> Self {
        // Three big-endian Int fields at fixed 4-byte offsets.
        let bytes = &header_bytes.0;
        let pid = BytesCoder::bytes_to_attr(&DataType::Int, &bytes[0..4])
            .unwrap()
            .parse::<u32>()
            .unwrap();
        let capacity = BytesCoder::bytes_to_attr(&DataType::Int, &bytes[4..8])
            .unwrap()
            .parse::<usize>()
            .unwrap();
        let block_length = BytesCoder::bytes_to_attr(&DataType::Int, &bytes[8..12])
            .unwrap()
            .parse::<usize>()
            .unwrap();
        Self {
            pid,
            capacity,
            block_length,
        }
    }
}

impl Header for IndexInternalHeader {
    fn to_bytes(&self) -> HeaderBytes {
        let bytes: Bytes = vec![];
        // TODO: serialization not implemented yet.
        HeaderBytes(bytes)
    }

    fn from_bytes(header_bytes: &HeaderBytes) -> Self {
        // TODO: deserialization not implemented; returns placeholder values.
        let _bytes = &header_bytes.0;
        let pid = 0;
        let capacity = 0;
        let node_type = NodeType::Internal;
        let key_type = DataType::Int;
        let ptr_size = 0;
        let key_size = 0;
        Self {
            pid,
            capacity,
            node_type,
            key_type,
            ptr_size,
            key_size,
        }
    }
}

impl Header for IndexLeafHeader {
    fn to_bytes(&self) -> HeaderBytes {
        let bytes: Bytes = vec![];
        // TODO: serialization not implemented yet.
        HeaderBytes(bytes)
    }

    fn from_bytes(header_bytes: &HeaderBytes) -> Self {
        // TODO: deserialization not implemented; returns placeholder values.
        let _bytes = &header_bytes.0;
        let pid = 0;
        let capacity = 0;
        let node_type = NodeType::Leaf;
        let key_type = DataType::Int;
        let ptr_size = 0;
        let key_size = 0;
        let row_ptr_size = 0;
        Self {
            pid,
            capacity,
            node_type,
            key_type,
            ptr_size,
            key_size,
            row_ptr_size,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    pub fn test_file_header() {
        let header = FileHeader {
            pid: 1,
            capacity: 101,
            block_length: 128,
        };

        let header_bytes = header.to_bytes();
        let header = FileHeader::from_bytes(&header_bytes);

        assert_eq!(header.pid, 1);
        assert_eq!(header.capacity, 101);
        assert_eq!(header.block_length, 128);
    }

    #[test]
    pub fn test_create_file_page() {
        let _file_page = DataFilePage::new(0, 128);
    }
}
// -------------------------------------------------------------------------
// /src/storage/bytescoder.rs
// -------------------------------------------------------------------------
use crate::component::datatype::DataType;
use crate::component::table::Row;
use crate::storage::diskinterface::TableMeta;
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use std::fmt;
use std::io;
use std::num;
use std::string;

#[derive(Debug, Clone)]
pub struct BytesCoder {
    /* definition */
    // Ideally, BytesCoder is a stateless struct
}

#[derive(Debug, PartialEq, Clone)]
pub enum BytesCoderError {
    Io,
    ParseInt,
    ParseFloat,
    StringLength,
    StringDecode,
    UrlLength,
    AttrNotExists,
}

// Implement the `trim` method for byte slices
trait SliceExt {
    fn trim(&self) -> &Self;
}

impl SliceExt for [u8] {
    fn trim(&self) -> &[u8] {
        fn is_padding(c: &u8) -> bool {
            *c == 0 as u8
        }

        fn is_not_padding(c: &u8) -> bool {
            !is_padding(c)
        }

        if let Some(first) = self.iter().position(is_not_padding) {
            if let Some(last) = self.iter().rposition(is_not_padding) {
                &self[first..last + 1]
            } else {
                unreachable!();
            }
        } else {
            &[]
        }
    }
}

impl From<io::Error> for BytesCoderError {
    fn from(_err: io::Error) -> BytesCoderError {
        BytesCoderError::Io
    }
}

impl From<num::ParseIntError> for BytesCoderError {
    fn from(_err: num::ParseIntError) -> BytesCoderError {
        BytesCoderError::ParseInt
    }
}

impl From<num::ParseFloatError> for BytesCoderError {
    fn from(_err: num::ParseFloatError) -> BytesCoderError {
        BytesCoderError::ParseFloat
69 | } 70 | } 71 | 72 | impl From for BytesCoderError { 73 | fn from(_err: string::FromUtf8Error) -> BytesCoderError { 74 | BytesCoderError::StringDecode 75 | } 76 | } 77 | 78 | impl fmt::Display for BytesCoderError { 79 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 80 | match *self { 81 | BytesCoderError::Io => write!(f, "Error occurred during read/write from byte slices"), 82 | BytesCoderError::ParseInt => write!(f, "Error occurred during parsing value from Int data type."), 83 | BytesCoderError::ParseFloat => { 84 | write!(f, "Error occurred during parsing value from Float or Double data type.") 85 | } 86 | BytesCoderError::StringLength => write!(f, "The string attempt to store exceed the size of field."), 87 | BytesCoderError::StringDecode => write!(f, "Error occurred during decoding utf8 String from bytes."), 88 | BytesCoderError::UrlLength => write!(f, "The url attempt to store exceed the size of field."), 89 | BytesCoderError::AttrNotExists => write!(f, "The row does not contain specified attribute."), 90 | } 91 | } 92 | } 93 | 94 | impl BytesCoder { 95 | pub fn attr_to_bytes(datatype: &DataType, str_val: &str) -> Result, BytesCoderError> { 96 | let mut bs: Vec = vec![]; 97 | match datatype { 98 | DataType::Char(length) => { 99 | if str_val.len() > *length as usize { 100 | return Err(BytesCoderError::StringLength); 101 | } 102 | bs.extend_from_slice(str_val.as_bytes()); 103 | bs.extend_from_slice(&vec![0; *length as usize - str_val.len()]) 104 | } 105 | DataType::Double => bs.write_f64::(str_val.parse::()?)?, 106 | DataType::Float => bs.write_f32::(str_val.parse::()?)?, 107 | DataType::Int => bs.write_i32::(str_val.parse::()?)?, 108 | DataType::Varchar(length) => { 109 | if str_val.len() > *length as usize { 110 | return Err(BytesCoderError::StringLength); 111 | } 112 | bs.extend_from_slice(str_val.as_bytes()); 113 | bs.extend_from_slice(&vec![0; *length as usize - str_val.len()]) 114 | } 115 | DataType::Url => { 116 | if str_val.len() > 256 as 
usize { 117 | return Err(BytesCoderError::UrlLength); 118 | } 119 | bs.extend_from_slice(str_val.as_bytes()); 120 | bs.extend_from_slice(&vec![0; 256 as usize - str_val.len()]) 121 | } 122 | } 123 | 124 | Ok(bs) 125 | } 126 | 127 | pub fn bytes_to_attr(datatype: &DataType, bytes: &[u8]) -> Result { 128 | let s: String; 129 | match datatype { 130 | DataType::Char(_length) => s = String::from_utf8(bytes.trim().to_vec())?, 131 | DataType::Double => s = (&(*bytes)).read_f64::()?.to_string(), 132 | DataType::Float => s = (&(*bytes)).read_f32::()?.to_string(), 133 | DataType::Int => s = (&(*bytes)).read_i32::()?.to_string(), 134 | DataType::Varchar(_length) => s = String::from_utf8(bytes.trim().to_vec())?, 135 | DataType::Url => s = String::from_utf8(bytes.trim().to_vec())?, 136 | } 137 | 138 | Ok(s) 139 | } 140 | 141 | pub fn row_to_bytes(tablemeta: &TableMeta, row: &Row) -> Result, BytesCoderError> { 142 | // set `__valid__` to 1 143 | let mut row_bytes = vec![1]; 144 | for attr in tablemeta.attrs_order[1..].iter() { 145 | let attr_bytes = BytesCoder::attr_to_bytes( 146 | &tablemeta.attrs[attr].datatype, 147 | row.data.get(attr).ok_or_else(|| BytesCoderError::AttrNotExists)?, 148 | )?; 149 | row_bytes.extend_from_slice(&attr_bytes); 150 | } 151 | 152 | Ok(row_bytes) 153 | } 154 | 155 | pub fn bytes_to_row(tablemeta: &TableMeta, bytes: &Vec) -> Result { 156 | let mut attr_vals: Vec = vec![]; 157 | for (idx, attr) in tablemeta.attrs_order[1..].iter().enumerate() { 158 | let attr_bytes = bytes 159 | [tablemeta.attr_offset_ranges[idx + 1][0] as usize..tablemeta.attr_offset_ranges[idx + 1][1] as usize] 160 | .to_vec(); 161 | let attr_val = BytesCoder::bytes_to_attr(&tablemeta.attrs[attr].datatype, &attr_bytes)?; 162 | attr_vals.push(attr_val); 163 | } 164 | let mut new_row = Row::new(); 165 | for i in 0..attr_vals.len() { 166 | new_row 167 | .data 168 | .insert(tablemeta.attrs_order[i + 1].clone(), attr_vals[i].clone()); 169 | } 170 | 171 | Ok(new_row) 172 | } 173 | } 174 | 
175 | #[cfg(test)] 176 | mod tests { 177 | use super::*; 178 | use crate::component::field; 179 | use crate::component::field::Field; 180 | use std::collections::HashMap; 181 | 182 | #[test] 183 | pub fn test_attr_encode_decode() { 184 | let datatype = DataType::Char(10); 185 | let data = "test你好".to_string(); 186 | assert_eq!( 187 | BytesCoder::bytes_to_attr(&datatype, &BytesCoder::attr_to_bytes(&datatype, &data).unwrap()).unwrap(), 188 | data 189 | ); 190 | 191 | let datatype = DataType::Double; 192 | let data = "3.1415926".to_string(); 193 | assert_eq!( 194 | BytesCoder::bytes_to_attr(&datatype, &BytesCoder::attr_to_bytes(&datatype, &data).unwrap()).unwrap(), 195 | data 196 | ); 197 | 198 | let datatype = DataType::Float; 199 | let data = "2.71".to_string(); 200 | assert_eq!( 201 | BytesCoder::bytes_to_attr(&datatype, &BytesCoder::attr_to_bytes(&datatype, &data).unwrap()).unwrap(), 202 | data 203 | ); 204 | 205 | let datatype = DataType::Int; 206 | let data = "123456543".to_string(); 207 | assert_eq!( 208 | BytesCoder::bytes_to_attr(&datatype, &BytesCoder::attr_to_bytes(&datatype, &data).unwrap()).unwrap(), 209 | data 210 | ); 211 | 212 | let datatype = DataType::Varchar(100); 213 | let data = "abcdefghijklmnopqrstuvwxyz12345438967`+=/{}[]<>-_|%$#@!&^*()?,.".to_string(); 214 | assert_eq!( 215 | BytesCoder::bytes_to_attr(&datatype, &BytesCoder::attr_to_bytes(&datatype, &data).unwrap()).unwrap(), 216 | data 217 | ); 218 | 219 | let datatype = DataType::Char(10); 220 | assert_eq!( 221 | BytesCoder::attr_to_bytes(&datatype, &data).unwrap_err(), 222 | BytesCoderError::StringLength 223 | ); 224 | 225 | let datatype = DataType::Url; 226 | let data = "https://developer.mozilla.org/zh-TW/docs/Web/JavaScript/Guide/Regular_Expressions".to_string(); 227 | assert_eq!( 228 | BytesCoder::bytes_to_attr(&datatype, &BytesCoder::attr_to_bytes(&datatype, &data).unwrap()).unwrap(), 229 | data 230 | ); 231 | } 232 | 233 | #[test] 234 | pub fn test_row_encode_decode() { 235 | let mut 
aff_table_meta = TableMeta { 236 | name: "Affiliates".to_string(), 237 | username: "crazyguy".to_string(), 238 | db_name: "BookerDB".to_string(), 239 | path_tsv: "Affiliates.tsv".to_string(), 240 | path_bin: "Affiliates.bin".to_string(), 241 | primary_key: vec!["AffID".to_string()], 242 | foreign_key: vec![], 243 | reference_table: None, 244 | reference_attr: None, 245 | attr_offset_ranges: vec![vec![0, 1], vec![1, 5], vec![5, 55], vec![55, 95], vec![95, 115]], 246 | row_length: 115, 247 | // ignore attrs checking 248 | attrs_order: vec![ 249 | "__valid__".to_string(), 250 | "AffID".to_string(), 251 | "AffEmail".to_string(), 252 | "AffName".to_string(), 253 | "AffPhoneNum".to_string(), 254 | ], 255 | attrs: HashMap::new(), 256 | }; 257 | 258 | aff_table_meta.attrs.insert( 259 | "AffID".to_string(), 260 | Field::new_all("AffID", DataType::Int, true, None, field::Checker::None, false), 261 | ); 262 | aff_table_meta.attrs.insert( 263 | "AffName".to_string(), 264 | Field::new_all( 265 | "AffName", 266 | DataType::Varchar(40), 267 | true, 268 | None, 269 | field::Checker::None, 270 | false, 271 | ), 272 | ); 273 | aff_table_meta.attrs.insert( 274 | "AffEmail".to_string(), 275 | Field::new_all( 276 | "AffEmail", 277 | DataType::Varchar(50), 278 | true, 279 | None, 280 | field::Checker::None, 281 | false, 282 | ), 283 | ); 284 | aff_table_meta.attrs.insert( 285 | "AffPhoneNum".to_string(), 286 | Field::new_all( 287 | "AffPhoneNum", 288 | DataType::Varchar(20), 289 | false, 290 | Some("+886900000000".to_string()), 291 | field::Checker::None, 292 | false, 293 | ), 294 | ); 295 | 296 | let data = vec![ 297 | ("AffID", "2"), 298 | ("AffName", "Ben"), 299 | ("AffEmail", "ben@foo.com"), 300 | ("AffPhoneNum", "+886900000002"), 301 | ]; 302 | 303 | let mut row = Row::new(); 304 | for i in 0..data.len() { 305 | row.data.insert(data[i].0.to_string(), data[i].1.to_string()); 306 | } 307 | 308 | let reconstructed_row = BytesCoder::bytes_to_row( 309 | &aff_table_meta, 310 | 
            &BytesCoder::row_to_bytes(&aff_table_meta, &row).unwrap(),
        )
        .unwrap();

        for (attr, val) in row.data.iter() {
            assert_eq!(val.clone(), reconstructed_row.data[attr]);
        }
    }
}
// -------------------------------------------------------------------------
// /src/sql/symbol.rs
// -------------------------------------------------------------------------
use std::collections::HashMap;

/// A lexed SQL symbol: the source text, its length, and its token/group
/// classification.
#[derive(Debug, Clone)]
pub struct Symbol {
    pub name: String,
    // Cached `name.len()`; kept in sync by the `sym` constructor.
    pub len: usize,
    pub token: Token,
    pub group: Group,
}

/// Coarse classification of a symbol, used by the lexer/parser.
#[derive(Debug, PartialEq, Clone)]
pub enum Group {
    DataType,
    Function,
    Keyword,
    Operator,   // >, >=, =, !=, <>, <, <=
    Identifier, // t1, a, b
    Delimiter,  // `,`, (, )
}

/// Token includes keywords, functions, and data types (by alphabetical order)
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    /* SQL Keywords */
    Add,
    AddConstraint,
    AlterColumn,
    AlterTable,
    All,
    Any,
    As,
    Asc,
    Between,
    Case,
    Check,
    Column,
    Constraint,
    Create,
    CreateDatabase,
    CreateIndex,
    CreateOrReplaceView,
    CreateTable,
    CreateProcedure,
    CreateUniqueIndex,
    CreateView,
    Database,
    Default,
    Delete,
    Desc,
    Distinct,
    DropColumn,
    DropConstraint,
    DropDatabase,
    DropDefault,
    DropIndex,
    DropTable,
    DropView,
    Exec,
    Exists,
    ForeignKey,
    From,
    FullOuterJoin,
    GroupBy,
    Having,
    In,
    Index,
    InnerJoin,
    InsertInto,
    IsNull,
    IsNotNull,
    LeftJoin,
    Like,
    Limit,
    NotNull,
    On,
    OrderBy,
    Percent,
    PrimaryKey,
    Procedure,
    RightJoin,
    Rownum,
    Select,
    Set,
    Table,
    Top,
    TruncateTable,
    Union,
    UnionAll,
    Unique,
    Update,
    Values,
    View,
    Where,

    /* SQL Function */
    Avg,
    Count,
    Max,
    Min,
    Sum,

    /* SQL Data Type */
    Char,
    Double,
    Float,
    Int,
    Varchar,
    Url,

    /* Operator */
    LT, // <
    LE, // <=
    EQ, // =
    NE, // !=, <>
    GT, // >
    GE, // >=
    AND,
    NOT,
    OR,

    /* Delimiter */
    ParentLeft,  // (
    ParentRight, // )
    Comma,       // ,
    Semicolon,   // ;

    /* Any Identifier */
    Identifier,

    /* Define by StellarSQL */
    Encrypt,
}

/// Convenience constructor: builds a `Symbol` with `len` derived from `name`.
pub fn sym(name: &str, token: Token, group: Group) -> Symbol {
    Symbol {
        name: name.to_string(),
        len: name.len(),
        token,
        group,
    }
}

lazy_static! {
    /// A static struct of token hashmap storing all tokens
    pub static ref SYMBOLS: HashMap<&'static str, Symbol> = {
        let mut m = HashMap::new();

        // The following is maintained by hand according to `Token`

        /* SQL Keywords */
        m.insert("add", sym("add", Token::Add, Group::Keyword));
        m.insert("add constraint", sym("add constraint", Token::AddConstraint, Group::Keyword));
        m.insert("alter column", sym("alter column", Token::AlterColumn, Group::Keyword));
        m.insert("alter table", sym("alter table", Token::AlterTable, Group::Keyword));
        m.insert("all", sym("all", Token::All, Group::Keyword));
        m.insert("any", sym("any", Token::Any, Group::Keyword));
        m.insert("as", sym("as", Token::As, Group::Keyword));
        m.insert("asc", sym("asc", Token::Asc, Group::Keyword));
        m.insert("between", sym("between", Token::Between, Group::Keyword));
        m.insert("case", sym("case", Token::Case, Group::Keyword));
        m.insert("check", sym("check", Token::Check, Group::Keyword));
        m.insert("column", sym("column", Token::Column, Group::Keyword));
        m.insert("constraint", sym("constraint", Token::Constraint, Group::Keyword));
m.insert("create", sym("create", Token::Create, Group::Keyword)); 165 | m.insert("create database", sym("create database", Token::CreateDatabase, Group::Keyword)); 166 | m.insert("create index", sym("create index", Token::CreateIndex, Group::Keyword)); 167 | m.insert("create or replace view", sym("create or replace view", Token::CreateOrReplaceView, Group::Keyword)); 168 | m.insert("create table", sym("create table", Token::CreateTable, Group::Keyword)); 169 | m.insert("create procedure", sym("create procedure", Token::CreateProcedure, Group::Keyword)); 170 | m.insert("create unique index", sym("create unique index", Token::CreateUniqueIndex, Group::Keyword)); 171 | m.insert("create view", sym("create view", Token::CreateView, Group::Keyword)); 172 | m.insert("database", sym("database", Token::Database, Group::Keyword)); 173 | m.insert("default", sym("default", Token::Default, Group::Keyword)); 174 | m.insert("delete", sym("delete", Token::Delete, Group::Keyword)); 175 | m.insert("desc", sym("desc", Token::Desc, Group::Keyword)); 176 | m.insert("distinct", sym("distinct", Token::Distinct, Group::Keyword)); 177 | m.insert("drop column", sym("drop column", Token::DropColumn, Group::Keyword)); 178 | m.insert("drop constraint", sym("drop constraint", Token::DropConstraint, Group::Keyword)); 179 | m.insert("drop database", sym("drop database", Token::DropDatabase, Group::Keyword)); 180 | m.insert("drop default", sym("drop default", Token::DropDefault, Group::Keyword)); 181 | m.insert("drop index", sym("drop index", Token::DropIndex, Group::Keyword)); 182 | m.insert("drop table", sym("drop table", Token::DropTable, Group::Keyword)); 183 | m.insert("drop view", sym("drop view", Token::DropView, Group::Keyword)); 184 | m.insert("exec", sym("exec", Token::Exec, Group::Keyword)); 185 | m.insert("exists", sym("exists", Token::Exists, Group::Keyword)); 186 | m.insert("foreign key", sym("foreign key", Token::ForeignKey, Group::Keyword)); 187 | m.insert("from", sym("from", 
Token::From, Group::Keyword)); 188 | m.insert("full outer join", sym("full outer join", Token::FullOuterJoin, Group::Keyword)); 189 | m.insert("group by", sym("group by", Token::GroupBy, Group::Keyword)); 190 | m.insert("having", sym("having", Token::Having, Group::Keyword)); 191 | m.insert("in", sym("in", Token::In, Group::Keyword)); 192 | m.insert("index", sym("index", Token::Index, Group::Keyword)); 193 | m.insert("inner join", sym("inner join", Token::InnerJoin, Group::Keyword)); 194 | m.insert("insert into", sym("insert into", Token::InsertInto, Group::Keyword)); 195 | m.insert("is null", sym("is null", Token::IsNull, Group::Keyword)); 196 | m.insert("is not null", sym("is not null", Token::IsNotNull, Group::Keyword)); 197 | m.insert("left join", sym("left join", Token::LeftJoin, Group::Keyword)); 198 | m.insert("like", sym("like", Token::Like, Group::Keyword)); 199 | m.insert("limit", sym("limit", Token::Limit, Group::Keyword)); 200 | m.insert("not null", sym("not null", Token::NotNull, Group::Keyword)); 201 | m.insert("on", sym("on", Token::On, Group::Keyword)); 202 | m.insert("order by", sym("order by", Token::OrderBy, Group::Keyword)); 203 | m.insert("percent", sym("percent", Token::Percent, Group::Keyword)); 204 | m.insert("primary key", sym("primary key", Token::PrimaryKey, Group::Keyword)); 205 | m.insert("procedure", sym("procedure", Token::Procedure, Group::Keyword)); 206 | m.insert("right join", sym("right join", Token::RightJoin, Group::Keyword)); 207 | m.insert("rownum", sym("rownum", Token::Rownum, Group::Keyword)); 208 | m.insert("select", sym("select", Token::Select, Group::Keyword)); 209 | m.insert("set", sym("set", Token::Set, Group::Keyword)); 210 | m.insert("table", sym("table", Token::Table, Group::Keyword)); 211 | m.insert("top", sym("top", Token::Top, Group::Keyword)); 212 | m.insert("truncate table", sym("truncate table", Token::TruncateTable, Group::Keyword)); 213 | m.insert("union", sym("union", Token::Union, Group::Keyword)); 214 | 
m.insert("union all", sym("union all", Token::UnionAll, Group::Keyword)); 215 | m.insert("unique", sym("unique", Token::Unique, Group::Keyword)); 216 | m.insert("update", sym("update", Token::Update, Group::Keyword)); 217 | m.insert("values", sym("values", Token::Values, Group::Keyword)); 218 | m.insert("view", sym("view", Token::View, Group::Keyword)); 219 | m.insert("where", sym("where", Token::Where, Group::Keyword)); 220 | 221 | /* SQL Function */ 222 | m.insert("avg", sym("avg", Token::Avg, Group::Function)); 223 | m.insert("count", sym("count", Token::Count, Group::Function)); 224 | m.insert("max", sym("max", Token::Max, Group::Function)); 225 | m.insert("min", sym("min", Token::Min, Group::Function)); 226 | m.insert("sum", sym("sum", Token::Sum, Group::Function)); 227 | 228 | /* SQL Data Type */ 229 | m.insert("char", sym("char", Token::Char, Group::DataType)); 230 | m.insert("double", sym("double", Token::Double, Group::DataType)); 231 | m.insert("float", sym("float", Token::Float, Group::DataType)); 232 | m.insert("int", sym("int", Token::Int, Group::DataType)); 233 | m.insert("varchar", sym("varchar", Token::Varchar, Group::DataType)); 234 | m.insert("url", sym("url", Token::Url, Group::DataType)); 235 | 236 | /* Operator */ 237 | m.insert(">", sym(">", Token::GT, Group::Operator)); 238 | m.insert(">=", sym(">=", Token::GE, Group::Operator)); 239 | m.insert("=", sym("=", Token::EQ, Group::Operator)); 240 | m.insert("!=", sym("!=", Token::NE, Group::Operator)); 241 | m.insert("<>", sym("<>", Token::NE, Group::Operator)); 242 | m.insert("<", sym("<", Token::LT, Group::Operator)); 243 | m.insert("<=", sym("<=", Token::LE, Group::Operator)); 244 | m.insert("and", sym("and", Token::AND, Group::Operator)); 245 | m.insert("not", sym("not", Token::NOT, Group::Operator)); 246 | m.insert("or", sym("or", Token::OR, Group::Operator)); 247 | 248 | /* StellarSQL */ 249 | m.insert("encrypt", sym("encrypt", Token::Encrypt, Group::Keyword)); 250 | 251 | m //return m 252 | 
}; 253 | } 254 | 255 | impl Symbol { 256 | pub fn match_delimiter(ch: char) -> Option { 257 | match ch { 258 | '(' => Some(sym("(", Token::ParentLeft, Group::Delimiter)), 259 | ')' => Some(sym(")", Token::ParentRight, Group::Delimiter)), 260 | ',' => Some(sym(",", Token::Comma, Group::Delimiter)), 261 | ';' => Some(sym(";", Token::Semicolon, Group::Delimiter)), 262 | _ => None, 263 | } 264 | } 265 | } 266 | 267 | /// Check if the word is the first word of any multi-word keywords, and then 268 | /// return how many words of all possible keywords. 269 | /// ex: `alter` could be `alter table` and `alter column`, so return `Some(vec![2])` 270 | /// `is` could be `is null` and `is not null`, so return `Some(vec![2, 3])` 271 | pub fn check_multi_keywords_front(s: &str) -> Option> { 272 | match s { 273 | "add" => Some(vec![2]), 274 | "alter" => Some(vec![2]), 275 | "create" => Some(vec![2, 3, 4]), 276 | "drop" => Some(vec![2]), 277 | "foreign" => Some(vec![2]), 278 | "full" => Some(vec![2]), 279 | "group" => Some(vec![2]), 280 | "inner" => Some(vec![2]), 281 | "insert" => Some(vec![2]), 282 | "is" => Some(vec![2, 3]), 283 | "left" => Some(vec![2]), 284 | "not" => Some(vec![2]), 285 | "order" => Some(vec![2]), 286 | "outer" => Some(vec![2]), 287 | "primary" => Some(vec![2]), 288 | "right" => Some(vec![2]), 289 | "select" => Some(vec![2]), 290 | "truncate" => Some(vec![2]), 291 | "union" => Some(vec![2]), 292 | _ => return None, 293 | } 294 | } 295 | 296 | #[cfg(test)] 297 | mod tests { 298 | use super::*; 299 | 300 | /// Test if `SYMBOLS` initialize. 
301 | #[test] 302 | fn test_symbols() { 303 | let s = SYMBOLS.get("add").unwrap(); 304 | assert_eq!(s.name, "add"); 305 | assert_eq!(s.len, 3); 306 | assert_eq!(s.token, Token::Add); 307 | assert_eq!(s.group, Group::Keyword); 308 | let s = SYMBOLS.get(">").unwrap(); 309 | assert_eq!(s.name, ">"); 310 | assert_eq!(s.len, 1); 311 | assert_eq!(s.token, Token::GT); 312 | assert_eq!(s.group, Group::Operator); 313 | let s = SYMBOLS.get("url").unwrap(); 314 | assert_eq!(s.name, "url"); 315 | assert_eq!(s.len, 3); 316 | assert_eq!(s.token, Token::Url); 317 | assert_eq!(s.group, Group::DataType); 318 | } 319 | 320 | #[test] 321 | fn test_match_delimiter() { 322 | let mut chs = "){".chars(); 323 | let x = chs.next().unwrap(); 324 | let s = Symbol::match_delimiter(x).unwrap(); 325 | assert_eq!(s.token, Token::ParentRight); 326 | let x = chs.next().unwrap(); 327 | assert!(Symbol::match_delimiter(x).is_none()); 328 | } 329 | 330 | #[test] 331 | fn test_check_multi_keywords_front() { 332 | assert_eq!(check_multi_keywords_front("alter"), Some(vec![2])); 333 | assert!(check_multi_keywords_front("not_match").is_none()); 334 | } 335 | } 336 | -------------------------------------------------------------------------------- /src/storage/index.rs: -------------------------------------------------------------------------------- 1 | extern crate byteorder; 2 | 3 | use crate::component::table::Row; 4 | use crate::storage::bytescoder::BytesCoder; 5 | use crate::storage::diskinterface::{DiskError, DiskInterface, TableMeta}; 6 | use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; 7 | use std::fs; 8 | use std::io::{BufReader, Read, Write}; 9 | use std::path::Path; 10 | 11 | pub struct Index { 12 | table_meta: TableMeta, 13 | index_data: Vec, 14 | num_rows: u32, // row number of the table including deleted 15 | } 16 | 17 | /// (row, key_value) pair 18 | #[derive(Debug, Clone, Eq, Ord, PartialEq, PartialOrd)] 19 | pub struct RowPair { 20 | row: u32, 21 | key_value: Vec, 22 | } 23 | 24 | 
impl RowPair { 25 | pub fn new(row: u32, key_value: Vec) -> Self { 26 | RowPair { row, key_value } 27 | } 28 | 29 | pub fn to_bytes(&self) -> Result, DiskError> { 30 | let mut bs: Vec = vec![]; 31 | bs.write_u32::(self.row)?; 32 | bs.extend_from_slice(&self.key_value); 33 | 34 | Ok(bs) 35 | } 36 | } 37 | 38 | #[allow(dead_code)] 39 | impl Index { 40 | //// construct a new Index 41 | pub fn new(table_meta: TableMeta) -> Result { 42 | Ok(Index { 43 | table_meta, 44 | index_data: vec![], 45 | num_rows: 0, 46 | }) 47 | } 48 | 49 | /// build index from table bin file 50 | pub fn build_from_bin(&mut self, base_path: &str) -> Result<(), DiskError> { 51 | // perform storage check toward table level 52 | DiskInterface::storage_hierarchy_check( 53 | base_path, 54 | Some(&self.table_meta.username), 55 | Some(&self.table_meta.db_name), 56 | Some(&self.table_meta.name), 57 | ) 58 | .map_err(|e| e)?; 59 | 60 | // load table bin as chunk of bytes 61 | let table_bin_path = format!( 62 | "{}/{}/{}/{}.bin", 63 | base_path, self.table_meta.username, self.table_meta.db_name, self.table_meta.name 64 | ); 65 | let table_bin_file = fs::File::open(&table_bin_path)?; 66 | let mut buffered = BufReader::new(table_bin_file); 67 | 68 | let mut chunk_bytes = vec![]; 69 | buffered.read_to_end(&mut chunk_bytes)?; 70 | 71 | // parse chunk of bytes to vector of rows 72 | let mut new_index_data: Vec = vec![]; 73 | let mut num_rows: u32 = 0; 74 | for (row_id, row_bytes) in chunk_bytes.chunks(self.table_meta.row_length as usize).enumerate() { 75 | // ignore deleted rows 76 | if row_bytes[0] == 1 as u8 { 77 | new_index_data.push(RowPair::new( 78 | row_id as u32, 79 | row_bytes[self.table_meta.attr_offset_ranges[1][0] as usize 80 | ..self.table_meta.attr_offset_ranges[1][1] as usize] 81 | .to_vec(), 82 | )); 83 | } 84 | num_rows += 1; 85 | } 86 | 87 | new_index_data.sort_by(|rp1, rp2| rp1.key_value.cmp(&rp2.key_value)); 88 | 89 | self.index_data = new_index_data; 90 | self.num_rows = num_rows; 91 | 92 | 
Ok(()) 93 | } 94 | 95 | /// save(overwrite) index table into index file 96 | pub fn save(&self, base_path: &str) -> Result<(), DiskError> { 97 | // perform storage check toward table level 98 | DiskInterface::storage_hierarchy_check( 99 | base_path, 100 | Some(&self.table_meta.username), 101 | Some(&self.table_meta.db_name), 102 | Some(&self.table_meta.name), 103 | ) 104 | .map_err(|e| e)?; 105 | 106 | // create chunk of bytes to be written 107 | let mut chunk_bytes = vec![]; 108 | for rp in self.index_data.iter() { 109 | chunk_bytes.extend_from_slice(&rp.to_bytes()?); 110 | } 111 | 112 | // write chunk of bytes to index bin 113 | let index_bin_path = format!( 114 | "{}/{}/{}/{}_{}.idx", 115 | base_path, 116 | self.table_meta.username, 117 | self.table_meta.db_name, 118 | self.table_meta.name, 119 | self.table_meta.primary_key[0] 120 | ); 121 | let mut index_bin_file = fs::OpenOptions::new() 122 | .write(true) 123 | .create(true) 124 | .truncate(true) 125 | .open(index_bin_path)?; 126 | index_bin_file.write_all(&chunk_bytes)?; 127 | 128 | Ok(()) 129 | } 130 | 131 | /// Load index from storage 132 | pub fn load(&mut self, base_path: &str) -> Result<(), DiskError> { 133 | // perform storage check toward table level 134 | DiskInterface::storage_hierarchy_check( 135 | base_path, 136 | Some(&self.table_meta.username), 137 | Some(&self.table_meta.db_name), 138 | Some(&self.table_meta.name), 139 | ) 140 | .map_err(|e| e)?; 141 | 142 | let index_bin_path = format!( 143 | "{}/{}/{}/{}_{}.idx", 144 | base_path, 145 | self.table_meta.username, 146 | self.table_meta.db_name, 147 | self.table_meta.name, 148 | self.table_meta.primary_key[0] 149 | ); 150 | if !Path::new(&index_bin_path).exists() { 151 | return Err(DiskError::TableIdxFileNotExists); 152 | } 153 | let index_bin_file = fs::File::open(&index_bin_path)?; 154 | let mut buffered = BufReader::new(index_bin_file); 155 | 156 | let mut chunk_bytes = vec![]; 157 | buffered.read_to_end(&mut chunk_bytes)?; 158 | 159 | // parse 
chunk of bytes to vector of rows 160 | let mut index_data: Vec = vec![]; 161 | for rp_bytes in chunk_bytes 162 | .chunks((self.table_meta.attr_offset_ranges[1][1] - self.table_meta.attr_offset_ranges[1][0] + 4) as usize) 163 | { 164 | index_data.push(RowPair::new( 165 | (&rp_bytes[..4]).read_u32::()?, 166 | rp_bytes[4..].to_vec(), 167 | )); 168 | } 169 | 170 | self.index_data = index_data; 171 | self.num_rows = DiskInterface::get_num_rows( 172 | &self.table_meta.username, 173 | &self.table_meta.db_name, 174 | &self.table_meta.name, 175 | Some(base_path), 176 | )?; 177 | 178 | Ok(()) 179 | } 180 | 181 | /// insert a row-key pair into the index 182 | pub fn insert(&mut self, row: &Row) -> Result<(), DiskError> { 183 | let new_row_pair = RowPair::new( 184 | self.num_rows.clone(), 185 | BytesCoder::attr_to_bytes( 186 | &self.table_meta.attrs[&self.table_meta.primary_key[0]].datatype, 187 | row.data 188 | .get(&self.table_meta.primary_key[0]) 189 | .ok_or_else(|| DiskError::AttrNotExists)?, 190 | )?, 191 | ); 192 | match self 193 | .index_data 194 | .binary_search_by(|rp| rp.key_value.cmp(&new_row_pair.key_value)) 195 | { 196 | Ok(_pos) => return Err(DiskError::DuplicatedKey), 197 | Err(pos) => { 198 | self.index_data.insert(pos, new_row_pair); 199 | self.num_rows += 1; 200 | } 201 | } 202 | 203 | Ok(()) 204 | } 205 | 206 | /// delete a row-key pair from the index 207 | pub fn delete(&mut self, row: &Row) -> Result<(), DiskError> { 208 | let key_val = BytesCoder::attr_to_bytes( 209 | &self.table_meta.attrs[&self.table_meta.primary_key[0]].datatype, 210 | row.data 211 | .get(&self.table_meta.primary_key[0]) 212 | .ok_or_else(|| DiskError::AttrNotExists)?, 213 | )?; 214 | match self.index_data.binary_search_by(|rp| rp.key_value.cmp(&key_val)) { 215 | Ok(pos) => self.index_data.remove(pos), 216 | Err(_pos) => return Err(DiskError::IndexKeyNotFound), 217 | }; 218 | 219 | Ok(()) 220 | } 221 | } 222 | 223 | #[cfg(test)] 224 | mod tests { 225 | use super::*; 226 | use 
crate::component::datatype::DataType; 227 | use crate::component::field; 228 | use crate::component::field::Field; 229 | use crate::component::table::Table; 230 | use std::fs; 231 | use std::path::Path; 232 | 233 | #[test] 234 | pub fn test_build_save_load_insert_delete() { 235 | let file_base_path = "data9"; 236 | if Path::new(file_base_path).exists() { 237 | fs::remove_dir_all(file_base_path).unwrap(); 238 | } 239 | 240 | DiskInterface::create_file_base(Some(file_base_path)).unwrap(); 241 | DiskInterface::create_username("crazyguy", Some(file_base_path)).unwrap(); 242 | DiskInterface::create_db("crazyguy", "BookerDB", Some(file_base_path)).unwrap(); 243 | 244 | let mut aff_table = Table::new("Affiliates"); 245 | aff_table.fields.insert( 246 | "AffID".to_string(), 247 | Field::new_all("AffID", DataType::Int, true, None, field::Checker::None, false), 248 | ); 249 | aff_table.fields.insert( 250 | "AffName".to_string(), 251 | Field::new_all( 252 | "AffName", 253 | DataType::Varchar(40), 254 | true, 255 | None, 256 | field::Checker::None, 257 | false, 258 | ), 259 | ); 260 | aff_table.fields.insert( 261 | "AffEmail".to_string(), 262 | Field::new_all( 263 | "AffEmail", 264 | DataType::Varchar(50), 265 | true, 266 | None, 267 | field::Checker::None, 268 | false, 269 | ), 270 | ); 271 | aff_table.fields.insert( 272 | "AffPhoneNum".to_string(), 273 | Field::new_all( 274 | "AffPhoneNum", 275 | DataType::Varchar(20), 276 | false, 277 | Some("+886900000000".to_string()), 278 | field::Checker::None, 279 | false, 280 | ), 281 | ); 282 | aff_table.primary_key.push("AffID".to_string()); 283 | 284 | DiskInterface::create_table("crazyguy", "BookerDB", &aff_table, Some(file_base_path)).unwrap(); 285 | 286 | let data = vec![ 287 | ("AffID", "2"), 288 | ("AffName", "Tom"), 289 | ("AffEmail", "tom@foo.com"), 290 | ("AffPhoneNum", "+886900000001"), 291 | ]; 292 | aff_table.insert_row(data).unwrap(); 293 | 294 | let data = vec![ 295 | ("AffID", "7"), 296 | ("AffName", "Ben"), 297 | 
("AffEmail", "ben@foo.com"), 298 | ("AffPhoneNum", "+886900000002"), 299 | ]; 300 | aff_table.insert_row(data).unwrap(); 301 | 302 | // to be deleted 303 | let data = vec![ 304 | ("AffID", "6"), 305 | ("AffName", "Leo"), 306 | ("AffEmail", "leo@dee.com"), 307 | ("AffPhoneNum", "+886900000003"), 308 | ]; 309 | aff_table.insert_row(data).unwrap(); 310 | 311 | let data = vec![ 312 | ("AffID", "1"), 313 | ("AffName", "John"), 314 | ("AffEmail", "john@dee.com"), 315 | ("AffPhoneNum", "+886900000004"), 316 | ]; 317 | aff_table.insert_row(data).unwrap(); 318 | 319 | // to be deleted 320 | let data = vec![ 321 | ("AffID", "4"), 322 | ("AffName", "Ray"), 323 | ("AffEmail", "ray@dee.com"), 324 | ("AffPhoneNum", "+886900000005"), 325 | ]; 326 | aff_table.insert_row(data).unwrap(); 327 | 328 | // to be deleted 329 | let data = vec![ 330 | ("AffID", "5"), 331 | ("AffName", "Bryn"), 332 | ("AffEmail", "bryn@dee.com"), 333 | ("AffPhoneNum", "+886900000006"), 334 | ]; 335 | aff_table.insert_row(data).unwrap(); 336 | 337 | let data = vec![ 338 | ("AffID", "8"), 339 | ("AffName", "Eric"), 340 | ("AffEmail", "eric@doo.com"), 341 | ("AffPhoneNum", "+886900000007"), 342 | ]; 343 | aff_table.insert_row(data).unwrap(); 344 | 345 | let data = vec![ 346 | ("AffID", "3"), 347 | ("AffName", "Vinc"), 348 | ("AffEmail", "vinc@doo.com"), 349 | ("AffPhoneNum", "+886900000008"), 350 | ]; 351 | aff_table.insert_row(data).unwrap(); 352 | 353 | DiskInterface::append_rows( 354 | "crazyguy", 355 | "BookerDB", 356 | "Affiliates", 357 | &aff_table.rows[..].iter().cloned().collect(), 358 | Some(file_base_path), 359 | ) 360 | .unwrap(); 361 | 362 | DiskInterface::delete_rows("crazyguy", "BookerDB", "Affiliates", &vec![2, 3], Some(file_base_path)).unwrap(); 363 | DiskInterface::delete_rows("crazyguy", "BookerDB", "Affiliates", &vec![4, 6], Some(file_base_path)).unwrap(); 364 | 365 | let index = 366 | DiskInterface::build_index_from_table_bin("crazyguy", "BookerDB", "Affiliates", Some(file_base_path)) 367 | 
.unwrap(); 368 | 369 | assert_eq!(index.index_data.len(), 5); 370 | assert_eq!(index.num_rows, 8); 371 | 372 | for i in 1..index.index_data.len() { 373 | assert!(index.index_data[i - 1].key_value < index.index_data[i].key_value); 374 | } 375 | 376 | let index_data = index.index_data.to_vec(); 377 | 378 | DiskInterface::save_index(&index, Some(file_base_path)).unwrap(); 379 | let mut index = DiskInterface::load_index("crazyguy", "BookerDB", "Affiliates", Some(file_base_path)).unwrap(); 380 | 381 | assert_eq!(index_data, index.index_data); 382 | 383 | let data = vec![ 384 | ("AffID", "5"), 385 | ("AffName", "Allie"), 386 | ("AffEmail", "allie@doo.com"), 387 | ("AffPhoneNum", "+886900000005"), 388 | ]; 389 | aff_table.insert_row(data).unwrap(); 390 | 391 | index.insert(&aff_table.rows[aff_table.rows.len() - 1]).unwrap(); 392 | 393 | assert_eq!(index.index_data.len(), 6); 394 | for i in 1..index.index_data.len() { 395 | assert!(index.index_data[i - 1].key_value < index.index_data[i].key_value); 396 | } 397 | assert_eq!(index.num_rows, 9); 398 | 399 | assert_eq!( 400 | index.insert(&aff_table.rows[aff_table.rows.len() - 1]).unwrap_err(), 401 | DiskError::DuplicatedKey, 402 | ); 403 | 404 | index.delete(&aff_table.rows[aff_table.rows.len() - 2]).unwrap(); 405 | 406 | assert_eq!(index.index_data.len(), 5); 407 | for i in 1..index.index_data.len() { 408 | assert!(index.index_data[i - 1].key_value < index.index_data[i].key_value); 409 | } 410 | assert_eq!(index.num_rows, 9); 411 | 412 | assert_eq!( 413 | index.delete(&aff_table.rows[aff_table.rows.len() - 2]).unwrap_err(), 414 | DiskError::IndexKeyNotFound, 415 | ); 416 | 417 | let index_data = index.index_data.to_vec(); 418 | 419 | DiskInterface::save_index(&index, Some(file_base_path)).unwrap(); 420 | let index = DiskInterface::load_index("crazyguy", "BookerDB", "Affiliates", Some(file_base_path)).unwrap(); 421 | 422 | assert_eq!(index_data, index.index_data); 423 | } 424 | } 425 | 
-------------------------------------------------------------------------------- /src/sql/worker.rs: -------------------------------------------------------------------------------- 1 | use crate::component::database::Database; 2 | use crate::component::database::DatabaseError; 3 | use crate::component::table::Table; 4 | use crate::sql::query::NodePtr; 5 | use crate::sql::query::QueryData; 6 | use std::collections::HashSet; 7 | use std::fmt; 8 | 9 | #[derive(Debug)] 10 | pub struct SQL { 11 | pub user: User, 12 | pub database: Database, 13 | pub querydata: QueryData, 14 | pub result_json: String, 15 | } 16 | 17 | #[derive(Debug)] 18 | pub struct User { 19 | pub name: String, 20 | pub key: i32, 21 | } 22 | impl User { 23 | pub fn new(username: &str) -> User { 24 | User { 25 | name: username.to_string(), 26 | key: 0, 27 | } 28 | } 29 | } 30 | 31 | #[derive(Debug)] 32 | pub enum SQLError { 33 | CauserByDatabase(DatabaseError), 34 | SemanticError(String), 35 | } 36 | 37 | impl fmt::Display for SQLError { 38 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 39 | match *self { 40 | SQLError::CauserByDatabase(ref e) => write!(f, "{}", e), 41 | SQLError::SemanticError(ref s) => write!(f, "semantic error: {}", s), 42 | } 43 | } 44 | } 45 | 46 | impl SQL { 47 | pub fn new(username: &str) -> Result { 48 | Ok(SQL { 49 | user: User::new(username), 50 | database: Database::new(""), // empty db 51 | querydata: QueryData::new(), 52 | result_json: "".to_string(), 53 | }) 54 | } 55 | 56 | // Create a new database 57 | pub fn create_database(&mut self, db_name: &str) -> Result<(), SQLError> { 58 | self.database = Database::new(db_name); 59 | Ok(()) 60 | } 61 | 62 | /// Load a database 63 | pub fn load_database(&mut self, db_name: &str) -> Result<(), SQLError> { 64 | self.database = Database::load_db(&self.user.name, db_name).map_err(|e| SQLError::CauserByDatabase(e))?; 65 | Ok(()) 66 | } 67 | 68 | // TODO 69 | /// Drop the database 70 | pub fn drop_database(&mut self, db_name: 
&str) -> Result<(), SQLError> { 71 | Ok(()) 72 | } 73 | 74 | // TODO: check db delete bit 75 | /// Load the database and create a new table 76 | pub fn create_table(&mut self, table: &Table) -> Result<(), SQLError> { 77 | self.database.insert_new_table(table.clone()); 78 | Ok(()) 79 | } 80 | 81 | // TODO 82 | /// Drop the table 83 | pub fn drop_table(&mut self, table_name: &str) -> Result<(), SQLError> { 84 | Ok(()) 85 | } 86 | 87 | // TODO: check db, table delete bit 88 | /// Insert new rows into the table 89 | pub fn insert_into_table( 90 | &mut self, 91 | table_name: &str, 92 | attrs: Vec, 93 | rows: Vec>, 94 | ) -> Result<(), SQLError> { 95 | let table = self 96 | .database 97 | .tables 98 | .get_mut(table_name) 99 | .ok_or(SQLError::SemanticError("table not exists".to_string()))?; 100 | if table.public_key == 0 { 101 | table.public_key = self.user.key; 102 | } 103 | 104 | for row in rows { 105 | let mut row_in_pair: Vec<(&str, &str)> = Vec::new(); 106 | for i in 0..attrs.len() { 107 | row_in_pair.push((&attrs[i], &row[i])); 108 | } 109 | table 110 | .insert_row(row_in_pair) 111 | .map_err(|e| SQLError::SemanticError(format!("{}", e)))?; 112 | } 113 | 114 | Ok(()) 115 | } 116 | 117 | /// Handle the `select` query 118 | /// 119 | /// Syntax: 120 | /// 121 | /// ```sql 122 | /// (8) SELECT (9) DISTINCT (11) TOP 123 | /// (1) FROM 124 | /// (3) JOIN 125 | /// (2) ON 126 | /// (4) WHERE 127 | /// (5) GROUP BY 128 | /// (6) WITH {CUBE | ROLLUP} 129 | /// (7) HAVING 130 | /// (10) ORDER BY 131 | /// ``` 132 | /// 133 | /// Process: 134 | /// 0. Semantic check: tables exists, fields exists, predicate is valid. 135 | /// 1. `FROM`: If there is no where clause or join-on clause join, the virtual table 136 | /// `VT3` is that table, and go step 4. Else, it is a join. A Cartesian product 137 | /// (cross join) is performed between each two tables, and as a result: 138 | /// - 1-1. 
If the number of tables between `FROM` and `JOIN` are more than one, 139 | /// it is a where-clause inner join. Cross join tables and generate virtual 140 | /// table `VT3`. Go step 4. 141 | /// - 1-2. Otherwise, there should be only a table between `FROM` and `JOIN`, and 142 | /// there must be `JOIN ON` clause(s). If the first `JOIN ON`, cross join 143 | /// `FROM` and `JOIN` to make `VT1`, else cross join `VT1` and the next 144 | /// `JOIN`. Go step 2. 145 | /// 2. `ON`: The `ON` filter is applied to `VT1`. Only rows for which the 146 | /// `` is `TRUE` are inserted to `VT2`. 147 | /// 3. `OUTER` (join): If an `OUTER JOIN` is specified (as opposed to 148 | /// an `INNER JOIN`), rows from the preserved table or tables for 149 | /// which a match was not found are added to the rows from `VT2` as outer 150 | /// rows, generating `VT3`. If more than two tables appear in the `FROM` 151 | /// clause, steps 1 through 3 are applied repeatedly between the result 152 | /// of the last join and the next table in the `FROM` clause until all 153 | /// tables are processed. 154 | /// 4. `WHERE`: The `WHERE` filter is applied to `VT3`. Only rows for which 155 | /// the `` is `TRUE` are inserted to `VT4`. 156 | /// 5. `GROUP BY`: The rows from `VT4` are arranged in groups based on the 157 | /// column list specified in the `GROUP BY` clause. `VT5` is generated. 158 | /// 6. `CUBE | ROLLUP`: Supergroups (groups of groups) are added to the 159 | /// rows from `VT5`, generating `VT6`. 160 | /// 7. `HAVING`: The `HAVING` filter is applied to `VT6`. Only groups for which 161 | /// the `` is `TRUE` are inserted to `VT7`. 162 | /// 8. `SELECT`: The `SELECT` list is processed, generating `VT8`. 163 | /// 9. `DISTINCT`: Duplicate rows are removed from `VT8`. `VT9` is generated. 164 | /// 10. `ORDER BY`: The rows from `VT9` are sorted according to the column list 165 | /// specified in the ORDER BY clause. A cursor is generated (`VC10`). 166 | /// 11. 
`TOP`: The specified number or percentage of rows is selected from 167 | /// the beginning of `VC10`. Table `VT11` is generated and returned to the 168 | /// caller. 169 | /// 170 | /// reference: [stack overflow #1018822](https://stackoverflow.com/a/1944492/6798649) 171 | pub fn select(&mut self) -> Result<(), SQLError> { 172 | let mut is_where_clause = false; 173 | let mut is_join_on_clause = false; 174 | 175 | // TODO: step 0 176 | 177 | // step 1 178 | 179 | // copy the first table 180 | let mut vt1 = self 181 | .database 182 | .tables 183 | .get(&self.querydata.tables[0]) 184 | .ok_or(SQLError::SemanticError("table not exists".to_string()))? 185 | .clone(); 186 | 187 | // dealing cross joins 188 | if self.querydata.tables.len() > 1 { 189 | is_where_clause = true; 190 | } 191 | if self.querydata.joins.len() > 0 { 192 | is_join_on_clause = true; 193 | } 194 | 195 | let mut vt3 = Table::new(""); // stand by 196 | 197 | match (is_where_clause, is_join_on_clause) { 198 | (true, false) => { 199 | // TODO: step 1.1 200 | } 201 | (false, true) => { 202 | // TODO: step 1.2 203 | } 204 | (true, true) => { 205 | return Err(SQLError::SemanticError(String::from( 206 | "where and join on clause cannot be together", 207 | ))); 208 | } 209 | (false, false) => { 210 | // No join. The virtual table is the table. 
211 | vt1.load_all_rows_data(&self.user.name, &self.database.name) 212 | .map_err(|e| SQLError::SemanticError(format!("{}", e)))?; 213 | vt3 = vt1; 214 | } 215 | } 216 | 217 | // step 4 218 | let mut vt4; 219 | if self.querydata.predicate.is_some() { 220 | table_predicate(&mut vt3, &mut self.querydata.predicate)?; 221 | let set = match self.querydata.predicate.as_ref() { 222 | Some(s) => s.set.clone(), 223 | None => HashSet::new(), // should not happen, but still set empty if ever happen 224 | }; 225 | vt3.set_row_set(set); 226 | } 227 | vt4 = vt3; 228 | 229 | // step 8 230 | let data = vt4 231 | .select(self.querydata.fields.clone()) 232 | .map_err(|e| SQLError::SemanticError(format!("{}", e)))?; 233 | 234 | self.result_json = serde_json::to_string(&data).unwrap(); 235 | Ok(()) 236 | } 237 | } 238 | 239 | fn table_predicate(tb: &mut Table, node: &mut NodePtr) -> Result<(), SQLError> { 240 | match node.as_mut() { 241 | Some(p) => { 242 | // due to the mechanism of borrowing, set the mutable variable first before call them. 
243 | let left; 244 | let right; 245 | let mut left_node_root = "".to_string(); 246 | let mut left_node_set = HashSet::new(); 247 | let mut right_node_root = "".to_string(); 248 | let mut right_node_set = HashSet::new(); 249 | let mut ll = false; 250 | let mut lr = false; 251 | let mut rl = false; 252 | let mut rr = false; 253 | let this_node_root: &str = &p.root; 254 | 255 | // post-order traversal 256 | table_predicate(tb, &mut p.left)?; 257 | table_predicate(tb, &mut p.right)?; 258 | 259 | match p.left.as_mut() { 260 | Some(s) => { 261 | left = true; 262 | left_node_root = s.root.to_string(); 263 | left_node_set = s.set.clone(); 264 | ll = s.left.is_some(); 265 | lr = s.right.is_some(); 266 | } 267 | None => left = false, 268 | } 269 | 270 | match p.right.as_mut() { 271 | Some(s) => { 272 | right = true; 273 | right_node_root = s.root.to_string(); 274 | right_node_set = s.set.clone(); 275 | rl = s.left.is_some(); 276 | rr = s.right.is_some(); 277 | } 278 | None => right = false, 279 | } 280 | 281 | debug!("current node: {}", this_node_root); 282 | debug!("left node: {}", left_node_root); 283 | debug!("right node: {}", right_node_root); 284 | debug!("grandchildren nodes: {:?}", (ll, lr, rl, rr)); 285 | 286 | if left && right { 287 | match (ll, lr, rl, rr) { 288 | (false, false, false, false) => match this_node_root { 289 | "and" => { 290 | let set: HashSet = left_node_set.intersection(&right_node_set).cloned().collect(); 291 | (*p).set = set; 292 | } 293 | "or" => { 294 | let set: HashSet = left_node_set.union(&right_node_set).cloned().collect(); 295 | (*p).set = set; 296 | } 297 | _ => { 298 | (*p).set = tb 299 | .operator_filter_rows(&left_node_root, this_node_root, &right_node_root) 300 | .map_err(|e| SQLError::SemanticError(format!("{}", e)))?; 301 | 302 | (*p).left = None; 303 | (*p).right = None; 304 | } 305 | }, 306 | (_, _, _, _) => {} 307 | }; 308 | } 309 | 310 | if right { 311 | match this_node_root { 312 | "not" => { 313 | let all = 
tb.get_all_rows_set(); 314 | let set: HashSet = all.difference(&right_node_set).cloned().collect(); 315 | (*p).set = set; 316 | // cut the tree 317 | (*p).left = None; 318 | (*p).right = None; 319 | } 320 | _ => {} 321 | } 322 | } 323 | 324 | debug!("this node set: {:?}", (*p).set); 325 | } 326 | None => {} 327 | } 328 | Ok(()) 329 | } 330 | 331 | #[cfg(test)] 332 | mod tests { 333 | use super::*; 334 | use crate::sql::parser::*; 335 | use env_logger; 336 | 337 | fn fake_sql() -> SQL { 338 | let mut sql = SQL::new("Tiger").unwrap(); 339 | sql.create_database("db11").unwrap(); 340 | 341 | let query = "create table t1 (a1 int, a2 char(7), a3 double);"; 342 | Parser::new(query).unwrap().parse(&mut sql).unwrap(); 343 | 344 | let query = "insert into t1(a1, a2, a3) values 345 | (1, 'aaa', 2.1), 346 | (2, 'aaa', 2.2), 347 | (3, 'bbb', 2.3), 348 | (4, 'bbb', 2.4), 349 | (5, 'bbb', 2.5);"; 350 | Parser::new(query).unwrap().parse(&mut sql).unwrap(); 351 | 352 | sql 353 | } 354 | 355 | #[test] 356 | fn test_select_where_and() { 357 | let mut sql = fake_sql(); 358 | 359 | let query = "select a1, a2, a3 from t1 where a1 > 2 and a3 < 2.5;"; 360 | Parser::new(query).unwrap().parse(&mut sql).unwrap(); 361 | 362 | assert_eq!( 363 | sql.result_json, 364 | "{\"fields\":[\"a1\",\"a2\",\"a3\"],\"rows\":[[\"3\",\"bbb\",\"2.3\"],[\"4\",\"bbb\",\"2.4\"]]}" 365 | .to_string() 366 | ); 367 | } 368 | 369 | #[test] 370 | fn test_select_where_or() { 371 | let mut sql = fake_sql(); 372 | 373 | let query = "select a1, a2, a3 from t1 where a1 < 2 or a3 > 2.4;"; 374 | Parser::new(query).unwrap().parse(&mut sql).unwrap(); 375 | 376 | assert_eq!( 377 | sql.result_json, 378 | "{\"fields\":[\"a1\",\"a2\",\"a3\"],\"rows\":[[\"1\",\"aaa\",\"2.1\"],[\"5\",\"bbb\",\"2.5\"]]}" 379 | .to_string() 380 | ); 381 | } 382 | 383 | #[test] 384 | fn test_select_where_not() { 385 | let mut sql = fake_sql(); 386 | 387 | let query = "select a1, a2, a3 from t1 where not a1 < 2;"; 388 | 
Parser::new(query).unwrap().parse(&mut sql).unwrap(); 389 | 390 | assert_eq!( 391 | sql.result_json, 392 | "{\"fields\":[\"a1\",\"a2\",\"a3\"],\"rows\":[[\"2\",\"aaa\",\"2.2\"],[\"3\",\"bbb\",\"2.3\"],[\"4\",\"bbb\",\"2.4\"],[\"5\",\"bbb\",\"2.5\"]]}" 393 | .to_string() 394 | ); 395 | } 396 | 397 | #[test] 398 | fn test_select_where_not_with_and() { 399 | let mut sql = fake_sql(); 400 | 401 | let query = "select a1, a2, a3 from t1 where not a1 < 2 and not a2 = 'aaa';"; 402 | Parser::new(query).unwrap().parse(&mut sql).unwrap(); 403 | 404 | assert_eq!( 405 | sql.result_json, 406 | "{\"fields\":[\"a1\",\"a2\",\"a3\"],\"rows\":[[\"3\",\"bbb\",\"2.3\"],[\"4\",\"bbb\",\"2.4\"],[\"5\",\"bbb\",\"2.5\"]]}" 407 | .to_string() 408 | ); 409 | } 410 | 411 | #[test] 412 | fn test_select_where_complicated_predicate() { 413 | let mut sql = fake_sql(); 414 | 415 | let query = "select a1, a2, a3 from t1 where not (not a1 < 2 and not (not a2 = 'aaa' or a3 > 2.3));"; 416 | Parser::new(query).unwrap().parse(&mut sql).unwrap(); 417 | 418 | assert_eq!( 419 | sql.result_json, 420 | "{\"fields\":[\"a1\",\"a2\",\"a3\"],\"rows\":[[\"1\",\"aaa\",\"2.1\"],[\"3\",\"bbb\",\"2.3\"],[\"4\",\"bbb\",\"2.4\"],[\"5\",\"bbb\",\"2.5\"]]}" 421 | .to_string() 422 | ); 423 | } 424 | } 425 | -------------------------------------------------------------------------------- /src/storage/diskinterface.rs: -------------------------------------------------------------------------------- 1 | use crate::component::datatype::DataType; 2 | use crate::component::field::Field; 3 | use crate::component::table::Row; 4 | use crate::component::table::Table; 5 | use crate::storage::bytescoder; 6 | use crate::storage::file::File; 7 | use crate::storage::index::Index; 8 | use std::collections::HashMap; 9 | use std::fmt; 10 | use std::fs; 11 | use std::io; 12 | use std::path::Path; 13 | 14 | #[derive(Debug, Clone)] 15 | pub struct DiskInterface { 16 | /* definition */ 17 | // Ideally, DiskInterface is a stateless struct 18 | 
} 19 | 20 | // structure of `usernames.json` 21 | 22 | #[derive(Debug, Serialize, Deserialize)] 23 | pub struct UsernamesJson { 24 | pub usernames: Vec, 25 | } 26 | 27 | #[derive(Debug, Serialize, Deserialize)] 28 | pub struct UsernameInfo { 29 | pub name: String, 30 | pub path: String, 31 | } 32 | 33 | // structure of `dbs.json` 34 | 35 | #[derive(Debug, Serialize, Deserialize)] 36 | pub struct DbsJson { 37 | pub dbs: Vec, 38 | } 39 | 40 | #[derive(Debug, Serialize, Deserialize)] 41 | pub struct DbInfo { 42 | pub name: String, 43 | pub path: String, 44 | } 45 | 46 | // structure of `tables.json` 47 | 48 | #[derive(Debug, Serialize, Deserialize)] 49 | pub struct TablesJson { 50 | pub tables: Vec, 51 | } 52 | 53 | #[derive(Debug, Serialize, Deserialize)] 54 | pub struct TableMeta { 55 | pub name: String, 56 | pub username: String, 57 | pub db_name: String, 58 | pub path_tsv: String, 59 | pub path_bin: String, 60 | pub primary_key: Vec, 61 | pub foreign_key: Vec, 62 | pub reference_table: Option, 63 | pub reference_attr: Option, 64 | pub row_length: u32, 65 | pub attrs: HashMap, 66 | pub attrs_order: Vec, 67 | pub attr_offset_ranges: Vec>, 68 | } 69 | 70 | #[derive(Debug, PartialEq, Clone)] 71 | pub enum DiskError { 72 | Io, 73 | BaseDirExists, 74 | BaseDirNotExists, 75 | UsernamesJsonNotExists, 76 | UsernameExists, 77 | UsernameNotExists, 78 | UsernameDirNotExists, 79 | DbsJsonNotExists, 80 | DbExists, 81 | DbNotExists, 82 | DbDirNotExists, 83 | TablesJsonNotExists, 84 | TableExists, 85 | TableNotExists, 86 | TableBinNotExists, 87 | TableTsvNotExists, 88 | TableIdxFileNotExists, 89 | JsonParse, 90 | RangeContainsDeletedRecord, 91 | RangeExceedLatestRecord, 92 | RangeAndNumRowsMismatch, 93 | AttrNotExists, 94 | BytesError, 95 | DuplicatedKey, 96 | IndexKeyNotFound, 97 | } 98 | 99 | impl From for DiskError { 100 | fn from(_err: io::Error) -> DiskError { 101 | DiskError::Io 102 | } 103 | } 104 | 105 | impl From for DiskError { 106 | fn from(_err: serde_json::Error) -> 
DiskError { 107 | DiskError::JsonParse 108 | } 109 | } 110 | 111 | impl From for DiskError { 112 | fn from(_err: bytescoder::BytesCoderError) -> DiskError { 113 | DiskError::BytesError 114 | } 115 | } 116 | 117 | impl fmt::Display for DiskError { 118 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 119 | match *self { 120 | DiskError::Io => write!(f, "No such file or directory."), 121 | DiskError::BaseDirExists => write!(f, "Base dir already exists and cannot be created again."), 122 | DiskError::BaseDirNotExists => write!(f, "Base data directory not exists. All data lost."), 123 | DiskError::UsernamesJsonNotExists => write!(f, "The file `usernames.json` is lost"), 124 | DiskError::UsernameExists => write!(f, "User name already exists and cannot be created again."), 125 | DiskError::UsernameNotExists => { 126 | write!(f, "Specified user name not exists. Please create this username first.") 127 | } 128 | DiskError::UsernameDirNotExists => write!(f, "Username exists but corresponding data folder is lost."), 129 | DiskError::DbsJsonNotExists => write!(f, "The `dbs.json` of the username is lost"), 130 | DiskError::DbExists => write!(f, "DB already exists and cannot be created again."), 131 | DiskError::DbNotExists => write!(f, "DB not exists. Please create DB first."), 132 | DiskError::DbDirNotExists => write!(f, "DB exists but correspoding data folder is lost."), 133 | DiskError::TablesJsonNotExists => write!(f, "The `tables.json` of the DB is lost."), 134 | DiskError::TableExists => write!(f, "Table already exists and cannot be created again."), 135 | DiskError::TableNotExists => write!(f, "Table not exists. Please create table first."), 136 | DiskError::TableBinNotExists => write!(f, "Table exists but correspoding bin file is lost."), 137 | DiskError::TableTsvNotExists => write!(f, "Table exists but correspoding tsv file is lost."), 138 | DiskError::TableIdxFileNotExists => write!( 139 | f, 140 | "Index file does not exist. 
Please build and save it before you can load from it." 141 | ), 142 | DiskError::JsonParse => write!(f, "JSON parsing error."), 143 | DiskError::RangeContainsDeletedRecord => write!(f, "The range of rows to fetch contains deleted records."), 144 | DiskError::RangeExceedLatestRecord => { 145 | write!(f, "The range of rows to fetch exceeds the latest record on the table.") 146 | } 147 | DiskError::RangeAndNumRowsMismatch => { 148 | write!(f, "The range of rows does not match number of rows to be modified.") 149 | } 150 | DiskError::AttrNotExists => write!(f, "The row does not contain specified attribute."), 151 | DiskError::BytesError => write!(f, "Error raised from BytesCoder."), 152 | DiskError::DuplicatedKey => write!(f, "Attempting to insert an duplicated key to index."), 153 | DiskError::IndexKeyNotFound => { 154 | write!(f, "Attempting to access or delete a key which does not exist in index.") 155 | } 156 | } 157 | } 158 | } 159 | 160 | #[allow(dead_code)] 161 | impl DiskInterface { 162 | pub fn create_file_base(file_base_path: Option<&str>) -> Result<(), DiskError> { 163 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 164 | Ok(File::create_file_base(base_path)?) 165 | } 166 | 167 | pub fn create_username(username: &str, file_base_path: Option<&str>) -> Result<(), DiskError> { 168 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 169 | Ok(File::create_username(username, base_path)?) 170 | } 171 | 172 | pub fn get_usernames(file_base_path: Option<&str>) -> Result, DiskError> { 173 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 174 | Ok(File::get_usernames(base_path)?) 175 | } 176 | 177 | pub fn remove_username(username: &str, file_base_path: Option<&str>) -> Result<(), DiskError> { 178 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 179 | Ok(File::remove_username(username, base_path)?) 
180 | } 181 | 182 | pub fn create_db(username: &str, db_name: &str, file_base_path: Option<&str>) -> Result<(), DiskError> { 183 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 184 | Ok(File::create_db(username, db_name, base_path)?) 185 | } 186 | 187 | pub fn get_dbs(username: &str, file_base_path: Option<&str>) -> Result, DiskError> { 188 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 189 | Ok(File::get_dbs(username, base_path)?) 190 | } 191 | 192 | pub fn remove_db(username: &str, db_name: &str, file_base_path: Option<&str>) -> Result<(), DiskError> { 193 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 194 | Ok(File::remove_db(username, db_name, base_path)?) 195 | } 196 | 197 | pub fn create_table( 198 | username: &str, 199 | db_name: &str, 200 | table: &Table, 201 | file_base_path: Option<&str>, 202 | ) -> Result<(), DiskError> { 203 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 204 | Ok(File::create_table(username, db_name, table, base_path)?) 205 | } 206 | 207 | pub fn get_tables(username: &str, db_name: &str, file_base_path: Option<&str>) -> Result, DiskError> { 208 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 209 | Ok(File::get_tables(username, db_name, base_path)?) 210 | } 211 | 212 | pub fn load_tables_meta( 213 | username: &str, 214 | db_name: &str, 215 | file_base_path: Option<&str>, 216 | ) -> Result, DiskError> { 217 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 218 | Ok(File::load_tables_meta(username, db_name, base_path)?) 219 | } 220 | 221 | pub fn load_table_meta( 222 | username: &str, 223 | db_name: &str, 224 | table_name: &str, 225 | file_base_path: Option<&str>, 226 | ) -> Result { 227 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 228 | Ok(File::load_table_meta(username, db_name, table_name, base_path)?) 
229 | } 230 | 231 | pub fn drop_table( 232 | username: &str, 233 | db_name: &str, 234 | table_name: &str, 235 | file_base_path: Option<&str>, 236 | ) -> Result<(), DiskError> { 237 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 238 | Ok(File::drop_table(username, db_name, table_name, base_path)?) 239 | } 240 | 241 | pub fn append_rows( 242 | username: &str, 243 | db_name: &str, 244 | table_name: &str, 245 | rows: &Vec, 246 | file_base_path: Option<&str>, 247 | ) -> Result<(), DiskError> { 248 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 249 | Ok(File::append_rows(username, db_name, table_name, rows, base_path)?) 250 | } 251 | 252 | pub fn fetch_rows( 253 | username: &str, 254 | db_name: &str, 255 | table_name: &str, 256 | row_range: &Vec, 257 | file_base_path: Option<&str>, 258 | ) -> Result, DiskError> { 259 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 260 | Ok(File::fetch_rows(username, db_name, table_name, row_range, base_path)?) 261 | } 262 | 263 | pub fn delete_rows( 264 | username: &str, 265 | db_name: &str, 266 | table_name: &str, 267 | row_range: &Vec, 268 | file_base_path: Option<&str>, 269 | ) -> Result<(), DiskError> { 270 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 271 | Ok(File::delete_rows(username, db_name, table_name, row_range, base_path)?) 272 | } 273 | 274 | pub fn modify_rows( 275 | username: &str, 276 | db_name: &str, 277 | table_name: &str, 278 | row_range: &Vec, 279 | new_rows: &Vec, 280 | file_base_path: Option<&str>, 281 | ) -> Result<(), DiskError> { 282 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 283 | Ok(File::modify_rows( 284 | username, db_name, table_name, row_range, new_rows, base_path, 285 | )?) 
286 | } 287 | 288 | pub fn get_num_rows( 289 | username: &str, 290 | db_name: &str, 291 | table_name: &str, 292 | file_base_path: Option<&str>, 293 | ) -> Result { 294 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 295 | Ok(File::get_num_rows(username, db_name, table_name, base_path)?) 296 | } 297 | 298 | pub fn storage_hierarchy_check( 299 | base_path: &str, 300 | username: Option<&str>, 301 | db_name: Option<&str>, 302 | table_name: Option<&str>, 303 | ) -> Result<(), DiskError> { 304 | // check if base directory exists 305 | if !Path::new(base_path).exists() { 306 | return Err(DiskError::BaseDirNotExists); 307 | } 308 | 309 | // check if `usernames.json` exists 310 | let usernames_json_path = format!("{}/{}", base_path, "usernames.json"); 311 | if !Path::new(&usernames_json_path).exists() { 312 | return Err(DiskError::UsernamesJsonNotExists); 313 | } 314 | 315 | // base level check passed 316 | if username == None { 317 | return Ok(()); 318 | } 319 | 320 | // check if username exists 321 | let usernames_file = fs::File::open(&usernames_json_path)?; 322 | let usernames_json: UsernamesJson = serde_json::from_reader(usernames_file)?; 323 | if !usernames_json 324 | .usernames 325 | .iter() 326 | .map(|username_info| username_info.name.clone()) 327 | .collect::>() 328 | .contains(&username.unwrap().to_string()) 329 | { 330 | return Err(DiskError::UsernameNotExists); 331 | } 332 | 333 | // check if username directory exists 334 | let username_path = format!("{}/{}", base_path, username.unwrap()); 335 | if !Path::new(&username_path).exists() { 336 | return Err(DiskError::UsernameDirNotExists); 337 | } 338 | 339 | // check if `dbs.json` exists 340 | let dbs_json_path = format!("{}/{}", username_path, "dbs.json"); 341 | if !Path::new(&dbs_json_path).exists() { 342 | return Err(DiskError::DbsJsonNotExists); 343 | } 344 | 345 | // username level check passed 346 | if db_name == None { 347 | return Ok(()); 348 | } 349 | 350 | // check if db exists 
351 | let dbs_file = fs::File::open(&dbs_json_path)?; 352 | let dbs_json: DbsJson = serde_json::from_reader(dbs_file)?; 353 | if !dbs_json 354 | .dbs 355 | .iter() 356 | .map(|db_info| db_info.name.clone()) 357 | .collect::>() 358 | .contains(&db_name.unwrap().to_string()) 359 | { 360 | return Err(DiskError::DbNotExists); 361 | } 362 | 363 | // check if db directory exists 364 | let db_path = format!("{}/{}", username_path, db_name.unwrap()); 365 | if !Path::new(&db_path).exists() { 366 | return Err(DiskError::DbDirNotExists); 367 | } 368 | 369 | // check if `tables.json` exists 370 | let tables_json_path = format!("{}/{}", db_path, "tables.json"); 371 | if !Path::new(&tables_json_path).exists() { 372 | return Err(DiskError::TablesJsonNotExists); 373 | } 374 | 375 | // db level check passed 376 | if table_name == None { 377 | return Ok(()); 378 | } 379 | 380 | // check if table exists 381 | let tables_file = fs::File::open(&tables_json_path)?; 382 | let tables_json: TablesJson = serde_json::from_reader(tables_file)?; 383 | if !tables_json 384 | .tables 385 | .iter() 386 | .map(|table_meta| table_meta.name.clone()) 387 | .collect::>() 388 | .contains(&table_name.unwrap().to_string()) 389 | { 390 | return Err(DiskError::TableNotExists); 391 | } 392 | 393 | // check if table bin exists 394 | let table_bin_path = format!("{}/{}.bin", db_path, table_name.unwrap()); 395 | if !Path::new(&table_bin_path).exists() { 396 | return Err(DiskError::TableBinNotExists); 397 | } 398 | 399 | if dotenv!("ENABLE_TSV") == "true" { 400 | // check if table tsv exists 401 | let table_tsv_path = format!("{}/{}.tsv", db_path, table_name.unwrap()); 402 | if !Path::new(&table_tsv_path).exists() { 403 | return Err(DiskError::TableTsvNotExists); 404 | } 405 | } 406 | 407 | Ok(()) 408 | } 409 | 410 | pub fn get_datatype_size(datatype: &DataType) -> u32 { 411 | match datatype { 412 | DataType::Char(length) => length.clone() as u32, 413 | DataType::Double => 8, 414 | DataType::Float => 4, 415 | 
DataType::Int => 4, 416 | DataType::Varchar(length) => length.clone() as u32, 417 | DataType::Url => 256, 418 | } 419 | } 420 | 421 | pub fn build_index_from_table_bin( 422 | username: &str, 423 | db_name: &str, 424 | table_name: &str, 425 | file_base_path: Option<&str>, 426 | ) -> Result { 427 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 428 | let table_meta = DiskInterface::load_table_meta(username, db_name, table_name, Some(base_path))?; 429 | let mut index = Index::new(table_meta)?; 430 | index.build_from_bin(base_path)?; 431 | 432 | Ok(index) 433 | } 434 | 435 | pub fn load_index( 436 | username: &str, 437 | db_name: &str, 438 | table_name: &str, 439 | file_base_path: Option<&str>, 440 | ) -> Result { 441 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 442 | let table_meta = DiskInterface::load_table_meta(username, db_name, table_name, Some(base_path))?; 443 | let mut index = Index::new(table_meta)?; 444 | index.load(base_path)?; 445 | 446 | Ok(index) 447 | } 448 | 449 | pub fn save_index(index: &Index, file_base_path: Option<&str>) -> Result<(), DiskError> { 450 | let base_path = file_base_path.unwrap_or(dotenv!("FILE_BASE_PATH")); 451 | index.save(base_path)?; 452 | 453 | Ok(()) 454 | } 455 | } 456 | 457 | // #[cfg(test)] 458 | // mod tests { 459 | // use super::*; 460 | // } 461 | -------------------------------------------------------------------------------- /src/component/table.rs: -------------------------------------------------------------------------------- 1 | use crate::component::datatype::DataType; 2 | use crate::component::field::Field; 3 | use crate::storage::diskinterface::{DiskError, DiskInterface, TableMeta}; 4 | use regex::Regex; 5 | use std::collections::HashMap; 6 | use std::collections::HashSet; 7 | use std::fmt; 8 | use uuid::Uuid; 9 | 10 | #[derive(Debug, Clone)] 11 | pub struct Table { 12 | /* definition */ 13 | pub name: String, 14 | pub fields: HashMap, // aka attributes 15 | pub 
primary_key: Vec, 16 | pub foreign_key: Vec, 17 | pub reference_table: Option, 18 | pub reference_attr: Option, 19 | 20 | /* value */ 21 | pub rows: Vec, 22 | 23 | /* storage */ 24 | pub is_data_loaded: bool, // if load the data from storage 25 | pub is_dirty: bool, 26 | pub dirty_cursor: u32, // where is the dirty data beginning 27 | pub is_delete: bool, 28 | 29 | /* virtual table */ 30 | is_predicate_init: bool, // if ever filter rows for predicate 31 | row_set: HashSet, // record rows for predicate 32 | 33 | /* encryption */ 34 | pub public_key: i32, 35 | 36 | uuid: String, 37 | } 38 | 39 | #[derive(Debug, Clone)] 40 | pub struct Row { 41 | pub data: HashMap, 42 | pub is_dirty: bool, 43 | pub is_delete: bool, 44 | uuid: String, 45 | } 46 | 47 | impl Row { 48 | pub fn new() -> Row { 49 | Row { 50 | data: HashMap::new(), 51 | is_dirty: true, 52 | is_delete: false, 53 | uuid: Uuid::new_v4().to_string(), 54 | } 55 | } 56 | } 57 | 58 | #[derive(Debug, Serialize)] 59 | pub struct SelectData { 60 | pub fields: Vec, 61 | pub rows: Vec>, 62 | } 63 | 64 | impl SelectData { 65 | pub fn new() -> SelectData { 66 | SelectData { 67 | fields: vec![], 68 | rows: vec![], 69 | } 70 | } 71 | } 72 | 73 | #[derive(Debug, Clone)] 74 | pub enum TableError { 75 | InsertFieldNotExisted(String), 76 | InsertFieldNotNullMismatched(String), 77 | InsertFieldDefaultMismatched(String), 78 | IllegalValue(String), 79 | SelectFieldNotExisted(String), 80 | CausedByFile(DiskError), 81 | KeyNotExist, 82 | } 83 | 84 | impl fmt::Display for TableError { 85 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 86 | match *self { 87 | TableError::InsertFieldNotExisted(ref attr_name) => { 88 | write!(f, "Insert Error: the table doesn't have `{}` attribute.", attr_name) 89 | } 90 | TableError::InsertFieldNotNullMismatched(ref attr_name) => { 91 | write!(f, "Insert Error: {} could not be null", attr_name) 92 | } 93 | TableError::InsertFieldDefaultMismatched(ref attr_name) => write!( 94 | f, 95 | "Insert 
Error: {} has no default value. Need to declare the value.", 96 | attr_name 97 | ), 98 | TableError::IllegalValue(ref value) => write!( 99 | f, 100 | "Insert Error: value {} is illegal. Need to check the content or the datatype:", 101 | value 102 | ), 103 | TableError::SelectFieldNotExisted(ref name) => write!(f, "Selected field not exists: {}", name), 104 | TableError::CausedByFile(ref e) => write!(f, "error caused by file: {}", e), 105 | TableError::KeyNotExist => write!(f, "encrypt error: public key is not existed"), 106 | } 107 | } 108 | } 109 | 110 | impl Table { 111 | pub fn new(name: &str) -> Table { 112 | Table { 113 | name: name.to_string(), 114 | fields: HashMap::new(), 115 | rows: vec![], 116 | primary_key: vec![], 117 | foreign_key: vec![], 118 | reference_table: None, 119 | reference_attr: None, 120 | 121 | is_data_loaded: false, 122 | is_dirty: true, 123 | dirty_cursor: 0, 124 | is_delete: false, 125 | 126 | is_predicate_init: false, 127 | row_set: HashSet::new(), 128 | 129 | public_key: 0, 130 | 131 | uuid: Uuid::new_v4().to_string(), 132 | } 133 | } 134 | 135 | /// Load a table with meta data 136 | #[allow(dead_code)] 137 | pub fn load_meta(username: &str, db_name: &str, table_name: &str) -> Result { 138 | let meta = DiskInterface::load_table_meta(username, db_name, table_name, None) 139 | .map_err(|e| TableError::CausedByFile(e))?; 140 | let mut table = Table::new(table_name); 141 | 142 | table.format_meta(meta); 143 | 144 | Ok(table) 145 | } 146 | 147 | /// format metadata into table 148 | pub fn format_meta(&mut self, meta: TableMeta) { 149 | self.fields = meta.attrs; 150 | self.primary_key = meta.primary_key; 151 | self.foreign_key = meta.foreign_key; 152 | self.reference_table = meta.reference_table; 153 | self.reference_attr = meta.reference_attr; 154 | self.is_dirty = false; 155 | } 156 | 157 | /// load the particular range of rows from storage 158 | pub fn load_rows_data(&mut self, username: &str, db_name: &str) -> Result<(), TableError> { 
159 | // TODO: read index file, find all row data range, call fetch_rows 160 | //let row_data = DiskInterface::fetch_rows(username, db_name, self.name, , None).unwrap().map_err(|e| TableError::CauseByFile(e))?; 161 | //self.rows = row_data; 162 | self.is_data_loaded = true; 163 | Ok(()) 164 | } 165 | 166 | /// load the all data from storage 167 | pub fn load_all_rows_data(&mut self, username: &str, db_name: &str) -> Result<(), TableError> { 168 | self.is_data_loaded = true; 169 | Ok(()) 170 | } 171 | 172 | pub fn insert_new_field(&mut self, field: Field) { 173 | self.fields.insert(field.name.clone(), field); 174 | } 175 | 176 | /// `insert` row into the table 177 | /// `key` and `value` are `&str`, and will be formated to the right type. 178 | pub fn insert_row(&mut self, row: Vec<(&str, &str)>) -> Result<(), TableError> { 179 | let mut new_row = Row::new(); 180 | 181 | // insert data into row 182 | for (key, value) in row { 183 | match self.fields.get(key) { 184 | Some(field) => { 185 | if field.not_null && value == "null" { 186 | return Err(TableError::InsertFieldNotNullMismatched(field.clone().name)); 187 | } 188 | if !is_value_valid(value, &field.datatype) { 189 | return Err(TableError::IllegalValue(value.to_string())); 190 | } 191 | new_row.data.insert(key.to_string(), value.to_string()); 192 | } 193 | None => return Err(TableError::InsertFieldNotExisted(key.to_string())), 194 | } 195 | } 196 | 197 | // check if the row fits the field 198 | for (key, field) in self.fields.iter() { 199 | match new_row.data.get(key) { 200 | Some(_) => {} 201 | None => { 202 | match field.clone().default { 203 | // if the attribute has default value, then insert with the default value. 
204 | Some(value) => new_row.data.insert(key.to_string(), value.to_string()), 205 | None => return Err(TableError::InsertFieldDefaultMismatched(key.to_string())), 206 | }; 207 | } 208 | }; 209 | } 210 | 211 | for (key, field) in self.fields.iter() { 212 | if field.encrypt { 213 | if self.public_key == 0 { 214 | // 0 is default key value, which is not a valid key 215 | return Err(TableError::KeyNotExist); 216 | } 217 | let value = new_row.data.get_mut(key).unwrap(); 218 | // TODO: encrypt value with self.public_key 219 | } 220 | } 221 | self.rows.push(new_row); 222 | 223 | Ok(()) 224 | } 225 | 226 | /// return the set of all rows' id of the table 227 | pub fn get_all_rows_set(&self) -> HashSet { 228 | let mut set: HashSet = HashSet::new(); 229 | for i in 0..self.rows.len() { 230 | set.insert(i); 231 | } 232 | set 233 | } 234 | 235 | /// filter rows by the predicate and update the row_set 236 | /// 237 | /// Note: this assume all data of rows and the predicate follow the rules, so there is no check for 238 | /// data type and field name. 239 | pub fn operator_filter_rows( 240 | &mut self, 241 | field_name: &str, 242 | operator: &str, 243 | value: &str, 244 | ) -> Result, TableError> { 245 | let data_type = self.fields.get(field_name).unwrap().datatype.clone(); 246 | let mut set = HashSet::new(); 247 | 248 | // if the first time, the predicate range is the range of all rows. 249 | if !self.is_predicate_init { 250 | for i in 0..self.rows.len() { 251 | self.row_set.insert(i); 252 | } 253 | // TODO: analyse when to set true. 254 | // currently always false, so it will get all rows every times.HashSet 255 | // We need to figure out when to let it to be true, as when there is `OR` then 256 | // it should keep false. 
257 | 258 | // self.is_predicate_init = true; 259 | } 260 | 261 | for i in self.row_set.iter() { 262 | let row = &self.rows[*i]; 263 | if match data_type { 264 | DataType::Int => { 265 | let data = row.data.get(field_name).unwrap().parse::().unwrap(); 266 | let value = value.parse::().unwrap(); 267 | cmp(data, operator, value) 268 | } 269 | DataType::Float => { 270 | let data = row.data.get(field_name).unwrap().parse::().unwrap(); 271 | let value = value.parse::().unwrap(); 272 | cmp(data, operator, value) 273 | } 274 | DataType::Double => { 275 | let data = row.data.get(field_name).unwrap().parse::().unwrap(); 276 | let value = value.parse::().unwrap(); 277 | cmp(data, operator, value) 278 | } 279 | DataType::Char(_) => { 280 | let data = row.data.get(field_name).unwrap().clone(); 281 | cmp(data, operator, value.to_string()) 282 | } 283 | DataType::Varchar(_) => { 284 | let data = row.data.get(field_name).unwrap().clone(); 285 | cmp(data, operator, value.to_string()) 286 | } 287 | DataType::Url => { 288 | let data = row.data.get(field_name).unwrap().clone(); 289 | cmp(data, operator, value.to_string()) 290 | } 291 | } { 292 | set.insert(*i); 293 | } 294 | } 295 | 296 | self.row_set = set; 297 | Ok(self.row_set.clone()) 298 | } 299 | 300 | /// set the new row set 301 | pub fn set_row_set(&mut self, set: HashSet) { 302 | self.row_set = set; 303 | self.is_predicate_init = true; 304 | } 305 | 306 | /// select fields from rows in row_set of the table 307 | pub fn select(&mut self, field_names: Vec) -> Result { 308 | let mut data = SelectData::new(); 309 | for name in &field_names { 310 | data.fields.push(name.to_string()); 311 | } 312 | // if no predicate, select all data 313 | if !self.is_predicate_init { 314 | for i in 0..self.rows.len() { 315 | self.row_set.insert(i); 316 | } 317 | self.is_predicate_init = true; 318 | } 319 | // only which is in row_set will be picked 320 | for i in &self.row_set { 321 | let row = &self.rows[*i]; 322 | let mut r = vec![]; 323 | for 
name in &field_names { 324 | r.push( 325 | row.data 326 | .get::(name) 327 | .ok_or(TableError::SelectFieldNotExisted(name.to_string()))? 328 | .clone(), 329 | ); 330 | } 331 | data.rows.push(r); 332 | } 333 | data.rows.sort(); 334 | Ok(data) 335 | } 336 | } 337 | 338 | #[inline] 339 | fn cmp(left: T, operator: &str, right: T) -> bool { 340 | match operator { 341 | "=" => left == right, 342 | ">" => left > right, 343 | ">=" => left >= right, 344 | "<" => left < right, 345 | "<=" => left <= right, 346 | "!=" => left != right, 347 | "<>" => left != right, 348 | _ => false, // never happen 349 | } 350 | } 351 | 352 | fn is_value_valid(value: &str, datatype: &DataType) -> bool { 353 | match datatype { 354 | DataType::Url => { 355 | let re = Regex::new(r"://[\w\-\.]+(:\d+)?(/[~\w/\.]*)?(\?\S*)?(#\S*)?").unwrap(); // url 356 | let re_2 = 357 | Regex::new(r"^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$") 358 | .unwrap(); //IPv4 359 | re.is_match(value) || re_2.is_match(value) 360 | } 361 | _ => true, // TODO: check other datatype 362 | } 363 | } 364 | 365 | #[cfg(test)] 366 | mod tests { 367 | use super::*; 368 | use crate::component::datatype::DataType; 369 | use crate::component::field; 370 | 371 | #[test] 372 | fn test_insert_row() { 373 | let mut table = Table::new("table_1"); 374 | table.fields.insert( 375 | "attr_1".to_string(), 376 | Field::new_all( 377 | "attr_1", 378 | DataType::Int, 379 | true, // not_null is true 380 | Some("123".to_string()), // default is 123 381 | field::Checker::None, 382 | false, 383 | ), 384 | ); 385 | table.fields.insert( 386 | "attr_2".to_string(), 387 | Field::new_all( 388 | "attr_2", 389 | DataType::Int, 390 | true, // not_null is true 391 | None, // no default 392 | field::Checker::None, 393 | false, 394 | ), 395 | ); 396 | table.fields.insert( 397 | "attr_3".to_string(), 398 | Field::new_all( 399 | "attr_3", 400 | DataType::Int, 401 | false, // not null is false 402 | None, // no default 403 | 
field::Checker::None, 404 | false, 405 | ), 406 | ); 407 | 408 | println!("correct data"); 409 | let data = vec![("attr_1", "123"), ("attr_2", "123"), ("attr_3", "123")]; 410 | assert!(table.insert_row(data).is_ok()); 411 | 412 | println!("`attr_2` is null while its not_null is true"); 413 | let data = vec![("attr_1", "123"), ("attr_2", "null"), ("attr_3", "123")]; 414 | assert!(table.insert_row(data).is_err()); 415 | 416 | println!("`attr_3` is null while its not_null is false"); 417 | let data = vec![("attr_1", "123"), ("attr_2", "123"), ("attr_3", "null")]; 418 | assert!(table.insert_row(data).is_ok()); 419 | 420 | println!("none given value `attr_2` while its default is None"); 421 | let data = vec![("attr_1", "123"), ("attr_3", "123")]; 422 | assert!(table.insert_row(data).is_err()); 423 | 424 | println!("none given value `attr_1` while it has default"); 425 | let data = vec![("attr_2", "123"), ("attr_3", "123")]; 426 | assert!(table.insert_row(data).is_ok()); 427 | 428 | println!("fields mismatched"); 429 | let data = vec![ 430 | ("attr_1", "123"), 431 | ("attr_2", "123"), 432 | ("attr_3", "123"), 433 | ("attr_4", "123"), 434 | ]; 435 | assert!(table.insert_row(data).is_err()); 436 | let data = vec![("attr_1", "123")]; 437 | assert!(table.insert_row(data).is_err()); 438 | } 439 | 440 | #[test] 441 | #[rustfmt::skip] 442 | fn test_operator_filter_rows() { 443 | let mut table = Table::new("table_1"); 444 | table.fields.insert("a1".to_string(), Field::new("attr_1", DataType::Int)); 445 | table.fields.insert("a2".to_string(), Field::new("attr_1", DataType::Char(20))); 446 | let data = vec![("a1", "1"), ("a2", "aaa")]; 447 | let _ = table.insert_row(data).unwrap(); 448 | let data = vec![("a1", "2"), ("a2", "bbb")]; 449 | let _ = table.insert_row(data).unwrap(); 450 | let data = vec![("a1", "3"), ("a2", "aaa")]; 451 | let _ = table.insert_row(data).unwrap(); 452 | let data = vec![("a1", "4"), ("a2", "bbb")]; 453 | let _ = table.insert_row(data).unwrap(); 454 | 455 
| let set = table.operator_filter_rows("a1", ">", "2").unwrap(); 456 | table.set_row_set(set); 457 | let select_data = table.select(vec!["a1".to_string(), "a2".to_string()]).unwrap(); 458 | assert_eq!(select_data.rows, vec![["3", "aaa"], ["4", "bbb"]]); 459 | 460 | let set = table.operator_filter_rows("a2", "=", "bbb").unwrap(); 461 | table.set_row_set(set); 462 | let select_data = table.select(vec!["a1".to_string(), "a2".to_string()]).unwrap(); 463 | assert_eq!(select_data.rows, vec![vec!["4", "bbb"]]); 464 | } 465 | 466 | #[test] 467 | fn test_url() { 468 | let mut table = Table::new("table_1"); 469 | table.fields.insert( 470 | "attr_1".to_string(), 471 | Field::new_all("attr_1", DataType::Url, true, None, field::Checker::None, false), 472 | ); 473 | println!("correct data"); 474 | let data = vec![("attr_1", "https://github.com/stellarsql/stellarsql")]; 475 | assert!(table.insert_row(data).is_ok()); 476 | let data = vec![( 477 | "attr_1", 478 | "https://zh.wikipedia.org/wiki/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F", 479 | )]; 480 | assert!(table.insert_row(data).is_ok()); 481 | let data = vec![("attr_1", "127.0.0.1")]; 482 | assert!(table.insert_row(data).is_ok()); 483 | let data = vec![("attr_1", "ftp://abc:1234@192.168.0.1")]; 484 | assert!(table.insert_row(data).is_ok()); 485 | 486 | println!("illegal url"); 487 | let data = vec![("attr_1", "https:github.comstellarsqlStellarSQL")]; 488 | assert!(table.insert_row(data).is_err()); 489 | } 490 | } 491 | -------------------------------------------------------------------------------- /src/sql/lexer.rs: -------------------------------------------------------------------------------- 1 | use crate::sql::symbol; 2 | use std::fmt; 3 | 4 | #[derive(Debug, Clone)] 5 | pub struct Scanner { 6 | message: String, 7 | tokens: Vec, 8 | pos: Pos, 9 | } 10 | 11 | #[derive(Debug, Clone)] 12 | struct Pos { 13 | cursor_l: usize, 14 | cursor_r: usize, 15 | } 16 | 17 | #[derive(Debug)] 18 | pub enum LexerError { 19 | 
NotAllowedChar, // a character outside the accepted ASCII set appeared outside quotes
    QuoteError, // an opening `'` or `"` was never closed
}

impl fmt::Display for LexerError {
    // Human-readable messages; the unit tests assert these strings verbatim,
    // so they must not be reworded casually.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            LexerError::NotAllowedChar => write!(f, "please use ascii character."),
            LexerError::QuoteError => write!(f, "please check the quotes"),
        }
    }
}

impl Scanner {
    /// Create a scanner over `message`, lower-cased and trimmed.
    /// Both cursors start at 0 with no tokens scanned yet.
    pub fn new(message: &str) -> Scanner {
        Scanner {
            message: message.to_lowercase().trim().to_string(),
            tokens: vec![],
            pos: Pos {
                cursor_l: 0,
                cursor_r: 0,
            },
        }
    }

    /// Tokenize the whole message into symbols.
    ///
    /// `cursor_l..cursor_r` delimits the word currently being accumulated.
    /// Quoted spans become single `Identifier` tokens with the quotes stripped;
    /// words are looked up in `symbol::SYMBOLS` (including multi-word keywords
    /// such as `is null`) before falling back to `Identifier`.
    ///
    /// NOTE(review): the cursors index `self.message` by *bytes* but are
    /// advanced once per `char`. Unquoted non-ASCII input is rejected with
    /// `NotAllowedChar`, but non-ASCII *inside* quotes would desynchronize the
    /// cursors and can panic in `get(..).unwrap()` — confirm input is meant to
    /// be ASCII-only.
    ///
    /// (The generic parameters of this signature were lost in extraction;
    /// restored as `Result<Vec<symbol::Symbol>, LexerError>` from the pushes of
    /// `symbol::sym(..)` / `token.clone()` and `Ok(self.tokens.clone())`.)
    pub fn scan_tokens(&mut self) -> Result<Vec<symbol::Symbol>, LexerError> {
        debug!("Starting scanning message:\n`{}`", self.message);
        let mut chars = self.message.chars();
        let mut is_quoted = false;
        let mut quote = '\0';

        loop {
            match chars.next() {
                Some(x) => {
                    // first meet " or '
                    if !is_quoted && (x == '"' || x == '\'') {
                        quote = x; // `char` is Copy; `.clone()` was redundant
                    }
                    if x == quote || is_quoted {
                        self.pos.cursor_r += 1;
                        if !is_quoted {
                            is_quoted = true;
                        } else if x == quote {
                            // closing quote found: emit the span between the quotes
                            let word = self.message.get(self.pos.cursor_l + 1..self.pos.cursor_r - 1).unwrap(); // delete quotes
                            self.tokens
                                .push(symbol::sym(word, symbol::Token::Identifier, symbol::Group::Identifier));
                            is_quoted = false;
                            self.pos.cursor_l = self.pos.cursor_r;
                            quote = '\0';
                        }
                    } else if is_identifier_char(x) || is_operator(x) {
                        // still inside a word/operator: just extend the span
                        self.pos.cursor_r += 1;
                    } else {
                        match x {
                            ' ' | '\t' | '\r' | '\n' | '(' | ')' | ',' | ';' => {
                                if self.pos.cursor_l != self.pos.cursor_r {
                                    let word = self.message.get(self.pos.cursor_l..self.pos.cursor_r).unwrap();
                                    debug!("encounter `{}`, last word is `{}`", x, word);

                                    let mut is_multi_keyword = false;

                                    // if this char is delimiter, it must not be a multikeyword
                                    if !is_delimiter(x) {
                                        // if this is possible a multikeyword, search the following chars
                                        match symbol::check_multi_keywords_front(word) {
                                            // parts for how many parts in this possible keyword
                                            Some(parts) => {
                                                debug!("The word `{}` might be a multikeyword", word);

                                                for keyword_total_parts in parts {
                                                    debug!("Assume this keyword has {} parts", keyword_total_parts);

                                                    // copy remaining chars for testing
                                                    let mut test_chars = chars.as_str().chars();
                                                    // for testing if the string a multikeyword. Insert the first word
                                                    // and a space already. (because start scanning from next word)
                                                    let mut test_str = format!("{} ", word);

                                                    // for checking a new word
                                                    let mut is_last_letter = false;

                                                    // record the right cursor position when checking if multikeyword
                                                    // if match a multikeyword, shift right cursor with steps
                                                    let mut step_counter = 0;

                                                    // How many words added in the test_str
                                                    // if the keyword is 3 parts, the following_parts should be 2
                                                    let mut following_parts = 0;

                                                    loop {
                                                        match test_chars.next() {
                                                            Some(y) => {
                                                                // A multikeyword should be all ASCII alphabetic character
                                                                if y.is_ascii_alphabetic() {
                                                                    if !is_last_letter {
                                                                        is_last_letter = true;
                                                                    }
                                                                    test_str.push(y);
                                                                } else {
                                                                    match y {
                                                                        ' ' | '\t' | '\r' | '\n' => {
                                                                            if is_last_letter {
                                                                                // from letter to space, count one
                                                                                following_parts += 1;
                                                                                // find enough parts, break earlier
                                                                                if following_parts == keyword_total_parts - 1 {
                                                                                    break; // loop
                                                                                }
                                                                                // add ` ` between words
                                                                                test_str.push(' ');
                                                                                is_last_letter = false
                                                                            }
                                                                        }
                                                                        // &, %, *, @, etc.
                                                                        // keywords must be letters
                                                                        _ => break, // loop
                                                                    }
                                                                }
                                                            }
                                                            None => break, // loop
                                                        }
                                                        step_counter += 1;
                                                    }

                                                    debug!("Checking `{}` ...", test_str);
                                                    match symbol::SYMBOLS.get(test_str.as_str()) {
                                                        // a multikeyword
                                                        Some(token) => {
                                                            debug!("Found keyword `{}`", test_str);
                                                            self.tokens.push(token.clone());

                                                            // shift the right cursor to the right of multikeyword
                                                            self.pos.cursor_r += step_counter;
                                                            // skip the chars included in this multikeyword
                                                            for _ in 0..step_counter {
                                                                chars.next();
                                                            }

                                                            is_multi_keyword = true;
                                                            break; // parts
                                                        }
                                                        None => debug!("`{}` not a keyword", test_str),
                                                    }
                                                }
                                            }
                                            None => {}
                                        }
                                    }

                                    // a single word
                                    if !is_multi_keyword {
                                        match symbol::SYMBOLS.get(word) {
                                            // either keyword
                                            Some(token) => {
                                                self.tokens.push(token.clone());
                                            }
                                            // or identifier
                                            None => {
                                                self.tokens.push(symbol::sym(
                                                    word,
                                                    symbol::Token::Identifier,
                                                    symbol::Group::Identifier,
                                                ));
                                            }
                                        }
                                    }
                                }
                                if is_delimiter(x) {
                                    debug!("take `{}`", x);
                                    self.tokens.push(symbol::Symbol::match_delimiter(x).unwrap());
                                }
                                // set the cursor next to `x` in the right
                                self.pos.cursor_r += 1;
                                self.pos.cursor_l = self.pos.cursor_r;
                            }
                            // A special case: `*` alone is the SELECT-all identifier
                            '*' => {
                                self.tokens.push(symbol::sym(
                                    "*",
                                    symbol::Token::Identifier,
                                    symbol::Group::Identifier,
                                ));
                                self.pos.cursor_r += 1;
                                self.pos.cursor_l = self.pos.cursor_r;
                            }
                            _ => {
                                return Err(LexerError::NotAllowedChar);
                            }
                        }
                    }
                }
                // iter to the end
                None => {
                    if is_quoted {
                        // if find no second quote
                        return Err(LexerError::QuoteError);
                    }
                    break;
                }
            };
        }
        Ok(self.tokens.clone())
} // end of Scanner::scan_tokens
} // end of impl Scanner

/// True for characters allowed inside a word: digits, ASCII letters, and
/// `'`, `.`, `"` (quotes and dots are kept so quoted strings and qualified
/// names stay inside a single span).
fn is_identifier_char(ch: char) -> bool {
    ch.is_digit(10) || ch.is_ascii_alphabetic() || ch == '\'' || ch == '.' || ch == '"'
}

/// True for single-character operator symbols kept inside the current span.
fn is_operator(ch: char) -> bool {
    ch == '>' || ch == '=' || ch == '<' || ch == '-' || ch == '+'
}

/// True for punctuation that terminates a word AND is emitted as its own token.
fn is_delimiter(ch: char) -> bool {
    ch == '(' || ch == ')' || ch == ',' || ch == ';'
}

#[cfg(test)]
mod tests {
    use super::*;
    use env_logger;

    #[test]
    pub fn test_quote() {
        // a quoted span becomes one Identifier token with the quotes stripped,
        // even when it contains non-identifier characters like `://`
        let message = "'123://'";
        let mut s = Scanner::new(message);
        let tokens = s.scan_tokens().unwrap();
        let mut iter = (&tokens).iter();
        let x = iter.next().unwrap();
        println!("test{:?}", x.name);
        assert_eq!(
            format!("{:?}, {:?}, {:?}", x.name, x.token, x.group),
            "\"123://\", Identifier, Identifier"
        );

        // a `"` inside a '…' span is literal content, not a closing quote
        let message = "'qqq\"' ,123";
        let mut s = Scanner::new(message);
        let tokens = s.scan_tokens().unwrap();
        let mut iter = (&tokens).iter();
        let x = iter.next().unwrap();
        println!("test{:?}", x.name);
        assert_eq!(
            format!("{:?}, {:?}, {:?}", x.name, x.token, x.group),
            "\"qqq\\\"\", Identifier, Identifier"
        );

        // an unterminated quote must surface as QuoteError
        let message = "\"qqq\', 123 ";
        let mut s = Scanner::new(message);
        match s.scan_tokens() {
            Ok(_) => {}
            Err(e) => assert_eq!(format!("{}", e), "please check the quotes"),
        }
    }

    #[test]
    pub fn test_scan_tokens() {
        // full SELECT ... WHERE ... IS NULL: exercises keyword vs identifier
        // classification, comma delimiters, and the multi-word keyword `is null`
        let message = "select customername, contactname, address from customers where address is null;";
        let mut s = Scanner::new(message);
        let tokens = s.scan_tokens().unwrap();
        let mut iter = (&tokens).iter();
        let x = iter.next().unwrap();
        assert_eq!(
            format!("{:?}, {:?}, {:?}", x.name, x.token, x.group),
            "\"select\", Select, Keyword"
        );
        let x = iter.next().unwrap();
        assert_eq!(
            format!("{:?},
{:?}, {:?}", x.name, x.token, x.group), 288 | "\"customername\", Identifier, Identifier" 289 | ); 290 | let x = iter.next().unwrap(); 291 | assert_eq!( 292 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 293 | "\",\", Comma, Delimiter" 294 | ); 295 | let x = iter.next().unwrap(); 296 | assert_eq!( 297 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 298 | "\"contactname\", Identifier, Identifier" 299 | ); 300 | let x = iter.next().unwrap(); 301 | assert_eq!( 302 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 303 | "\",\", Comma, Delimiter" 304 | ); 305 | let x = iter.next().unwrap(); 306 | assert_eq!( 307 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 308 | "\"address\", Identifier, Identifier" 309 | ); 310 | let x = iter.next().unwrap(); 311 | assert_eq!( 312 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 313 | "\"from\", From, Keyword" 314 | ); 315 | let x = iter.next().unwrap(); 316 | assert_eq!( 317 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 318 | "\"customers\", Identifier, Identifier" 319 | ); 320 | let x = iter.next().unwrap(); 321 | assert_eq!( 322 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 323 | "\"where\", Where, Keyword" 324 | ); 325 | let x = iter.next().unwrap(); 326 | assert_eq!( 327 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 328 | "\"address\", Identifier, Identifier" 329 | ); 330 | let x = iter.next().unwrap(); 331 | assert_eq!( 332 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 333 | "\"is null\", IsNull, Keyword" 334 | ); 335 | let x = iter.next().unwrap(); 336 | assert_eq!( 337 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 338 | "\";\", Semicolon, Delimiter" 339 | ); 340 | assert!(iter.next().is_none()); 341 | 342 | let message = "select * from customers;"; 343 | let mut s = Scanner::new(message); 344 | let tokens = s.scan_tokens().unwrap(); 345 | let mut iter = (&tokens).iter(); 346 | let x = iter.next().unwrap(); 347 | assert_eq!( 348 | 
format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 349 | "\"select\", Select, Keyword" 350 | ); 351 | let x = iter.next().unwrap(); 352 | assert_eq!( 353 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 354 | "\"*\", Identifier, Identifier" 355 | ); 356 | let x = iter.next().unwrap(); 357 | assert_eq!( 358 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 359 | "\"from\", From, Keyword" 360 | ); 361 | let x = iter.next().unwrap(); 362 | assert_eq!( 363 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 364 | "\"customers\", Identifier, Identifier" 365 | ); 366 | let x = iter.next().unwrap(); 367 | assert_eq!( 368 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 369 | "\";\", Semicolon, Delimiter" 370 | ); 371 | assert!(iter.next().is_none()); 372 | 373 | let message = "insert \n\r\tinto \t\tcustomers \n(customername,\n\n city)\n\n values ('cardinal', 'norway');"; 374 | let mut s = Scanner::new(message); 375 | let tokens = s.scan_tokens().unwrap(); 376 | let mut iter = (&tokens).iter(); 377 | let x = iter.next().unwrap(); 378 | assert_eq!( 379 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 380 | "\"insert into\", InsertInto, Keyword" 381 | ); 382 | let x = iter.next().unwrap(); 383 | assert_eq!( 384 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 385 | "\"customers\", Identifier, Identifier" 386 | ); 387 | let x = iter.next().unwrap(); 388 | assert_eq!( 389 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 390 | "\"(\", ParentLeft, Delimiter" 391 | ); 392 | let x = iter.next().unwrap(); 393 | assert_eq!( 394 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 395 | "\"customername\", Identifier, Identifier" 396 | ); 397 | let x = iter.next().unwrap(); 398 | assert_eq!( 399 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 400 | "\",\", Comma, Delimiter" 401 | ); 402 | let x = iter.next().unwrap(); 403 | assert_eq!( 404 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 405 | "\"city\", 
Identifier, Identifier" 406 | ); 407 | let x = iter.next().unwrap(); 408 | assert_eq!( 409 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 410 | "\")\", ParentRight, Delimiter" 411 | ); 412 | let x = iter.next().unwrap(); 413 | assert_eq!( 414 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 415 | "\"values\", Values, Keyword" 416 | ); 417 | let x = iter.next().unwrap(); 418 | assert_eq!( 419 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 420 | "\"(\", ParentLeft, Delimiter" 421 | ); 422 | let x = iter.next().unwrap(); 423 | assert_eq!( 424 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 425 | "\"cardinal\", Identifier, Identifier" 426 | ); 427 | let x = iter.next().unwrap(); 428 | assert_eq!( 429 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 430 | "\",\", Comma, Delimiter" 431 | ); 432 | let x = iter.next().unwrap(); 433 | assert_eq!( 434 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 435 | "\"norway\", Identifier, Identifier" 436 | ); 437 | let x = iter.next().unwrap(); 438 | assert_eq!( 439 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 440 | "\")\", ParentRight, Delimiter" 441 | ); 442 | let x = iter.next().unwrap(); 443 | assert_eq!( 444 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 445 | "\";\", Semicolon, Delimiter" 446 | ); 447 | assert!(iter.next().is_none()); 448 | 449 | let message = "create table x1;"; 450 | let mut s = Scanner::new(message); 451 | let tokens = s.scan_tokens().unwrap(); 452 | debug!("{:?}", tokens); 453 | let mut iter = (&tokens).iter(); 454 | let x = iter.next().unwrap(); 455 | assert_eq!( 456 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 457 | "\"create table\", CreateTable, Keyword" 458 | ); 459 | let x = iter.next().unwrap(); 460 | assert_eq!( 461 | format!("{:?}, {:?}, {:?}", x.name, x.token, x.group), 462 | "\"x1\", Identifier, Identifier" 463 | ); 464 | let x = iter.next().unwrap(); 465 | assert_eq!( 466 | format!("{:?}, {:?}, {:?}", x.name, 
x.token, x.group), 467 | "\";\", Semicolon, Delimiter" 468 | ); 469 | assert!(iter.next().is_none()); 470 | } 471 | 472 | #[test] 473 | fn test_scan_tokens_error() { 474 | let message = "create table $1234"; 475 | let mut s = Scanner::new(message); 476 | match s.scan_tokens() { 477 | Ok(_) => {} 478 | Err(e) => assert_eq!(format!("{}", e), "please use ascii character."), 479 | } 480 | } 481 | } 482 | -------------------------------------------------------------------------------- /src/sql/parser.rs: -------------------------------------------------------------------------------- 1 | use crate::component::datatype::DataType; 2 | use crate::component::field::Field; 3 | use crate::component::table::Table; 4 | use crate::sql::lexer::LexerError; 5 | use crate::sql::lexer::Scanner; 6 | use crate::sql::query::Join; 7 | use crate::sql::query::Node; 8 | use crate::sql::query::QueryData; 9 | use crate::sql::query::TopType; 10 | use crate::sql::symbol::Group; 11 | use crate::sql::symbol::Symbol; 12 | use crate::sql::symbol::Token; 13 | use crate::sql::worker::SQLError; 14 | use crate::sql::worker::SQL; 15 | use std::fmt; 16 | use std::iter::Peekable; 17 | use std::slice::Iter; 18 | 19 | #[derive(Debug)] 20 | pub struct Parser { 21 | tokens: Vec, 22 | } 23 | 24 | #[derive(Debug)] 25 | pub enum ParserError { 26 | CauseByLexer(LexerError), 27 | TokenLengthZero, 28 | SyntaxError(String), 29 | SQLError(SQLError), 30 | } 31 | 32 | impl fmt::Display for ParserError { 33 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 34 | match *self { 35 | ParserError::CauseByLexer(ref e) => write!(f, "error caused by lexer: {}", e), 36 | ParserError::TokenLengthZero => write!(f, "error caused by a zero length tokens"), 37 | ParserError::SyntaxError(ref s) => write!(f, "error caused by wrong syntax `{}`", s), 38 | ParserError::SQLError(ref e) => write!(f, "error caused by semantic: {}", e), 39 | } 40 | } 41 | } 42 | 43 | impl Parser { 44 | pub fn new(message: &str) -> Result { 45 | let mut 
s: Scanner = Scanner::new(message); 46 | match s.scan_tokens() { 47 | Ok(tokens) => { 48 | if tokens.len() == 0 { 49 | return Err(ParserError::TokenLengthZero); 50 | } 51 | Ok(Parser { tokens }) 52 | } 53 | Err(e) => Err(ParserError::CauseByLexer(e)), 54 | } 55 | } 56 | pub fn parse(&self, sql: &mut SQL) -> Result<(), ParserError> { 57 | debug!("Parser parsing started..."); 58 | 59 | let mut iter = self.tokens.iter().peekable(); 60 | 61 | match iter.peek() { 62 | Some(symbol) => match symbol.token { 63 | Token::CreateDatabase => { 64 | let _ = iter.next(); // "create database" 65 | 66 | let db_name_sym = iter 67 | .next() 68 | .ok_or(ParserError::SyntaxError(String::from("no db name")))?; 69 | check_id(db_name_sym)?; 70 | 71 | sql.create_database(&db_name_sym.name) 72 | .map_err(|e| ParserError::SQLError(e))?; 73 | 74 | return Ok(()); 75 | } 76 | Token::CreateTable => { 77 | debug!("-> create table"); 78 | let table = parser_create_table(&mut iter)?; 79 | sql.create_table(&table).map_err(|e| ParserError::SQLError(e))?; 80 | return Ok(()); 81 | } 82 | Token::InsertInto => { 83 | debug!("-> insert into table"); 84 | let (table_name, attrs, rows) = parser_insert_into_table(&mut iter)?; 85 | sql.insert_into_table(&table_name, attrs, rows) 86 | .map_err(|e| ParserError::SQLError(e))?; 87 | Ok(()) 88 | } 89 | Token::Select => { 90 | debug!("-> select table"); 91 | sql.querydata = parse_select(&mut iter)?; 92 | sql.select().map_err(|e| ParserError::SQLError(e))?; 93 | Ok(()) 94 | } 95 | Token::DropTable => { 96 | debug!("-> drop table"); 97 | let _ = iter.next(); // "drop table" 98 | let tb_name_sym = iter 99 | .next() 100 | .ok_or(ParserError::SyntaxError(String::from("no table name")))?; 101 | check_id(tb_name_sym)?; 102 | 103 | sql.drop_table(&tb_name_sym.name) 104 | .map_err(|e| ParserError::SQLError(e))?; 105 | Ok(()) 106 | } 107 | Token::DropDatabase => { 108 | debug!("-> drop database"); 109 | let _ = iter.next(); // "drop database" 110 | let db_name_sym = iter 111 
| .next() 112 | .ok_or(ParserError::SyntaxError(String::from("no db name")))?; 113 | check_id(db_name_sym)?; 114 | 115 | sql.drop_database(&db_name_sym.name) 116 | .map_err(|e| ParserError::SQLError(e))?; 117 | Ok(()) 118 | } 119 | _ => { 120 | return Err(ParserError::SyntaxError(String::from("unknown keyword"))); 121 | } 122 | }, 123 | None => { 124 | return Err(ParserError::SyntaxError(String::from("miss query"))); 125 | } 126 | } 127 | } 128 | } 129 | 130 | fn parser_create_table(iter: &mut Peekable>) -> Result { 131 | let _ = iter.next(); 132 | 133 | let table_name_sym = iter 134 | .next() 135 | .ok_or(ParserError::SyntaxError(String::from("no table name")))?; 136 | check_id(table_name_sym)?; 137 | 138 | let table_name = table_name_sym.name.clone(); 139 | debug!(" - table name: {}", table_name); 140 | 141 | assert_token(iter.next(), Token::ParentLeft)?; 142 | 143 | // create table. 144 | let mut table = Table::new(&table_name); 145 | loop { 146 | debug!(" -- new field:"); 147 | 148 | let mut field; 149 | 150 | match iter.peek() { 151 | // setting a field 152 | Some(s) if s.group == Group::Identifier => { 153 | // 1. column 154 | let var_name = iter 155 | .next() 156 | .ok_or(ParserError::SyntaxError(String::from("miss column name")))? 157 | .name 158 | .clone(); 159 | debug!(" --- field name: {}", var_name); 160 | 161 | // 2. datatype 162 | let var_type_sym = iter 163 | .next() 164 | .ok_or(ParserError::SyntaxError(String::from("miss column type")))?; 165 | debug!(" --- field type: {}", var_type_sym.name); 166 | 167 | // 2.1 case: varchar, char 168 | if var_type_sym.token == Token::Varchar || var_type_sym.token == Token::Char { 169 | assert_token(iter.next(), Token::ParentLeft)?; 170 | 171 | let varchar_len_str = iter 172 | .next() 173 | .ok_or(ParserError::SyntaxError(String::from("miss column type length")))? 
174 | .name 175 | .clone(); 176 | let varchar_len = varchar_len_str 177 | .parse::() 178 | .map_err(|_| ParserError::SyntaxError(String::from("type length invalid")))?; 179 | debug!(" --- field type length: {}", varchar_len); 180 | 181 | let datatype = DataType::get(&var_type_sym.name, Some(varchar_len)) 182 | .ok_or(ParserError::SyntaxError(String::from("invalid type")))?; 183 | field = Field::new(&var_name, datatype); 184 | 185 | assert_token(iter.next(), Token::ParentRight)?; 186 | 187 | // 2.2 case: other type 188 | } else { 189 | let datatype = DataType::get(&var_type_sym.name, None) 190 | .ok_or(ParserError::SyntaxError(String::from("invalid type")))?; 191 | field = Field::new(&var_name, datatype); 192 | } 193 | // 3. column properties 194 | loop { 195 | match iter.peek() { 196 | Some(s) if s.token == Token::Comma => { 197 | iter.next(); 198 | debug!(" go next field"); 199 | break; 200 | } 201 | Some(s) if s.token == Token::NotNull => { 202 | iter.next(); 203 | field.not_null = true 204 | } 205 | Some(s) if s.token == Token::Default => { 206 | iter.next(); 207 | let default_value = iter 208 | .next() 209 | .ok_or(ParserError::SyntaxError(String::from("miss default value")))? 210 | .name 211 | .clone(); 212 | field.default = Some(default_value); 213 | } 214 | Some(s) if s.token == Token::Check => { 215 | // TODO: handle check syntax. Do not use `check` in sql now. 
216 | return Err(ParserError::SyntaxError(String::from("check syntax error"))); 217 | } 218 | Some(s) if s.token == Token::Encrypt => { 219 | iter.next(); 220 | field.encrypt = true; 221 | } 222 | // end of table block 223 | Some(s) if s.token == Token::ParentRight => break, 224 | Some(_) | None => return Err(ParserError::SyntaxError(String::from(""))), 225 | } 226 | } 227 | } 228 | 229 | // setting table properties 230 | Some(s) if s.group == Group::Keyword => { 231 | // TODO: primary key, foreign key & reference 232 | return Err(ParserError::SyntaxError(String::from(""))); 233 | } 234 | 235 | // finish table block 236 | Some(s) if s.token == Token::ParentRight => { 237 | debug!(" - fields setting done."); 238 | break; 239 | } 240 | 241 | Some(_) | None => return Err(ParserError::SyntaxError(String::from(""))), 242 | } 243 | 244 | table.insert_new_field(field); 245 | debug!(" - insert new field into table"); 246 | } 247 | 248 | Ok(table) 249 | } 250 | 251 | fn parser_insert_into_table( 252 | iter: &mut Peekable>, 253 | ) -> Result<(String, Vec, Vec>), ParserError> { 254 | let _ = iter.next(); 255 | 256 | let table_name_sym = iter 257 | .next() 258 | .ok_or(ParserError::SyntaxError(String::from("miss table name")))?; 259 | check_id(table_name_sym)?; 260 | 261 | let table_name = table_name_sym.name.clone(); 262 | debug!(" - table name: {}", table_name); 263 | 264 | // get attributes 265 | let attrs = get_id_list(iter, true)?; 266 | debug!(" -- attributes: {:?}", attrs); 267 | 268 | assert_token(iter.next(), Token::Values)?; 269 | 270 | let mut rows: Vec> = Vec::new(); 271 | loop { 272 | match iter.peek() { 273 | Some(s) if s.token == Token::ParentLeft => { 274 | let row = get_id_list(iter, true)?; 275 | debug!(" -- row: {:?}", row); 276 | if attrs.len() != row.len() { 277 | return Err(ParserError::SyntaxError(String::from( 278 | "tuple length mismatch definition", 279 | ))); 280 | } 281 | rows.push(row); 282 | } 283 | Some(s) if s.token == Token::Comma => { 284 | 
iter.next(); // consume the `,` separating value tuples
                continue;
            }
            Some(_) | None => break,
        }
    }

    assert_token(iter.next(), Token::Semicolon)?;
    Ok((table_name, attrs, rows))
}

/// Parse select query
///
/// Syntax (optional clauses in brackets; the original placeholders were
/// angle-bracketed and lost in extraction):
///
/// ```sql
/// SELECT [DISTINCT] [TOP n [PERCENT]] fields
///     FROM tables
///     [JOIN table ON predicate]
///     [WHERE predicate]
///     [GROUP BY ...]              -- TODO: not implemented below
///     [WITH {CUBE | ROLLUP}]      -- TODO: not implemented below
///     [HAVING ...]                -- TODO: not implemented below
///     [ORDER BY ...]              -- TODO: not implemented below
/// ```
///
/// Returns the populated `QueryData` on success. (Generic parameters of this
/// signature were lost in extraction; restored from `Parser::parse`, which
/// peeks a `Peekable<Iter<Symbol>>` and assigns the result to `sql.querydata`.)
#[inline]
fn parse_select(iter: &mut Peekable<Iter<Symbol>>) -> Result<QueryData, ParserError> {
    let _ = iter.next(); // select

    let mut query_data = QueryData::new();

    if check_token(iter.peek(), Token::Distinct) {
        iter.next();
        query_data.is_distinct = true;
    }

    if check_token(iter.peek(), Token::Top) {
        iter.next(); // top

        // the literal following TOP: a count, or a percentage when PERCENT follows
        let top_spec = match iter.next() {
            Some(s) => s.name.clone(),
            None => return Err(ParserError::SyntaxError(String::from("invalid select top syntax"))),
        };

        if check_token(iter.peek(), Token::Percent) {
            iter.next();
            // NOTE(review): the turbofish type of `parse` was lost in
            // extraction; the target type is inferred from the field of
            // `TopType::Percent` — confirm against query.rs.
            query_data.top = TopType::Percent(
                top_spec
                    .parse()
                    .map_err(|_| ParserError::SyntaxError(String::from("invalid select top syntax")))?,
            );
        } else {
            // likewise inferred from the field of `TopType::Number`
            query_data.top = TopType::Number(
                top_spec
                    .parse()
                    .map_err(|_| ParserError::SyntaxError(String::from("invalid select top syntax")))?,
            );
        }
    }

    query_data.fields = get_id_list(iter, false)?;

    assert_token(iter.next(), Token::From)?;

    query_data.tables = get_id_list(iter, false)?;

    // zero or more JOIN ... ON ... clauses
    loop {
        match check_token(iter.peek(), Token::InnerJoin)
            || check_token(iter.peek(), Token::FullOuterJoin)
            || check_token(iter.peek(), Token::LeftJoin)
            || check_token(iter.peek(), Token::RightJoin)
        {
            true => {
                let mut join = Join::new(&iter.next().unwrap().name);
                match iter.next() {
                    Some(s) if s.token == Token::Identifier => join.table = s.name.clone(),
                    Some(_) | None => return Err(ParserError::SyntaxError(String::from("invalid select join syntax"))),
                }

                assert_token(iter.next(), Token::On)?;

                // collect the ON predicate tokens up to the next keyword or `;`
                let mut symbols: Vec<&Symbol> = vec![];
                loop {
                    match iter.peek() {
                        Some(s) if s.group == Group::Keyword || s.token == Token::Semicolon => break,
                        Some(_) => symbols.push(iter.next().unwrap()),
                        None => break,
                    }
                }
                join.condition = Some(parse_predicate(symbols)?);

                query_data.joins.push(join);
            }
            false => break,
        }
    }

    // optional WHERE clause: gather tokens up to the next keyword or `;`
    match iter.peek() {
        Some(s) if s.token == Token::Where => {
            let _ = iter.next();
            let mut symbols: Vec<&Symbol> = vec![];
            loop {
                match iter.peek() {
                    Some(s) if s.group == Group::Keyword || s.token == Token::Semicolon => break,
                    Some(_) => symbols.push(iter.next().unwrap()),
                    None => break,
                }
            }
            query_data.predicate = Some(parse_predicate(symbols)?);
        }
        Some(_) | None => {}
    }

    if check_token(iter.peek(), Token::GroupBy) {
        // TODO:
    }

    if check_token(iter.peek(), Token::OrderBy) {
        // TODO:
    }

    assert_token(iter.next(), Token::Semicolon)?;
    Ok(query_data)
}

/// Parse a predicate as a tree: infix tokens -> postfix -> binary tree.
fn parse_predicate(symbols: Vec<&Symbol>) -> Result<Box<Node>, ParserError> {
    let postfix_vec = parse_infix_postfix(symbols)?;
    parse_postfix_tree(postfix_vec)
}

/// Parse a postfix to a binary tree, and do semantic check.
/// Identifiers become leaves; operators pop their operands off `nodes_stack`.
fn parse_postfix_tree(symbols: Vec<&Symbol>) -> Result<Box<Node>, ParserError> {
    let mut iter = symbols.iter();
    let mut nodes_stack: Vec<Node> = vec![];
    loop {
        match iter.next() {
            Some(s) if s.group == Group::Identifier => nodes_stack.push(Node::new(s.name.clone())),
            Some(s) if s.group == Group::Operator => match
s.token {
                // AND / OR are binary: pop right operand first, then left
                Token::AND | Token::OR => {
                    let right = nodes_stack
                        .pop()
                        .ok_or_else(|| ParserError::SyntaxError(String::from("invalid predicate syntax")))?;
                    let left = nodes_stack
                        .pop()
                        .ok_or_else(|| ParserError::SyntaxError(String::from("invalid predicate syntax")))?;
                    let tree = Node::new(s.name.clone()).right(right).left(left);
                    nodes_stack.push(tree);
                }
                // NOT is unary: its single operand goes on the right
                Token::NOT => {
                    let operand = nodes_stack
                        .pop()
                        .ok_or_else(|| ParserError::SyntaxError(String::from("invalid predicate syntax")))?;
                    let tree = Node::new(s.name.clone()).right(operand);
                    nodes_stack.push(tree);
                }
                Token::LT | Token::LE | Token::EQ | Token::NE | Token::GT | Token::GE => {
                    // Previously these were bare `.unwrap()`s, so a malformed
                    // predicate (e.g. `a >`) panicked instead of returning an error.
                    let right = nodes_stack
                        .pop()
                        .ok_or_else(|| ParserError::SyntaxError(String::from("invalid predicate syntax")))?;
                    let left = nodes_stack
                        .pop()
                        .ok_or_else(|| ParserError::SyntaxError(String::from("invalid predicate syntax")))?;

                    // TODO: check right value
                    // TODO: check left value

                    let tree = Node::new(s.name.clone()).right(right).left(left);
                    nodes_stack.push(tree);
                }
                _ => {}
            },
            Some(_) => return Err(ParserError::SyntaxError(String::from("invalid predicate syntax"))),
            None => break,
        }
    }

    // exactly one tree must remain: zero means an empty/ill-formed predicate,
    // more than one means dangling operands
    let tree = nodes_stack
        .pop()
        .ok_or_else(|| ParserError::SyntaxError(String::from("invalid predicate syntax")))?;

    if !nodes_stack.is_empty() {
        return Err(ParserError::SyntaxError(String::from("invalid predicate syntax")));
    }

    Ok(Box::new(tree))
}

/// parse predicate tokens from infix to postfix (shunting-yard).
///
/// BUG FIX: `parent_counter` used to be declared *inside* the scan loop, so it
/// reset to 0 on every token and the unbalanced-`(` check at end of input could
/// never fire (which is why `#[allow(unused_assignments)]` was needed). It now
/// lives outside the loop, and an unmatched `)` is reported instead of silently
/// draining the operator stack.
fn parse_infix_postfix(symbols: Vec<&Symbol>) -> Result<Vec<&Symbol>, ParserError> {
    let mut iter = symbols.iter();
    let mut stack: Vec<&Symbol> = vec![];
    let mut output: Vec<&Symbol> = vec![];
    // number of `(` groups currently open across the whole scan
    let mut parent_counter = 0;
    loop {
        match iter.next() {
            Some(s) if s.token == Token::ParentLeft => {
                parent_counter += 1;
                stack.push(*s);
            }
            Some(s) if s.token == Token::ParentRight => loop {
                match stack.pop() {
                    Some(s_) if s_.token == Token::ParentLeft => {
                        parent_counter -= 1;
                        break;
                    }
                    Some(s_) => output.push(s_),
                    // `)` with no matching `(` on the stack
                    None => return Err(ParserError::SyntaxError(String::from("invalid predicate syntax"))),
                }
            },
            Some(s) if s.group == Group::Operator => {
                // pop operators of greater or equal priority before pushing `s`
                loop {
                    match stack.last() {
                        Some(last) if last.group == Group::Operator => {
                            let l = operator_priority(&last.token);
                            let r = operator_priority(&s.token);
                            if l >= r {
                                output.push(*last);
                                stack.pop();
                            } else {
                                break;
                            }
                        }
                        Some(_) | None => break,
                    }
                }
                stack.push(*s);
            }
            Some(s) => output.push(*s),
            None => {
                if parent_counter > 0 {
                    // a `(` was never closed
                    return Err(ParserError::SyntaxError(String::from("invalid predicate syntax")));
                }
                // flush remaining operators
                loop {
                    match stack.pop() {
                        Some(s_) => output.push(s_),
                        None => break,
                    }
                }
                break;
            }
        }
    }

    Ok(output)
}

/// Relative binding strength for predicate operators (used by shunting-yard):
/// comparisons bind tightest, then NOT, then AND/OR.
#[inline]
fn operator_priority(t: &Token) -> u32 {
    match t {
        &Token::NOT => 2,
        &Token::AND | &Token::OR => 1,
        _ => 3, // >=, >, =, <, <=
    }
}

/// Get a list of identifiers, which in form as
///
/// `is_parent` parameter
/// - is_parent: `(a1, a2, a3, a4)`
/// - !is_parent: `a1, a2, a3, a4`
///
/// (Generic parameters of this signature were lost in extraction; restored as
/// `Peekable<Iter<Symbol>>` / `Result<Vec<String>, ParserError>` from callers
/// that assign the result to `query_data.fields`.)
fn get_id_list(iter: &mut Peekable<Iter<Symbol>>, is_parent: bool) -> Result<Vec<String>, ParserError> {
    let mut v = vec![];
    if is_parent {
        assert_token(iter.next(), Token::ParentLeft)?;
    }
    loop {
        match iter.next() {
            Some(s) if s.token == Token::Identifier => {
                v.push(s.name.clone());
                match iter.peek() {
                    Some(s) if s.token == Token::Comma => {
                        iter.next();
                        continue;
                    }
                    Some(_) | None => break,
                }
            }
            Some(_) | None => return Err(ParserError::SyntaxError(String::from("invalid syntax"))),
        }
    }
    if is_parent {
        assert_token(iter.next(), Token::ParentRight)?;
    }
    Ok(v)
}

/// Check if the symbol is an identifier
#[inline]
fn check_id(sym: &Symbol) -> Result<(), ParserError> {
    if sym.group != Group::Identifier {
        // BUG FIX: the message previously read "... is not an" (truncated)
        return Err(ParserError::SyntaxError(format!("{} is not an identifier", &sym.name)));
    }
    Ok(())
}

/// Check if the next
symbol is the expected token. 571 | #[inline] 572 | fn check_token(sym: Option<&&Symbol>, token: Token) -> bool { 573 | match sym { 574 | Some(s) if s.token == token => true, 575 | Some(_) | None => false, 576 | } 577 | } 578 | 579 | /// Assert the symbol is the expected token. 580 | #[inline] 581 | fn assert_token(sym: Option<&Symbol>, token: Token) -> Result<(), ParserError> { 582 | if sym 583 | .ok_or(ParserError::SyntaxError(String::from("invalid syntax")))? 584 | .token 585 | != token 586 | { 587 | return Err(ParserError::SyntaxError(String::from("invalid syntax"))); 588 | } 589 | Ok(()) 590 | } 591 | 592 | #[cfg(test)] 593 | mod tests { 594 | use super::*; 595 | use crate::sql::query::JoinType; 596 | use crate::sql::query::*; 597 | use env_logger; 598 | 599 | fn fake_sql() -> SQL { 600 | let mut sql = SQL::new("Jenny").unwrap(); 601 | sql.create_database("db1").unwrap(); 602 | sql 603 | } 604 | 605 | /// in order traversal 606 | fn in_order(node: Box, vec: &mut Vec) { 607 | if node.left.is_some() { 608 | in_order(node.left.unwrap(), vec); 609 | } 610 | vec.push(node.root.clone()); 611 | if node.right.is_some() { 612 | in_order(node.right.unwrap(), vec); 613 | } 614 | } 615 | 616 | #[test] 617 | fn test_parser_create_database() { 618 | let mut sql = fake_sql(); 619 | 620 | let query = "create database db2;"; 621 | let parser = Parser::new(query).unwrap(); 622 | parser.parse(&mut sql).unwrap(); 623 | assert_eq!(sql.database.name, "db2"); 624 | } 625 | 626 | #[test] 627 | fn test_parser_create_table() { 628 | let mut sql = fake_sql(); 629 | 630 | let query = "create table t1 (a1 int, b1 char(7), c1 double);"; 631 | let parser = Parser::new(query).unwrap(); 632 | parser.parse(&mut sql).unwrap(); 633 | 634 | let db = sql.database.clone(); 635 | let table = db.tables.get("t1").unwrap(); 636 | assert!(table.fields.contains_key("a1")); 637 | assert!(table.fields.contains_key("b1")); 638 | assert!(table.fields.contains_key("c1")); 639 | 640 | let query = "create table 
t1 (a1 int not null default 5 encrypt, b1 char(7) not null, c1 double default 1.2);"; 641 | let parser = Parser::new(query).unwrap(); 642 | parser.parse(&mut sql).unwrap(); 643 | 644 | let db = sql.database.clone(); 645 | let table = db.tables.get("t1").unwrap(); 646 | let a1 = table.fields.get("a1").unwrap(); 647 | let b1 = table.fields.get("b1").unwrap(); 648 | let c1 = table.fields.get("c1").unwrap(); 649 | assert_eq!(a1.not_null, true); 650 | assert_eq!(a1.default.clone().unwrap(), "5"); 651 | assert_eq!(a1.encrypt, true); 652 | assert_eq!(b1.not_null, true); 653 | assert_eq!(c1.default.clone().unwrap(), "1.2"); 654 | } 655 | 656 | #[test] 657 | fn test_insert_into_table1() { 658 | let query = "insert into t1(a1, a2, a3) values (1, 2, 3), (4, 5, 6);"; 659 | let parser = Parser::new(query).unwrap(); 660 | let mut iter = parser.tokens.iter().peekable(); 661 | let (table_name, attrs, rows) = parser_insert_into_table(&mut iter).unwrap(); 662 | assert_eq!(table_name, "t1"); 663 | assert_eq!(attrs, vec![String::from("a1"), String::from("a2"), String::from("a3")]); 664 | assert_eq!( 665 | rows, 666 | vec![ 667 | vec![String::from("1"), String::from("2"), String::from("3")], 668 | vec![String::from("4"), String::from("5"), String::from("6")] 669 | ] 670 | ); 671 | 672 | let query = "insert into t1(a1, a2, a3) values (1, 2, 3);"; 673 | let parser = Parser::new(query).unwrap(); 674 | let mut iter = parser.tokens.iter().peekable(); 675 | let (table_name, attrs, rows) = parser_insert_into_table(&mut iter).unwrap(); 676 | assert_eq!(table_name, "t1"); 677 | assert_eq!(attrs, vec![String::from("a1"), String::from("a2"), String::from("a3")]); 678 | assert_eq!( 679 | rows, 680 | vec![vec![String::from("1"), String::from("2"), String::from("3")],] 681 | ); 682 | 683 | let query = "insert into t1(a1) values (1);"; 684 | let parser = Parser::new(query).unwrap(); 685 | let mut iter = parser.tokens.iter().peekable(); 686 | let (table_name, attrs, rows) = 
parser_insert_into_table(&mut iter).unwrap(); 687 | assert_eq!(table_name, "t1"); 688 | assert_eq!(attrs, vec![String::from("a1")]); 689 | assert_eq!(rows, vec![vec![String::from("1")]]); 690 | } 691 | 692 | #[test] 693 | fn test_insert_into_table_syntax_error() { 694 | // values not match attributes 695 | let query = "insert into t1(a1, a2, a3) values (1, 2);"; 696 | let parser = Parser::new(query).unwrap(); 697 | let mut iter = parser.tokens.iter().peekable(); 698 | assert!(parser_insert_into_table(&mut iter).is_err()); 699 | 700 | let query = "insert into t1(a1, a2, a3) values (1, 2, 3, 4);"; 701 | let parser = Parser::new(query).unwrap(); 702 | let mut iter = parser.tokens.iter().peekable(); 703 | assert!(parser_insert_into_table(&mut iter).is_err()); 704 | } 705 | 706 | #[test] 707 | fn test_parser_insert_into_table() { 708 | let mut sql = fake_sql(); 709 | 710 | let query = "create table t1 (a1 int, b1 char(7), c1 double);"; 711 | let parser = Parser::new(query).unwrap(); 712 | parser.parse(&mut sql).unwrap(); 713 | 714 | let query = "insert into t1(a1, b1, c1) values (33, 'abc', 3.43);"; 715 | let parser = Parser::new(query).unwrap(); 716 | assert!(parser.parse(&mut sql).is_ok()); 717 | } 718 | 719 | #[test] 720 | fn test_parser_insert_into_table_error() { 721 | let mut sql = fake_sql(); 722 | 723 | let query = "create table t1 (a1 int, b1 char(7), c1 double);"; 724 | let parser = Parser::new(query).unwrap(); 725 | parser.parse(&mut sql).unwrap(); 726 | 727 | // miss the attribute, but it has no default value 728 | let query = "insert into t1(a1, c1) values (33, 3.43);"; 729 | let parser = Parser::new(query).unwrap(); 730 | assert!(parser.parse(&mut sql).is_err()); 731 | } 732 | 733 | #[test] 734 | fn test_parser_new_error() { 735 | let query = "create table $1234;"; 736 | match Parser::new(query) { 737 | Ok(_) => {} 738 | Err(e) => assert_eq!(format!("{}", e), "error caused by lexer: please use ascii character."), 739 | } 740 | } 741 | 742 | fn 
assert_parse_infix_postfix(query: &str, answer: Vec<&str>) { 743 | let mut parser = Parser::new(query).unwrap(); 744 | parser.tokens.pop(); // `;` 745 | let mut iter = parser.tokens.iter(); 746 | let mut tokens: Vec<&Symbol> = vec![]; 747 | loop { 748 | match iter.next() { 749 | Some(s) => tokens.push(s), 750 | None => break, 751 | } 752 | } 753 | let postfix = parse_infix_postfix(tokens).unwrap(); 754 | println!("{:?}", postfix); 755 | println!("{:?}", answer); 756 | for i in 0..answer.len() { 757 | assert_eq!(&postfix[i].name, answer[i]); 758 | } 759 | } 760 | 761 | #[test] 762 | fn test_parse_infix_postfix() { 763 | let query = "not a1 = 3 and b2 >= 5;"; 764 | let answer = ["a1", "3", "=", "not", "b2", "5", ">=", "and"]; 765 | assert_parse_infix_postfix(query, answer.to_vec()); 766 | 767 | let query = "not(not a1 = 3 and not (b2 >= 5 or c1 < 7));"; 768 | let answer = [ 769 | "a1", "3", "=", "not", "b2", "5", ">=", "c1", "7", "<", "or", "not", "and", "not", 770 | ]; 771 | assert_parse_infix_postfix(query, answer.to_vec()); 772 | } 773 | 774 | fn assert_parse_postfix_tree(query: &str, answer: Vec<&str>) { 775 | let mut parser = Parser::new(query).unwrap(); 776 | parser.tokens.pop(); // `;` 777 | let mut iter = parser.tokens.iter(); 778 | let mut tokens: Vec<&Symbol> = vec![]; 779 | loop { 780 | match iter.next() { 781 | Some(s) => tokens.push(s), 782 | None => break, 783 | } 784 | } 785 | let mut output = vec![]; 786 | let tree = parse_postfix_tree(tokens).unwrap(); 787 | in_order(tree, &mut output); 788 | println!("{:?}", output); 789 | println!("{:?}", answer); 790 | for i in 0..answer.len() { 791 | assert_eq!(&output[i], answer[i]); 792 | } 793 | } 794 | 795 | #[test] 796 | fn test_parse_postfix_tree() { 797 | let postfix = "a1 3 = not b2 5 >= and ;"; 798 | let answer = ["not", "a1", "=", "3", "and", "b2", ">=", "5"]; 799 | assert_parse_postfix_tree(postfix, answer.to_vec()); 800 | 801 | let postfix = "a1 3 = not b2 5 >= c1 7 < or not and not ;"; 802 | let 
answer = [ 803 | "not", "not", "a1", "=", "3", "and", "not", "b2", ">=", "5", "or", "c1", "<", "7", 804 | ]; 805 | assert_parse_postfix_tree(postfix, answer.to_vec()); 806 | } 807 | 808 | fn assert_parse_predicate(query: &str, answer: &str) { 809 | let mut parser = Parser::new(query).unwrap(); 810 | parser.tokens.pop(); // `;` 811 | let mut iter = parser.tokens.iter(); 812 | let mut tokens: Vec<&Symbol> = vec![]; 813 | loop { 814 | match iter.next() { 815 | Some(s) => tokens.push(s), 816 | None => break, 817 | } 818 | } 819 | let mut output = vec![]; 820 | let tree = parse_predicate(tokens).unwrap(); 821 | in_order(tree, &mut output); 822 | let mut in_order_string = "".to_string(); 823 | for i in output { 824 | in_order_string += &i; 825 | in_order_string += " "; 826 | } 827 | in_order_string += ";"; 828 | assert_eq!(in_order_string, answer); 829 | } 830 | 831 | #[test] 832 | fn test_parse_predicate() { 833 | let query = "a1 >= 3 and b3 <= 7 or c1 = 4 and not d1 = 3 ;"; // a space before `;` is required 834 | assert_parse_predicate(query, query); 835 | 836 | let query = "not (a1 >= 3 and b3 <= 7) or not (c1 = 4 and d1 = 3);"; 837 | let answer = "not a1 >= 3 and b3 <= 7 or not c1 = 4 and d1 = 3 ;"; // a space before `;` is required 838 | assert_parse_predicate(query, answer); 839 | } 840 | 841 | #[test] 842 | fn test_parse_select_field_table() { 843 | let query = "select t1.a1, t1.a2, t1.a3 from t1, t2;"; 844 | let parser = Parser::new(query).unwrap(); 845 | let mut iter = parser.tokens.iter().peekable(); 846 | 847 | let querydata = parse_select(&mut iter).unwrap(); 848 | assert_eq!( 849 | querydata.fields, 850 | vec![String::from("t1.a1"), String::from("t1.a2"), String::from("t1.a3")] 851 | ); 852 | assert_eq!(querydata.tables, vec![String::from("t1"), String::from("t2")]); 853 | } 854 | 855 | #[test] 856 | fn test_parse_select_distinct_top() { 857 | let query = "select distinct top 5 t1.a1, t1.a2, t1.a3 from t1;"; 858 | let parser = Parser::new(query).unwrap(); 
859 | let mut iter = parser.tokens.iter().peekable(); 860 | 861 | let querydata = parse_select(&mut iter).unwrap(); 862 | assert_eq!( 863 | querydata.fields, 864 | vec![String::from("t1.a1"), String::from("t1.a2"), String::from("t1.a3")] 865 | ); 866 | assert_eq!(querydata.tables, vec![String::from("t1")]); 867 | assert_eq!(querydata.top, TopType::Number(5)); 868 | assert_eq!(querydata.is_distinct, true); 869 | 870 | let query = "select top 50 percent a1, b1, c1 from t1;"; 871 | let parser = Parser::new(query).unwrap(); 872 | let mut iter = parser.tokens.iter().peekable(); 873 | 874 | let querydata = parse_select(&mut iter).unwrap(); 875 | assert_eq!(querydata.top, TopType::Percent(50.0)); 876 | assert_eq!(querydata.is_distinct, false); 877 | } 878 | 879 | #[test] 880 | fn test_parse_select_join() { 881 | let query = "select t1.a1, t1.a2, t1.a3 from t1 inner join t2 on t1.a1 = t2.a1 left join t3 on t1.a1 = t3.a1;"; 882 | let parser = Parser::new(query).unwrap(); 883 | let mut iter = parser.tokens.iter().peekable(); 884 | 885 | let querydata = parse_select(&mut iter).unwrap(); 886 | assert_eq!( 887 | querydata.fields, 888 | vec![String::from("t1.a1"), String::from("t1.a2"), String::from("t1.a3")] 889 | ); 890 | assert_eq!(querydata.tables, vec![String::from("t1")]); 891 | assert_eq!(querydata.joins[0].join_type, JoinType::InnerJoin); 892 | assert_eq!(querydata.joins[0].table, "t2".to_string()); 893 | assert_eq!(querydata.joins[1].join_type, JoinType::LeftJoin); 894 | assert_eq!(querydata.joins[1].table, "t3".to_string()); 895 | } 896 | 897 | } 898 | --------------------------------------------------------------------------------