├── tests ├── deletefromtest.sql ├── errortest.sql ├── insertintotest.sql ├── createselecttest.sql ├── selecttest_more.sql ├── selecttest.sql ├── createtest.sql └── scratch ├── .gitignore ├── src ├── common ├── exprtest ├── termtest ├── row.c ├── delete.c ├── literal.c ├── insert.c ├── common.c ├── lex │ └── sql.l ├── vector.c ├── mock_db.c ├── condition.c ├── create.c ├── ra.c ├── column.c ├── expression.c ├── yacc │ └── sql.y ├── list.c └── sra.c ├── sqlparser.docx ├── parsesql.sh ├── include ├── delete.h ├── insert.h ├── row.h ├── mock_db.h ├── literal.h ├── vector.h ├── ra.h ├── sra.txt ├── common.h ├── create.h ├── condition.h ├── column.h ├── expression.h ├── list.h └── sra.h ├── haskell ├── Tests.hs ├── SRA.hs └── Desugar.hs ├── sql_grammar_condensed.txt ├── Makefile └── README.md /tests/deletefromtest.sql: -------------------------------------------------------------------------------- 1 | DELETE FROM blubby where blorp = 10; -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/* 2 | src/y.tab.* 3 | src/lex.yy.c 4 | src/auxfiles/* -------------------------------------------------------------------------------- /src/common: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thinkpad20/sql/HEAD/src/common -------------------------------------------------------------------------------- /sqlparser.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thinkpad20/sql/HEAD/sqlparser.docx -------------------------------------------------------------------------------- /src/exprtest: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thinkpad20/sql/HEAD/src/exprtest -------------------------------------------------------------------------------- /src/termtest: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thinkpad20/sql/HEAD/src/termtest -------------------------------------------------------------------------------- /parsesql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Attempting to parse $1..." 4 | cat $1 | bin/sql_parser -------------------------------------------------------------------------------- /tests/errortest.sql: -------------------------------------------------------------------------------- 1 | SELECT FROM Table blob WHERE ; 2 | 3 | SELECT buncha.blibber blobs FROM cribbage c croopage p; -------------------------------------------------------------------------------- /tests/insertintotest.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO blub VALUES (5,6); 2 | 3 | INSERT INTO blorp (yo, hey, hi) VALUES ("hello", 8, 9); -------------------------------------------------------------------------------- /tests/createselecttest.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE Foo ( 2 | i int, j int, k int 3 | ); 4 | 5 | CREATE TABLE Bar ( 6 | a int, b int, j char, i int 7 | ); -------------------------------------------------------------------------------- /tests/selecttest_more.sql: -------------------------------------------------------------------------------- 1 | SELECT u.username, p.content from Users u, Posts p WHERE u.id = 1; 2 | 3 | SELECT u.username, p.content 4 | from Users u 5 | INNER JOIN Posts p 6 | ON u.id = p.user_id 7 | WHERE a != -3 and b > 5 or c = a; -------------------------------------------------------------------------------- /include/delete.h: -------------------------------------------------------------------------------- 1 | #ifndef __DELETE_H_ 2 | #define __DELETE_H_ 3 | 4 | #include "common.h" 5 | 6 | typedef struct Delete_t { 7 | char *table_name; 8 | Condition_t *where; 9 | } Delete_t; 10 | 11 | Delete_t *Delete_make(const char *table_name, Condition_t *where); 12 | void deleteDelete(Delete_t *del); 13 | void Delete_print(Delete_t *del); 14 | void Delete_free(Delete_t *del); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /include/insert.h: -------------------------------------------------------------------------------- 1 | #ifndef __INSERT_H_ 2 | #define __INSERT_H_ 3 | 4 | #include "common.h" 5 | #include "create.h" 6 | 7 | typedef struct Insert_t { 8 | RA_t *ra; 9 | StrList_t *col_names; 10 | Literal_t *values; 11 | } Insert_t; 12 | 13 | Insert_t *Insert_make(RA_t *ra, StrList_t *opt_col_names, Literal_t *values); 14 | void Insert_print(Insert_t *insert); 15 | void Insert_free(Insert_t *insert); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /include/row.h: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include "column.h" 3 | 4 | typedef struct DBTable_s { 5 | 6 | } DBTable_t; 7 | 8 | typedef struct Row_s { 9 | size_t size, num_cols; 10 | size_t *offsets; 11 | void *data; 12 | } Row_t; 13 | 14 | Row_t *Row_makeFirst(Column_t *cols); 15 | Row_t *Row_make(Row_t *row); 16 | 17 | void Row_insertData(Row_t *row, ssize_t *cols, void **data); 18 | 19 | int Row_matchByInt(Row_t *row, int i); 20 | int Row_matchByChar(Row_t *row, char c); 21 | int Row_matchByDouble(Row_t *row, double d); 22 | int Row_matchByString(Row_t *row, const char *str); 23 | -------------------------------------------------------------------------------- /tests/selecttest.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT "User: " || u.username, p.content from Users u, Posts p WHERE u.id = 1; 2 | 3 | SELECT COUNT(u.username) from Users u NATURAL JOIN Tweets t; 4 | 5 | -- this is a comment 6 | /* this 7 | is a 8 | multiline 9 | comment*/ 10 | 11 | SELECT (u.age * 4)/25 as b, p.content 12 | from Users u 13 | INNER JOIN Posts p 14 | ON u.id = p.user_id 15 | FULL OUTER JOIN q 16 | ON q.qux < (u.baz * p.puz) and ((78 + p.biz * q.qux) > 3 or q.zippy > 5) 17 | WHERE a != -3 and b > 5.76 or c = a and name in ("tom", "dick", "harry") 18 | GROUP BY p.content ORDER BY foo; -------------------------------------------------------------------------------- /src/row.c: -------------------------------------------------------------------------------- 1 | #include "../include/row.h" 2 | 3 | Row_t *Row_makeFirst(Column_t *cols) { 4 | Row_t *row = (Row_t *)calloc(1, sizeof(Row_t)); 5 | Column_t *c = cols; 6 | while (c) { 7 | row->size += Column_getSize(c); 8 | row->num_cols++; 9 | row-> 10 | c = c->next; 11 | } 12 | 13 | } 14 | 15 | Row_t *Row_make(Row_t *row); 16 | 17 | void Row_insertData(Row_t *row, ssize_t *cols, void **data); 18 | 19 | int Row_matchByInt(Row_t *row, int i); 20 | int Row_matchByChar(Row_t *row, char c); 21 | int Row_matchByDouble(Row_t *row, double d); 22 | int Row_matchByString(Row_t *row, const char *str); 23 | -------------------------------------------------------------------------------- /tests/createtest.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE Users ( 2 | id int NOT NULL AUTO_INCREMENT, 3 | username varchar(50) DEFAULT "hello", 4 | fullname text(250), 5 | gender char DEFAULT 'M', 6 | PRIMARY KEY (id) 7 | ); 8 | 9 | CREATE TABLE Posts ( 10 | id integer NOT NULL PRIMARY KEY, 11 | user_id int NOT NULL FOREIGN KEY REFERENCES Users(id), 12 | content text(250) DEFAULT "this is the default post..." 13 | ); 14 | 15 | CREATE TABLE Likes ( 16 | user_id int NOT NULL FOREIGN KEY REFERENCES Users(id), 17 | post_id int NOT NULL, 18 | FOREIGN KEY (post_id) REFERENCES Posts(id) 19 | ); 20 | 21 | CREATE TABLE Hashtag ( 22 | content text(250) CHECK a != 3 23 | ); -------------------------------------------------------------------------------- /include/mock_db.h: -------------------------------------------------------------------------------- 1 | #ifndef __MOCK_DB_H__ 2 | #define __MOCK_DB_H__ 3 | 4 | #include "common.h" 5 | #include "create.h" 6 | #include "list.h" 7 | 8 | void mock_db_init(void); 9 | void add_table(Table_t *table); 10 | void remove_table(Table_t *table); 11 | Table_t *table_by_name(const char *name); 12 | void show_tables(void); 13 | /* puts all of table's columns in a List_t */ 14 | List_t column_list(Table_t *table); 15 | /* Creates a List_t of all columns shared by table1 and table2 */ 16 | List_t columns_in_common(Table_t *table1, Table_t *table2); 17 | /* Same as above methods, but searches through existing tables */ 18 | List_t column_list_str(const char *table_name); 19 | List_t columns_in_common_str(const char *table1, const char *table2); 20 | 21 | #endif -------------------------------------------------------------------------------- /include/literal.h: -------------------------------------------------------------------------------- 1 | #ifndef __LITERAL_H_ 2 | #define __LITERAL_H_ 3 | 4 | #include "common.h" 5 | 6 | union LitVal { 7 | int ival; 8 | double dval; 9 | char cval; 10 | char *strval; 11 | }; 12 | 13 | typedef struct Literal_t { 14 | enum data_type t; 15 | union LitVal val; 16 | struct Literal_t *next; /* linked list */ 17 | } Literal_t; 18 | 19 | Literal_t *litInt(int i); 20 | Literal_t *litDouble(double d); 21 | Literal_t *litChar(char c); 22 | Literal_t *litText(char *str); 23 | Literal_t *Literal_append(Literal_t *val, Literal_t *toAppend); 24 | 25 | void Literal_free(Literal_t *lval); 26 | void Literal_freeList(Literal_t *lval); 27 | 28 | void Literal_print(Literal_t *val); 29 | void Literal_printList(Literal_t *val); 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /tests/scratch: -------------------------------------------------------------------------------- 1 | getName :: RA -> String 2 | getName (Table name _) = name 3 | getName (Pi _ ra) = getName ra 4 | getName (Rho _ _ ra) = getName ra 5 | getName (Sigma _ ra) = getName ra 6 | getName (RhoTable s _) = s 7 | getName (Union l r) = getName l ++ "~" ++ getName r 8 | getName (Difference l r) = getName l ++ "-" ++ getName r 9 | getName (Cross l r) = getName l ++ "*" ++ getName r 10 | 11 | 12 | desugar (NaturalJoin sra1 sra2) = 13 | let 14 | ra1 = desugar sra1 15 | ra2 = desugar sra2 16 | toTups :: RA -> [(Expression, Type)] 17 | toTups ra = map (\(Column e t _) -> (e,t)) (getCols ra) 18 | colsInEither = map fst $ union (toTups ra1) (toTups ra2) 19 | colsInBoth = map fst $ intersect (toTups ra1) (toTups ra2) 20 | cond = case inBoth of 21 | [] -> Bool False 22 | --(e:es) -> makeCond (BinaryCond "=" es 23 | makeCond [] = 24 | in 25 | ra1 -------------------------------------------------------------------------------- /src/delete.c: -------------------------------------------------------------------------------- 1 | #include "../include/delete.h" 2 | #include "../include/ra.h" 3 | 4 | Delete_t *Delete_make(const char *table_name, Condition_t *where) { 5 | Delete_t *new_free = (Delete_t *)calloc(1, sizeof(Delete_t)); 6 | new_free->table_name = strdup(table_name); 7 | new_free->where = where; 8 | return new_free; 9 | } 10 | 11 | void deleteDelete(Delete_t *del) { 12 | Condition_free(del->where); 13 | free(del->table_name); 14 | free(del); 15 | } 16 | 17 | void Delete_print(Delete_t *del) { 18 | printf("Delete from %s where ", del->table_name); 19 | Condition_print(del->where); 20 | puts(""); 21 | } 22 | 23 | void Delete_free(Delete_t *del) { 24 | if (!del) { 25 | fprintf(stderr, "Warning: Delete_free called on null pointer\n"); 26 | return; 27 | } 28 | free(del->table_name); 29 | if (del->where) 30 | Condition_free(del->where); 31 | free(del); 32 | } 33 | -------------------------------------------------------------------------------- /include/vector.h: -------------------------------------------------------------------------------- 1 | #ifndef __VECTOR_H_ 2 | #define __VECTOR_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | typedef struct vector_s { 10 | size_t size, max_size; 11 | void **data; 12 | void (*free) (void *); /* free function */ 13 | } vector_t; 14 | 15 | vector_t *vector(size_t init_size); 16 | vector_t *vector_withData(size_t size, ...); 17 | 18 | vector_t *vector_push(vector_t *vector, void *elem); 19 | void *vector_pop(vector_t *vector); 20 | 21 | void *vector_getAt(vector_t *vector, size_t i); 22 | void vector_setAt(vector_t *vector, size_t i, void *data); 23 | 24 | size_t vector_size(vector_t *vector); 25 | 26 | void vector_setFreeFunc(vector_t *vector, void (*free) (void *)); 27 | 28 | void vector_print(vector_t *vector, void (*print) (void *)); 29 | void vector_printStrings(vector_t *vector); 30 | 31 | void vector_clear(vector_t *vector); 32 | void vector_free(vector_t *vector); 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /include/ra.h: -------------------------------------------------------------------------------- 1 | #ifndef __RA_H_ 2 | #define __RA_H_ 3 | 4 | #include "common.h" 5 | #include "condition.h" 6 | #include "list.h" 7 | #include "mock_db.h" /* later, change this to whatever header file 8 | stores information about what tables you have */ 9 | 10 | /* 11 | RA_t in Haskell 12 | data RA_t = Table String 13 | | Select Expression_t RA_t -- see below for Expression_t def 14 | | Project [String] RA 15 | | Union RA_t RA 16 | | Difference RA_t RA 17 | | Cross RA_t RA 18 | | Rename String [String] RA 19 | */ 20 | 21 | enum RA_Type { 22 | RA_TABLE, 23 | RA_SIGMA, 24 | RA_PI, 25 | RA_UNION, 26 | RA_DIFFERENCE, 27 | RA_CROSS, 28 | RA_RHO_TABLE, 29 | RA_RHO_EXPR, 30 | }; 31 | 32 | struct RA_s { 33 | enum RA_Type t; 34 | union { 35 | struct { char *name; } table; 36 | struct { RA_t *ra; Condition_t *cond; } sigma; 37 | struct { RA_t *ra; Expression_t *expr_list; } pi; 38 | struct { RA_t *ra1, *ra2; } binary; 39 | struct { RA_t *ra; Expression_t *to_rename; char *new_name;} rho; 40 | }; 41 | List_t columns; /* if efficiency is desired, change this to 42 | a red-black tree */ 43 | }; 44 | 45 | void RA_print(RA_t *ra); 46 | 47 | RA_t *RA_Table(const char *name); 48 | RA_t *RA_Sigma(RA_t *ra, Condition_t *expr); 49 | RA_t *RA_Pi(RA_t *ra, Expression_t *expr_list); 50 | RA_t *RA_Union(RA_t *ra1, RA_t *ra2); 51 | RA_t *RA_Difference(RA_t *ra1, RA_t *ra2); 52 | RA_t *RA_Cross(RA_t *ra1, RA_t *ra2); 53 | RA_t *RA_RhoTable(RA_t *ra, const char *new_name); 54 | RA_t *RA_RhoExpr(RA_t *ra, Expression_t *expr, const char *new_name); 55 | 56 | void RA_free(RA_t *ra); 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /include/sra.txt: -------------------------------------------------------------------------------- 1 | /* planning to put the relational algebra representation types here */ 2 | 3 | /* 4 | SRA - sugared relational algebra 5 | 6 | Project(expression list, table) 7 | Select(condition list, table) 8 | Rename(old name, new name, table) 9 | Join(type, table, table) 10 | 11 | 12 | */ 13 | 14 | /* 15 | RA - relational algebra 16 | 17 | Haskell: 18 | data RA = Table String 19 | | Select Expression_t RA 20 | | Project [String] RA 21 | | Union RA RA 22 | | Difference RA RA 23 | | Cross RA RA 24 | | Rename String [String] RA 25 | 26 | data Expression_t = Eq String String 27 | | Lt String String 28 | | Gt String String 29 | | And Expression_t Expression 30 | | Or Expression_t Expression 31 | | Not Expression 32 | 33 | C: 34 | typedef struct RATable { 35 | const char *name; 36 | } RATable; 37 | 38 | typedef struct RASelect { 39 | Condition_t *expr; 40 | RA *ra; 41 | } RASelect; 42 | 43 | typedef struct RAProject { 44 | unsigned num_cols; 45 | const char **cols; 46 | RA *ra; 47 | } RAProject; 48 | 49 | typedef struct RAUnion { 50 | RA *ra1, *ra2; 51 | } RAUnion; 52 | 53 | typedef struct RADifference { 54 | RA *ra1, *ra2; 55 | } RADifference; 56 | 57 | typedef struct RACross { 58 | RA *ra1, *ra2; 59 | } RACross; 60 | 61 | typedef struct RARename { 62 | const char *table_name; 63 | unsigned num_col_names; 64 | const char **col_names; 65 | RA *ra; 66 | } RARename; 67 | 68 | enum RAType { 69 | RA_TABLE, 70 | RA_SELECT, 71 | RA_PROJECT, 72 | RA_UNION, 73 | RA_DIFFERENCE, 74 | RA_CROSS, 75 | RA_RENAME 76 | }; 77 | 78 | typedef struct RA { 79 | enum RAType t; 80 | union { 81 | 82 | } 83 | } RA; 84 | 85 | */ -------------------------------------------------------------------------------- /haskell/Tests.hs: -------------------------------------------------------------------------------- 1 | module Tests where 2 | 3 | import Desugar 4 | import SRA 5 | import Control.Monad (forM_) 6 | 7 | {------------------- Example queries/tables -------------------} 8 | t = [("x", Int), ("y", Int)] 9 | u = [("x", Int), ("y", Int), ("z", Int)] 10 | tables :: TableMap 11 | tables = tFromList [("t", t), ("u", u)] 12 | 13 | -- simplifying things... 14 | selectStar = Project [(Col "*" Nothing, Nothing)] 15 | col x = Col x Nothing 16 | dot tname colname = Col colname (Just tname) 17 | colN x = (col x, Nothing) 18 | as :: Expression -> String -> NamedExpr 19 | as expr name = (expr, Just name) 20 | plus = Binary "+" 21 | eq = Compare "=" 22 | innerJoin t u = Join Inner t u Nothing 23 | innerJoinOn t u cond = Join Inner t u (Just cond) 24 | 25 | -- t; 26 | tblT = (TableName "t" Nothing) 27 | -- u; 28 | tblU = (TableName "u" Nothing) 29 | -- select x, y from t; 30 | ex0 = Project [colN "x", colN "y"] tblT 31 | 32 | -- select x, x + y as z from t; 33 | ex1 = Project [colN "x", ((col "x" `plus` col "y") `as` "z")] tblT 34 | 35 | -- select z from (select x, x+y as z from t); 36 | ex2 = Project [colN "z"] ex1 37 | 38 | -- select x, z from (select x, x+y as z from t) where x = z; 39 | ex3 = Project [colN "x", colN "z"] $ Select (col "x" `eq` col "z") ex1 40 | 41 | -- select x, y, z as foo from u; 42 | ex4 = Project [colN "x", colN "y", col "z" `as` "foo"] tblU 43 | 44 | -- select * from u; 45 | ex5 = selectStar tblU 46 | 47 | -- select * from t, u; 48 | ex6 = selectStar $ tblT `innerJoin` tblU 49 | 50 | -- select * from t,u where t.x = u.x; 51 | ex7 = selectStar $ innerJoinOn tblT tblU (("t" `dot` "x") `eq` ("u" `dot` "x")) 52 | 53 | -- select * from t natural join u; 54 | ex8 = selectStar (tblT `NaturalJoin` tblU) 55 | 56 | examples = [ex0, ex1, ex2, ex3, ex4, ex5, ex6, ex7, ex8] 57 | 58 | runTests = forM_ examples (putStrLn . show . desugar tables) -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- 1 | #ifndef __COMMON_H_ 2 | #define __COMMON_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "vector.h" 9 | #include "list.h" 10 | 11 | /* Forward declarations */ 12 | typedef struct RA_s RA_t; 13 | typedef struct Condition_t Condition_t; 14 | /* With these we can make the grand union of all SQL queries :) */ 15 | struct SRA_s; /* select query */ 16 | struct Table_s; /* create table query */ 17 | struct Index_s; /* create index */ 18 | struct Insert_s; /* insert into */ 19 | struct Delete_s; /* delete from */ 20 | 21 | enum query_type { 22 | SELECT_Q, CREATE_T_Q, CREATE_I_Q, INSERT_Q, DELETE_Q 23 | }; 24 | 25 | typedef struct Query_s { 26 | enum query_type t; 27 | union { 28 | struct SRA_s *sra; 29 | struct Table_s *table; 30 | struct Index_s *index; 31 | struct Insert_s *insert; 32 | struct Delete_s *del; 33 | }; 34 | } Query_t; 35 | 36 | enum data_type { 37 | TYPE_INT, 38 | TYPE_DOUBLE, 39 | TYPE_CHAR, 40 | TYPE_TEXT 41 | }; 42 | 43 | typedef struct StrList_t { 44 | char *str; 45 | struct StrList_t *next; 46 | } StrList_t; 47 | 48 | char *typeToString(enum data_type type, char *buf); 49 | StrList_t *StrList_makeWithNext(const char *str, StrList_t *next); 50 | StrList_t *StrList_make(char *str); 51 | StrList_t *StrList_append(StrList_t *list1, StrList_t *list2); 52 | void StrList_print(StrList_t *list); 53 | void StrList_free(StrList_t *list); 54 | void upInd(void); 55 | void downInd(void); 56 | void indent_print(const char *format,...); 57 | 58 | void Query_free(Query_t *query); 59 | Query_t *Query_fromSRA(struct SRA_s *sra); 60 | Query_t *Query_fromTable(struct Table_s *table); 61 | Query_t *Query_fromIndex(struct Index_s *index); 62 | Query_t *Query_fromInsert(struct Insert_s *insert); 63 | Query_t *Query_fromDelete(struct Delete_s *del); 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /include/create.h: -------------------------------------------------------------------------------- 1 | #ifndef __CREATE_H_ 2 | #define __CREATE_H_ 3 | 4 | #include "common.h" 5 | #include "column.h" 6 | 7 | typedef struct Table_s { 8 | char *name; 9 | Column_t *columns; 10 | } Table_t; 11 | 12 | enum key_dec_type {KEY_DEC_PRIMARY, KEY_DEC_FOREIGN}; 13 | 14 | typedef struct KeyDec_s { 15 | enum key_dec_type t; 16 | union { 17 | StrList_t *primary_keys; 18 | ForeignKeyRef_t fkey; 19 | } dec; 20 | struct KeyDec_s *next; 21 | } KeyDec_t; 22 | 23 | typedef struct TableReference_s { 24 | char *table_name, *alias; 25 | } TableReference_t; 26 | 27 | typedef struct Index_s { 28 | char *name, *table_name, *column_name; 29 | int unique; 30 | } Index_t; 31 | 32 | enum CreateType { CREATE_TABLE, CREATE_INDEX }; 33 | 34 | typedef struct Create_s { 35 | enum CreateType t; 36 | union { 37 | Table_t *table; 38 | Index_t *index; 39 | }; 40 | } Create_t; 41 | 42 | Table_t * Table_make(char *name, Column_t *columns, KeyDec_t *decs); 43 | void Table_print(Table_t *table); 44 | void Table_free(void *table); /* void for generic */ 45 | Table_t * Table_addKeyDecs(Table_t *table, KeyDec_t *decs); 46 | 47 | KeyDec_t * KeyDec_append(KeyDec_t *decs, KeyDec_t *dec); 48 | KeyDec_t * ForeignKeyDec(ForeignKeyRef_t fkr); 49 | KeyDec_t * PrimaryKeyDec(StrList_t *col_names); 50 | 51 | TableReference_t *TableReference_make(char *table_name, char *alias); 52 | void TableReference_free(TableReference_t *tref); 53 | 54 | Index_t * Index_make(char *name, char *table_name, char *column_name); 55 | Index_t * Index_makeUnique(Index_t *idx); 56 | void Index_print(Index_t *idx); 57 | void Index_free(Index_t *idx); 58 | 59 | Create_t * Create_fromTable(Table_t *table); 60 | Create_t * Create_fromIndex(Index_t *idx); 61 | void Create_print(Create_t *cre); 62 | void Create_free(Create_t *cre); 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /include/condition.h: -------------------------------------------------------------------------------- 1 | #ifndef __CONDITION_H_ 2 | #define __CONDITION_H_ 3 | 4 | #include "common.h" 5 | #include "expression.h" 6 | 7 | /* 8 | data Condition_t = Eq Expression_t Expression 9 | | Lt Expression_t Expression 10 | | Gt Expression_t Expression 11 | | And Condition_t Condition 12 | | Or Condition_t Condition 13 | | Not Condition 14 | */ 15 | 16 | typedef struct CondComp { 17 | Expression_t *expr1, *expr2; 18 | } CondComp; 19 | 20 | typedef struct CondBinary { 21 | Condition_t *cond1, *cond2; 22 | } CondBinary; 23 | 24 | typedef struct CondUnary { 25 | Condition_t *cond; 26 | } CondUnary; 27 | 28 | /* note: we might want to also let CondIn hold SELECT queries. */ 29 | typedef struct CondIn { 30 | Expression_t *expr; 31 | Literal_t *values_list; 32 | } CondIn; 33 | 34 | enum CondType { 35 | RA_COND_EQ, 36 | RA_COND_LT, 37 | RA_COND_GT, 38 | RA_COND_LEQ, 39 | RA_COND_GEQ, 40 | RA_COND_AND, 41 | RA_COND_OR, 42 | RA_COND_NOT, 43 | RA_COND_IN, 44 | }; 45 | 46 | 47 | struct Condition_t { 48 | enum CondType t; 49 | union { 50 | CondComp comp; 51 | CondBinary binary; 52 | CondUnary unary; 53 | CondIn in; 54 | } cond; 55 | }; 56 | 57 | Condition_t *Eq(Expression_t *expr1, Expression_t *expr2); 58 | Condition_t *Lt(Expression_t *expr1, Expression_t *expr2); 59 | Condition_t *Gt(Expression_t *expr1, Expression_t *expr2); 60 | Condition_t *Leq(Expression_t *expr1, Expression_t *expr2); 61 | Condition_t *Geq(Expression_t *expr1, Expression_t *expr2); 62 | Condition_t *And(Condition_t *cond1, Condition_t *cond2); 63 | Condition_t *Or(Condition_t *cond1, Condition_t *cond2); 64 | Condition_t *Not(Condition_t *cond); 65 | Condition_t *In(Expression_t *expr, Literal_t *values_list); 66 | 67 | void Condition_free(Condition_t *cond); 68 | void Condition_print(Condition_t *cond); 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /sql_grammar_condensed.txt: -------------------------------------------------------------------------------- 1 | sql_queries ::= ((create_table|insert_into|delete_from|select)? ';')+ 2 | 3 | create_table ::= CREATE TABLE table_name '(' column_dec_list ')' 4 | 5 | column_dec_list ::= column_dec (',' column_dec)* 6 | 7 | column_dec ::= column_name type ('(' INT_LITERAL ')')? (constraint)* | key_dec 8 | 9 | type ::= INT | DOUBLE | CHAR | VARCHAR | TEXT 10 | 11 | constraint ::= NOT NULL | UNIQUE | PRIMARY KEY 12 | | FOREIGN KEY REFERENCES table_name ('(' column_name ')')? 13 | | DEFAULT (literal_value | AUTO INCREMENT) 14 | | CHECK bool_expression 15 | 16 | key_dec ::= PRIMARY KEY '(' column_names_list ')' 17 | | FOREIGN KEY '(' column_name ')' REFERENCES table_name ('(' column_name ')')? 18 | 19 | insert_into ::= INSERT INTO table_name 20 | ('(' column_name (',' column_name)* ')')? 21 | VALUES '(' literal_value (',' literal_value)* ')' 22 | 23 | literal_value ::= INT_LITERAL | DOUBLE_LITERAL | STRING_LITERAL 24 | 25 | delete_from ::= DELETE FROM table_name where_condition 26 | 27 | select ::= select_statement ((UNION | INTERSECT | EXCEPT) select_statement)* 28 | 29 | select_statement ::= SELECT (DISTINCT)? expression_list FROM table (select_constraint)* 30 | | '(' select_statement ')' 31 | 32 | select_constraint ::= ON bool_expression 33 | | USING '(' column_names_list ')' 34 | | WHERE bool_expression 35 | | ORDER BY column_name (ASC | DESC)? 36 | 37 | bool_expression ::= bool_term ((AND | OR) bool_term)* 38 | 39 | bool_term ::= expression ('=' | '>' | '<' | GEQ | LEQ | NEQ) expression 40 | | expression IN '(' select ')' 41 | | '(' bool_expression ')' 42 | | NOT bool_term 43 | 44 | expression_list ::= expression (',' expression)* 45 | 46 | expression ::= term (('+'|'-'|'*'|'/') term)* 47 | 48 | term ::= literal_value 49 | | (table_name '.')? (column_name | '*' | NULL) 50 | | '(' expression ')' 51 | | (COUNT | SUM | AVG | MIN | MAX) '(' expression ')' 52 | | '-' term 53 | 54 | column_name ::= IDENTIFIER 55 | 56 | table_name ::= IDENTIFIER 57 | 58 | table ::= table_name ((AS)? IDENTIFIER)? ((',' | join) table_name)* 59 | 60 | join ::= (CROSS | INNER | (LEFT | RIGHT) (OUTER)? | NATURAL)? JOIN -------------------------------------------------------------------------------- /src/literal.c: -------------------------------------------------------------------------------- 1 | #include "../include/literal.h" 2 | 3 | Literal_t *litInt(int i) { 4 | Literal_t *lval = (Literal_t *)calloc(1, sizeof(Literal_t)); 5 | lval->t = TYPE_INT; 6 | lval->val.ival = i; 7 | return lval; 8 | } 9 | 10 | Literal_t *litDouble(double d) { 11 | Literal_t *lval = (Literal_t *)calloc(1, sizeof(Literal_t)); 12 | lval->t = TYPE_DOUBLE; 13 | lval->val.dval = d; 14 | return lval; 15 | } 16 | 17 | Literal_t *litChar(char c) { 18 | Literal_t *lval = (Literal_t *)calloc(1, sizeof(Literal_t)); 19 | lval->t = TYPE_CHAR; 20 | lval->val.cval = c; 21 | return lval; 22 | } 23 | 24 | Literal_t *litText(char *str) { 25 | Literal_t *lval = (Literal_t *)calloc(1, sizeof(Literal_t)); 26 | lval->t = TYPE_TEXT; 27 | lval->val.strval = str; 28 | return lval; 29 | } 30 | 31 | void Literal_print(Literal_t *val) { 32 | char buf[100]; 33 | printf("%s ", typeToString(val->t, buf)); 34 | switch (val->t) { 35 | case TYPE_INT: 36 | printf("%d", val->val.ival); 37 | break; 38 | case TYPE_DOUBLE: 39 | printf("%f", val->val.dval); 40 | break; 41 | case TYPE_CHAR: 42 | printf("'%c'", val->val.cval); 43 | break; 44 | case TYPE_TEXT: 45 | printf("\"%s\"", val->val.strval); 46 | break; 47 | default: 48 | printf("(unknown type)"); 49 | } 50 | } 51 | 52 | void Literal_printList(Literal_t *val_list) { 53 | int first = 1; 54 | printf("["); 55 | while (val_list) { 56 | if (first) first = 0; else printf(", "); 57 | Literal_print(val_list); 58 | val_list = val_list->next; 59 | } 60 | printf("]"); 61 | } 62 | 63 | static Literal_t *Literal_app(Literal_t *lit1, Literal_t *lit2) { 64 | lit1->next = lit2; 65 | return lit1; 66 | } 67 | 68 | Literal_t *Literal_append(Literal_t *lit1, Literal_t *lit2) { 69 | if (!lit1) return lit2; 70 | return Literal_app(lit1, Literal_append(lit1->next, lit2)); 71 | } 72 | 73 | void Literal_free(Literal_t *lval) { 74 | if (lval->t == TYPE_TEXT) 75 | free(lval->val.strval); 76 | free(lval); 77 | } 78 | 79 | void Literal_freeList(Literal_t *lval) { 80 | Literal_t *temp; 81 | while (lval) { 82 | temp = lval; 83 | lval = lval->next; 84 | free(temp); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/insert.c: -------------------------------------------------------------------------------- 1 | #include "../include/insert.h" 2 | #include "../include/ra.h" 3 | 4 | Insert_t *Insert_make(RA_t *ra, StrList_t *opt_col_names, Literal_t *values) { 5 | Insert_t *new_insert = (Insert_t *)calloc(1, sizeof(Insert_t)); 6 | new_insert->ra = ra; 7 | new_insert->col_names = opt_col_names; 8 | new_insert->values = values; 9 | if (!values) 10 | fprintf(stderr, "Warning: no values given to insert\n"); 11 | 12 | /* if there are any column names specified, ensure equal cardinality */ 13 | if (opt_col_names) { 14 | while(true) { 15 | if (opt_col_names && !values) { 16 | fprintf(stderr, "Error: more column names specified than values\n"); 17 | return NULL; 18 | } 19 | else if (!opt_col_names && values) { 20 | fprintf(stderr, "Error: more values specified than column names\n"); 21 | return NULL; 22 | } else if (!opt_col_names && !values) { 23 | /* then both are the same cardinality, OK */ 24 | break; 25 | } 26 | opt_col_names = opt_col_names->next; 27 | values = values->next; 28 | } 29 | } 30 | return new_insert; 31 | } 32 | 33 | void Insert_print(Insert_t *insert) { 34 | Literal_t *val = insert->values; 35 | int first = 1; 36 | printf("Insert "); 37 | printf("["); 38 | while (val) { 39 | if (first) { 40 | first = 0; 41 | } else { 42 | printf(", "); 43 | } 44 | Literal_print(val); 45 | val = val->next; 46 | } 47 | printf("] into "); 48 | RA_print(insert->ra); 49 | if (insert->col_names) { 50 | StrList_t *list = insert->col_names; 51 | first = 1; 52 | printf(" using columns ["); 53 | while (list) { 54 | if (first) { 55 | first = 0; 56 | } else { 57 | printf(", "); 58 | } 59 | printf("%s", list->str); 60 | list = list->next; 61 | } 62 | printf("]"); 63 | } 64 | puts(""); 65 | } 66 | 67 | 68 | void Insert_free(Insert_t *insert) { 69 | if (!insert) { 70 | fprintf(stderr, "Warning: Insert_free called on null pointer\n"); 71 | return; 72 | } 73 | RA_free(insert->ra); 74 | StrList_free(insert->col_names); 75 | Literal_free(insert->values); 76 | free(insert); 77 | } 78 | -------------------------------------------------------------------------------- /include/column.h: -------------------------------------------------------------------------------- 1 | #ifndef __COLUMN_H_ 2 | #define __COLUMN_H_ 3 | 4 | #include "common.h" 5 | #include "literal.h" 6 | 7 | enum constraint_type { 8 | CONS_NOT_NULL, 9 | CONS_UNIQUE, 10 | CONS_PRIMARY_KEY, 11 | CONS_FOREIGN_KEY, 12 | CONS_DEFAULT, 13 | CONS_AUTO_INCREMENT, 14 | CONS_CHECK, 15 | CONS_SIZE 16 | }; 17 | 18 | typedef struct ForeignKeyRef_t { 19 | const char *col_name, *table_name, *table_col_name; 20 | } ForeignKeyRef_t; 21 | 22 | typedef struct Constraint_t { 23 | enum constraint_type t; 24 | union { 25 | ForeignKeyRef_t ref; 26 | Literal_t *default_val; 27 | unsigned size; 28 | Condition_t *check; 29 | } constraint; 30 | struct Constraint_t *next; 31 | } Constraint_t; 32 | 33 | typedef struct Column_t { 34 | char *name; 35 | enum data_type type; 36 | Constraint_t *constraints; 37 | size_t offset; /* offset in bytes from the beginning of the row */ 38 | struct Column_t *next; 39 | } Column_t; 40 | 41 | typedef struct ColumnReference_t { 42 | char *tableName, *columnName, *columnAlias; 43 | } ColumnReference_t; 44 | 45 | /* constraints on single columns */ 46 | ForeignKeyRef_t ForeignKeyRef_makeFull(const char *cname, ForeignKeyRef_t fkey); 47 | ForeignKeyRef_t ForeignKeyRef_make(const char *foreign_tname, 48 | const char *foreign_cname); 49 | 50 | Constraint_t *NotNull(void); 51 | Constraint_t *AutoIncrement(void); 52 | Constraint_t *PrimaryKey(void); 53 | Constraint_t *ForeignKey(ForeignKeyRef_t fkr); 54 | Constraint_t *Default(Literal_t *val); 55 | Constraint_t *Unique(void); 56 | Constraint_t *Check(Condition_t *cond); 57 | Constraint_t *ColumnSize(unsigned size); 58 | Constraint_t *Constraint_append(Constraint_t *constraints, Constraint_t *constraint); 59 | Column_t *Column_addConstraint(Column_t *column, Constraint_t *constraints); 60 | Column_t *Column(const char *name, enum data_type type, Constraint_t *constraints); 61 | Column_t *Column_append(Column_t *columns, Column_t *column); 62 | 63 | ColumnReference_t *ColumnReference_make(const char *, const char *); 64 | 65 | int Column_compareByName(const void *col1, const void *col2); 66 | void *Column_copy(void *col); 67 | 68 | void Column_getOffsets(Column_t *cols); 69 | size_t Column_getSize(Column_t *col); 70 | 71 | void Constraint_print(void *constraint); 72 | void Constraint_printList(Constraint_t *constraints); 73 | void Column_freeList(Column_t *column); 74 | 75 | /* sets the size of the next column */ 76 | void Column_setSize(ssize_t size); 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DEPS = bin/list.o bin/create.o bin/ra.o bin/literal.o bin/common.o \ 2 | bin/insert.o bin/condition.o bin/expression.o bin/column.o \ 3 | bin/delete.o bin/sra.o bin/vector.o bin/mock_db.o 4 | OPTS = -Wall 5 | 6 | all: init bin/sql_parser 7 | @echo "Finished building!" 8 | 9 | src/lex.yy.c: src/lex/sql.l 10 | $(LEX) -i src/lex/sql.l 11 | @mv lex.yy.c src 12 | 13 | src/y.tab.c: src/yacc/sql.y 14 | $(YACC) -d -v src/yacc/sql.y 15 | @mv y.tab.c src 16 | @mv y.tab.h src 17 | @mkdir -p src/auxfiles 18 | @mv y.output src/auxfiles 19 | 20 | bin/list.o: src/list.c 21 | $(CC) $(OPTS) -c src/list.c -o bin/list.o 22 | 23 | bin/mock_db.o: src/mock_db.c 24 | $(CC) $(OPTS) -c src/mock_db.c -o bin/mock_db.o 25 | 26 | bin/vector.o: src/vector.c 27 | $(CC) $(OPTS) -c src/vector.c -o bin/vector.o 28 | 29 | bin/create.o: src/create.c 30 | $(CC) $(OPTS) -c src/create.c -o bin/create.o 31 | 32 | bin/ra.o: src/ra.c 33 | $(CC) $(OPTS) -c src/ra.c -o bin/ra.o 34 | 35 | bin/sra.o: src/sra.c 36 | $(CC) $(OPTS) -c src/sra.c -o bin/sra.o 37 | 38 | bin/literal.o: src/literal.c 39 | $(CC) $(OPTS) -c src/literal.c -o bin/literal.o 40 | 41 | bin/common.o: src/common.c 42 | $(CC) $(OPTS) -c src/common.c -o bin/common.o 43 | 44 | bin/insert.o: src/insert.c 45 | $(CC) $(OPTS) -c src/insert.c -o bin/insert.o 46 | 47 | bin/delete.o: src/delete.c 48 | $(CC) $(OPTS) -c src/delete.c -o bin/delete.o 49 | 50 | bin/condition.o: src/condition.c 51 | $(CC) $(OPTS) -c src/condition.c -o bin/condition.o 52 | 53 | bin/expression.o: src/expression.c 54 | $(CC) $(OPTS) -c src/expression.c -o bin/expression.o 55 | 56 | bin/column.o: src/column.c 57 | $(CC) $(OPTS) -c src/column.c -o bin/column.o 58 | 59 | deps: bin/list.o bin/vector.o bin/create.o bin/ra.o bin/literal.o \ 60 | bin/common.o bin/insert.o bin/condition.o bin/expression.o \ 61 | bin/column.o bin/delete.o bin/sra.o bin/mock_db.o 62 | 63 | bin/sql_parser: src/y.tab.c src/lex.yy.c deps 64 | @mkdir -p bin 65 | $(CC) $(OPTS) -o bin/sql_parser src/y.tab.c src/lex.yy.c $(DEPS) -ly -ll 66 | 67 | tests: selecttest inserttest deletetest createtest errortest 68 | 69 | selecttest: all 70 | @bin/sql_parser tests/selecttest.sql 71 | 72 | inserttest: all 73 | @bin/sql_parser tests/insertintotest.sql 74 | 75 | deletetest: all 76 | @bin/sql_parser tests/deletefromtest.sql 77 | 78 | createtest: all 79 | @bin/sql_parser tests/createtest.sql 80 | 81 | errortest: all 82 | @bin/sql_parser tests/errortest.sql 83 | 84 | createselecttest: all 85 | @bin/sql_parser tests/createselecttest.sql 86 | 87 | init: 88 | @mkdir -p bin 89 | 90 | cleanup: 91 | -rm test.tab.cacc 92 | -rm y.output 93 | 94 | clean: 95 | rm bin/* -------------------------------------------------------------------------------- /include/expression.h: -------------------------------------------------------------------------------- 1 | #ifndef __EXPRESSION_H_ 2 | #define __EXPRESSION_H_ 3 | 4 | #include "common.h" 5 | #include "literal.h" 6 | #include "column.h" 7 | /* 8 | 9 | data Expression_t = Term String 10 | | Plus Expression_t Expression 11 | | Minus Expression_t Expression 12 | | Multiply Expression_t Expression 13 | | Divide Expression_t Expression 14 | | Concat Expression_t Expression 15 | | Neg Expression 16 | 17 | */ 18 | 19 | typedef struct Expression_s Expression_t; 20 | 21 | enum TermType { 22 | TERM_LITERAL, 23 | TERM_ID, 24 | TERM_NULL, 25 | TERM_COLREF, 26 | TERM_FUNC 27 | }; 28 | 29 | enum FuncType { 30 | FUNC_MAX, 31 | FUNC_MIN, 32 | FUNC_COUNT, 33 | FUNC_AVG, 34 | FUNC_SUM 35 | }; 36 | 37 | typedef struct Func { 38 | enum FuncType t; 39 | Expression_t *expr; 40 | } Func; 41 | 42 | typedef struct ExprTerm { 43 | enum TermType t; 44 | union { 45 | char *id; 46 | Literal_t *val; 47 | ColumnReference_t *ref; 48 | Func f; 49 | }; 50 | } ExprTerm; 51 | 52 | typedef struct ExprBinary { 53 | Expression_t *expr1, *expr2; 54 | } ExprBinary; 55 | 56 | typedef struct ExprUnary { 57 | Expression_t *expr; 58 | } ExprUnary; 59 | 60 | enum ExprType { 61 | EXPR_TERM, 62 | EXPR_PLUS, 63 | EXPR_MINUS, 64 | EXPR_MULTIPLY, 65 | EXPR_DIVIDE, 66 | EXPR_CONCAT, 67 | EXPR_NEG 68 | }; 69 | 70 | struct Expression_s { 71 | enum ExprType t; 72 | union { 73 | ExprTerm term; 74 | ExprBinary binary; 75 | ExprUnary unary; 76 | } expr; 77 | char *alias; 78 | struct Expression_s *next; 79 | }; 80 | 81 | 82 | Expression_t *Term(const char *str); 83 | Expression_t *TermLiteral(Literal_t *val); 84 | Expression_t *TermNull(void); 85 | Expression_t *TermColumnReference(ColumnReference_t *ref); 86 | Expression_t *TermFunction(int type, Expression_t *expr); 87 | 88 | 89 | Expression_t *Plus(Expression_t *, Expression_t *); 90 | Expression_t *Minus(Expression_t *, Expression_t *); 91 | Expression_t *Multiply(Expression_t *, Expression_t *); 92 | Expression_t *Divide(Expression_t *, Expression_t *); 93 | Expression_t *Concat(Expression_t *, Expression_t *); 94 | Expression_t *Neg(Expression_t *); 95 | 96 | Expression_t *append_expression(Expression_t *expr_list, Expression_t *expr); 97 | Expression_t *add_alias(Expression_t *expr, const char *alias); 98 | void Expression_print(Expression_t *); 99 | void Expression_printList(Expression_t *); 100 | 101 | char *Expression_toString(Expression_t *); 102 | 103 | void Expression_free(Expression_t *expr); 104 | void Expression_freeList(Expression_t *); 105 | 106 | #endif 107 | -------------------------------------------------------------------------------- /haskell/SRA.hs: -------------------------------------------------------------------------------- 1 | module SRA where 2 | 3 | import qualified Data.Map as M 4 | 5 | data Type = Int | String deriving (Show, Eq) 6 | 7 | type Column = (String, Type) 8 | type TableMap = M.Map String [Column] 9 | type NamedExpr = (Expression, Maybe String) 10 | 11 | -- second string is optional table's name 12 | data Expression = Col String (Maybe String) 13 | | Binary String Expression Expression 14 | | Neg Expression 15 | deriving (Eq) 16 | 17 | data Condition = Bool Bool 18 | | Compare String Expression Expression 19 | | BinaryCond String Condition Condition 20 | | Not Condition 21 | deriving (Eq) 22 | 23 | data JoinType = Inner 24 | | LeftOuter 25 | | RightOuter 26 | | FullOuter 27 | deriving (Eq, Show) 28 | 29 | data SRA = TableName String (Maybe String) 30 | | Project [NamedExpr] SRA 31 | | Select Condition SRA 32 | | NaturalJoin SRA SRA 33 | | Join JoinType SRA SRA (Maybe Condition) 34 | | SRAUnion SRA SRA 35 | | Intersect SRA SRA 36 | | Except SRA SRA 37 | deriving (Eq) 38 | 39 | data RA = Table String [Column] 40 | | Pi [Expression] RA 41 | | Rho Expression String RA 42 | | RhoTable String RA 43 | | Sigma Condition RA 44 | | Union RA RA 45 | | Diff RA RA 46 | | Cross RA RA 47 | deriving (Show, Eq) 48 | 49 | instance Show Expression where 50 | show (Col name Nothing) = name 51 | show (Col name (Just tname)) = tname ++ "." ++ name 52 | show (Binary op ex1 ex2) = show ex1 ++ op ++ show ex2 53 | show (Neg ex) = "-(" ++ show ex ++ ")" 54 | 55 | instance Show SRA where 56 | show (TableName name Nothing) = name 57 | show (TableName name (Just n) ) = name ++ " as " ++ n 58 | show (Project es sra) = "select " ++ show es ++ " from (" ++ show sra ++ ")" 59 | show (Select cond sra) = show sra ++ " where " ++ show cond 60 | show (NaturalJoin l r) = "(" ++ show l ++ ") ⋈ (" ++ show r ++ ")" 61 | show (Join t l r cond) = "(" ++ show l ++ ") " ++ show t ++ 62 | " join (" ++ show r ++ ")" ++ c 63 | where c = case cond of 64 | Nothing -> "" 65 | Just co -> " on " ++ show co 66 | show (SRAUnion l r) = show l ++ " union " ++ show r 67 | show (Except l r) = show l ++ " except " ++ show r 68 | 69 | instance Show Condition where 70 | show (Bool b) = show b 71 | show (Compare op e1 e2) = show e1 ++ op ++ show e2 72 | show (BinaryCond op c1 c2) = "(" ++ show c1 ++ " " ++ 73 | op ++ " " ++ show c2 ++ ")" 74 | show (Not c) = "not (" ++ show c ++ ")" 75 | 76 | 77 | tLookup :: String -> TableMap -> Maybe [Column] 78 | tLookup = M.lookup 79 | 80 | tFromList :: [(String,[Column])] -> TableMap 81 | tFromList = M.fromList -------------------------------------------------------------------------------- /include/list.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIST_H_ 2 | #define __LIST_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | typedef struct ListNode_s { 12 | void *data; 13 | struct ListNode_s *next, *prev; 14 | } ListNode_t; 15 | 16 | typedef struct List_s { 17 | size_t size; 18 | ListNode_t *front, *back; 19 | void (*del) (void *); 20 | pthread_mutex_t lock; 21 | char *(*toString) (void *); 22 | int (*compare)(const void *, const void *); 23 | void (*print) (void *); 24 | void *(*copy) (void *); 25 | size_t elem_size; 26 | const char *name; 27 | } List_t; 28 | 29 | void list_init(List_t *, void (*del) (void *)); 30 | List_t *list_initWith(void *elem); 31 | List_t *list_initWithN(size_t n, ...); 32 | void list_destroy(List_t *l); 33 | 34 | void list_setPrintFunc(List_t *l, void (*print) (void *)); 35 | void list_setCompFunc(List_t *l, int (*compare) (const void *, const void *)); 36 | void list_setCopyFunc(List_t *l, void *(*copy)(void *)); 37 | 38 | void *list_findByInt(List_t *l, int (*toInt) (void *), int i); 39 | void *list_findByString(List_t *l, void (*toString) (char *,void *), const char *str); 40 | void *list_findByPointer(List_t *l, void *ptr); 41 | 42 | bool list_removeByInt(List_t *l, int (*toInt) (void *), int i); 43 | bool list_removeByString(List_t *l, void (*toString) (char *,void *), const char *str); 44 | void *list_removeByPointer(List_t *list, void *ptr); 45 | void *list_removeNode(List_t *list, ListNode_t *node); 46 | void list_removeNodeDelete(List_t *list, ListNode_t *node); 47 | void list_removeByPointerFree(List_t *list, void *ptr); 48 | 49 | void list_print(List_t *l, bool verbose); 50 | void list_printCustom(List_t *l, char * (*toString) (void *), bool freeAfter); 51 | 52 | /* removes all elements for which pred returns false. Does not free data. */ 53 | void list_filter(List_t *l, bool (*pred) (void *)); 54 | /* same as above, but frees data. */ 55 | void list_filterDelete(List_t *l, bool (*pred) (void *)); 56 | /* same as above, but makes a new list. */ 57 | void list_filterNew(List_t *l, bool (*pred) (void *)); 58 | /* replaces all elements e in l with f(e). Does not free original. */ 59 | void list_map(List_t *l, void *(*f) (void *)); 60 | /* same as above, but frees original. */ 61 | void list_mapDelete(List_t *l, void *(*f) (void *)); 62 | /* same as above, but creates a new list */ 63 | List_t list_mapNew(List_t *l, void *(*f) (void *)); 64 | /* sorts; requires compare function to be defined */ 65 | void list_sort(List_t *l); 66 | /* performs a union of two lists using compare function */ 67 | List_t list_union(List_t *l1, List_t *l2); 68 | List_t list_difference(List_t *l1, List_t *l2); 69 | List_t list_intersection(List_t *l1, List_t *l2); 70 | 71 | ListNode_t *listNode_init(void *data, ListNode_t *next, ListNode_t *prev); 72 | 73 | List_t list_deepCopy(List_t *l); 74 | 75 | /* Threadsafe */ 76 | bool list_addBack(List_t *l, void *data); 77 | void *list_removeBack(List_t *l); 78 | bool list_addFront(List_t *l, void *data); 79 | bool list_addInOrder(List_t *l, void *data); 80 | void *list_removeFront(List_t *l); 81 | void list_addBetween(List_t *list, void *data, ListNode_t *prev, ListNode_t *next); 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /src/common.c: -------------------------------------------------------------------------------- 1 | #include "../include/common.h" 2 | 3 | char *typeToString(enum data_type type, char *buf) { 4 | switch (type) { 5 | case TYPE_INT: 6 | sprintf(buf, "int"); 7 | break; 8 | case TYPE_DOUBLE: 9 | sprintf(buf, "double"); 10 | break; 11 | case TYPE_CHAR: 12 | sprintf(buf, "char"); 13 | break; 14 | case TYPE_TEXT: 15 | sprintf(buf, "text"); 16 | break; 17 | } 18 | return buf; 19 | } 20 | 21 | StrList_t *StrList_makeWithNext(const char *str, StrList_t *next) { 22 | StrList_t *list = (StrList_t *)calloc(1, sizeof(StrList_t)); 23 | list->str = strdup(str); 24 | list->next = next; 25 | return list; 26 | } 27 | 28 | void StrList_print(StrList_t *list) { 29 | int first = 1; 30 | printf("["); 31 | while (list) { 32 | if (first) first=0; else printf(", "); 33 | printf("%s", list->str); 34 | list = list->next; 35 | } 36 | printf("]"); 37 | } 38 | 39 | void StrList_free(StrList_t *list) { 40 | while (list) { 41 | StrList_t *next = list->next; 42 | free(list); 43 | list = next; 44 | } 45 | } 46 | 47 | int ind = 0; 48 | 49 | void upInd() { 50 | ind++; 51 | printf("\n"); 52 | } 53 | 54 | void downInd() { 55 | ind--; 56 | printf("\n"); 57 | if (ind < 0) printf("error, ind is < 0"); 58 | } 59 | 60 | #define BUF_SIZE 5000 61 | 62 | void indent_print(const char *format,...) 63 | { 64 | /* indent */ 65 | int i; 66 | va_list argptr; 67 | char buffer[BUF_SIZE]; 68 | if (ind < 1) ind = 0; 69 | for (i=0; inext = list2; 84 | return list1; 85 | } 86 | 87 | StrList_t *StrList_append(StrList_t *list1, StrList_t *list2) { 88 | if (!list1) return list2; 89 | return StrList_app(list1, StrList_append(list1->next, list2)); 90 | } 91 | 92 | StrList_t *StrList_make(char *str) { 93 | StrList_t *list = (StrList_t *)calloc(1, sizeof(StrList_t)); 94 | list->str = str; 95 | return list; 96 | } 97 | 98 | /* just so we declare them */ 99 | void SRA_free(struct SRA_s *); 100 | void Table_free(struct Table_s *); 101 | void Index_free(struct Index_s *); 102 | void Insert_free(struct Insert_s *); 103 | void Delete_free(struct Delete_s *); 104 | 105 | void Query_free(Query_t *query) { 106 | switch (query->t) { 107 | case SELECT_Q: SRA_free(query->sra); return; 108 | case CREATE_T_Q: Table_free(query->table); return; 109 | case CREATE_I_Q: Index_free(query->index); return; 110 | case INSERT_Q: Insert_free(query->insert); return; 111 | case DELETE_Q: Delete_free(query->del); return; 112 | } 113 | } 114 | 115 | Query_t *Query_fromSRA(struct SRA_s *sra) { 116 | return NULL; 117 | } 118 | 119 | Query_t *Query_fromTable(struct Table_s *table) { 120 | return NULL; 121 | } 122 | 123 | Query_t *Query_fromIndex(struct Index_s *index) { 124 | return NULL; 125 | } 126 | 127 | Query_t *Query_fromInsert(struct Insert_s *insert) { 128 | return NULL; 129 | } 130 | 131 | Query_t *Query_fromDelete(struct Delete_s *del) { 132 | return NULL; 133 | } 134 | 135 | /*#define COMMON_TEST*/ 136 | #ifdef COMMON_TEST 137 | int main(int argc, char const *argv[]) 138 | { 139 | StrList_t *list = StrList_makeWithNext("hello", NULL); 140 | const char *strs[] = {"hi", "how", "are", "you"}; 141 | int i; 142 | for (i=0; i 14 | extern int yydebug; 15 | int comment_start_lineno; 16 | %} 17 | 18 | %x BLOCK_COMMENT 19 | %x LINE_COMMENT 20 | 21 | %% 22 | 23 | create { return CREATE; } 24 | table { return TABLE; } 25 | insert { return INSERT; } 26 | into { return INTO; } 27 | select { return SELECT; } 28 | from { return FROM; } 29 | where { return WHERE; } 30 | primary { return PRIMARY; } 31 | foreign { return FOREIGN; } 32 | key { return KEY; } 33 | default { return DEFAULT; } 34 | check { return CHECK; } 35 | not { return NOT; } 36 | null { return TOKEN_NULL; } 37 | and { return AND; } 38 | or { return OR; } 39 | "!=" { return NEQ; } 40 | "<>" { return NEQ; } 41 | ">=" { return GEQ; } 42 | "<=" { return LEQ; } 43 | "||" { return CONCAT; } 44 | references { return REFERENCES; } 45 | order { return ORDER; } 46 | by { return BY; } 47 | delete { return DELETE; } 48 | as { return AS; } 49 | int { return INT; } 50 | integer { return INT; } 51 | double { return DOUBLE; } 52 | char { return CHAR; } 53 | varchar { return VARCHAR; } 54 | text { return TEXT; } 55 | join { return JOIN; } 56 | inner { return INNER; } 57 | outer { return OUTER; } 58 | full { return FULL; } 59 | left { return LEFT; } 60 | right { return RIGHT; } 61 | natural { return NATURAL; } 62 | union { return UNION; } 63 | values { return VALUES; } 64 | auto_increment { return AUTO_INCREMENT; } 65 | asc { return ASC; } 66 | desc { return DESC; } 67 | unique { return UNIQUE; } 68 | in { return IN; } 69 | count { return COUNT; } 70 | sum { return SUM; } 71 | min { return MIN; } 72 | max { return MAX; } 73 | avg { return AVG; } 74 | on { return ON; } 75 | using { return USING; } 76 | true { return TRUE; } 77 | false { return FALSE; } 78 | false { return CASE; } 79 | when { return WHEN; } 80 | bit { return BIT; } 81 | group { return GROUP; } 82 | distinct { return DISTINCT; } 83 | \/\* { BEGIN(BLOCK_COMMENT); comment_start_lineno = yylineno; } 84 | \*\/ { BEGIN(INITIAL); } 85 | <> { fprintf(stderr, "Warning: unclosed comment beginning on line %d\n", 86 | ++comment_start_lineno); return EOF; } 87 | \n { yylineno++; } 88 | . { /* ignore */ } 89 | "--" { BEGIN(LINE_COMMENT); } 90 | \n { BEGIN(INITIAL); yylineno++; } 91 | . { /* ignore */ } 92 | [a-zA-Z][a-zA-Z0-9_]* { yylval.strval = strdup(yytext); 93 | if (yydebug) printf("lexed identifier '%s'\n", yytext); 94 | return IDENTIFIER; } 95 | ((\"[^\"]*\")|(\'[^\']*\')) { yylval.strval = strndup(yytext+1, strlen(yytext) - 2); return STRING_LITERAL; } 96 | [+-]?[0-9]+ { yylval.ival = atoi(yytext); return INT_LITERAL; } 97 | ([0-9]+|([0-9]*\.[0-9]+)([eE][-+]?[0-9]+)?) { yylval.dval = atof(yytext); return DOUBLE_LITERAL; } 98 | [ \t\r]+ { /* ignore */ } 99 | \n { yylineno++; } 100 | . { if (yydebug) printf("lexed single character '%c'\n", yytext[0]); 101 | return yytext[0]; } 102 | 103 | %% -------------------------------------------------------------------------------- /src/vector.c: -------------------------------------------------------------------------------- 1 | #include "../include/vector.h" 2 | 3 | static void vector_resize(vector_t *vec, size_t new_size) { 4 | /* find the nearest power of 2 >= new_size */ 5 | size_t max_size = 1; 6 | while (max_size < new_size) 7 | max_size *= 2; 8 | 9 | void **temp = (void **)calloc(max_size, sizeof(void *)); 10 | memcpy(temp, vec->data, vec->size * sizeof(void *)); 11 | free(vec->data); 12 | vec->data = temp; 13 | vec->max_size = max_size; 14 | } 15 | 16 | vector_t *vector(size_t init_size) { 17 | vector_t *vec = (vector_t *)calloc(1, sizeof(vector_t)); 18 | 19 | /* find the nearest power of 2 >= init_size */ 20 | vec->max_size = 1; 21 | while (vec->max_size < init_size) 22 | vec->max_size *= 2; 23 | 24 | vec->data = (void **)calloc(init_size, sizeof(void *)); 25 | return vec; 26 | } 27 | 28 | vector_t *vector_withData(size_t size, ...) { 29 | vector_t *vec = vector(size); 30 | size_t i; 31 | va_list argp; 32 | va_start(argp, size); 33 | for (i=0; isize == vec->max_size) { 41 | vector_resize(vec, vec->size * 2); 42 | } 43 | vec->data[vec->size++] = elem; 44 | return vec; 45 | } 46 | 47 | void vector_setAt(vector_t *vector, size_t i, void *data) { 48 | if (i < vector->size) 49 | vector->data[i] = data; 50 | else 51 | fprintf(stderr, "vector_setAt error: index %lu out of bounds, nothing added\n", i); 52 | } 53 | 54 | void *vector_pop(vector_t *vec) { 55 | void *ret = vec->data[--vec->size]; 56 | if (vec->size <= (vec->max_size)/2) 57 | vector_resize(vec, (vec->max_size)/2); 58 | return ret; 59 | } 60 | 61 | void *vector_getAt(vector_t *vec, size_t i) { 62 | if (i < vec->size) 63 | return vec->data[i]; 64 | fprintf(stderr, "vector_getAt error: index %lu out of bounds, returning null\n", i); 65 | return NULL; 66 | } 67 | 68 | size_t vector_size(vector_t *vector) { 69 | return vector->size; 70 | } 71 | 72 | void vector_print(vector_t *vector, void (*print) (void *)) { 73 | size_t i; 74 | for (i=0; isize; ++i) { 75 | print(vector->data[i]); 76 | } 77 | } 78 | 79 | void vector_printStrings(vector_t *vector) { 80 | size_t i; 81 | printf("string_vec["); 82 | for (i=0; isize; ++i) { 83 | if (i != 0) printf(", "); 84 | printf("%s", (char *)vector_getAt(vector, i)); 85 | } 86 | printf("]\n"); 87 | } 88 | 89 | void vector_clear(vector_t *vector) { 90 | size_t i; 91 | if (!vector) return; 92 | for (i = 0; i < vector->size; ++i) { 93 | if (vector->data[i] && vector->free) { 94 | vector->free(vector->data[i]); 95 | } 96 | } 97 | vector->size = 0; 98 | vector->max_size = 0; 99 | } 100 | 101 | void vector_setDeleteFunc(vector_t *vector, void (*free) (void *)) { 102 | if (vector) 103 | vector->free = free; 104 | } 105 | 106 | int word_count = 0; 107 | 108 | char *random_str() { 109 | // printf("making a random string\n"); fflush(stdout); 110 | size_t size = rand() % 15, i; 111 | char *str = (char *)malloc(size + 6); 112 | sprintf(str, "%4d ", word_count++); 113 | for (i=5; isize; ++i) 125 | printf("%s\n", (char *)vector_getAt(vec, i)); 126 | for (i=0; i<1000; ++i) { 127 | char *str = random_str(); 128 | printf("pushing %s, size is %lu, max_size %lu\n", str, vec->size, vec->max_size); 129 | vector_push(vec, str); 130 | } 131 | printf("printing vector %lu %lu\n", vec->size, vec->max_size); fflush(stdout); 132 | for (i=0; i<500; ++i) { 133 | char *str = (char *)vector_pop(vec); 134 | printf("popped %s, size is %lu, max_size %lu\n", str, vec->size, vec->max_size); 135 | } 136 | return 0; 137 | } 138 | 139 | void vector_free(vector_t *vector) { 140 | free(vector->data); 141 | } 142 | -------------------------------------------------------------------------------- /src/mock_db.c: -------------------------------------------------------------------------------- 1 | #include "../include/mock_db.h" 2 | #include "../include/list.h" 3 | 4 | static List_t tables; 5 | 6 | static void print_table(void *table_ptr) { 7 | Table_print((Table_t *)table_ptr); 8 | } 9 | 10 | void mock_db_init() { 11 | puts("initializing..."); fflush(stdout); 12 | list_init(&tables, Table_free); 13 | list_setPrintFunc(&tables, print_table); 14 | puts("done..."); fflush(stdout); 15 | } 16 | void add_table(Table_t *table) { 17 | list_addBack(&tables, table); 18 | } 19 | 20 | void remove_table(Table_t *table) { 21 | list_removeByPointerFree(&tables, table); 22 | } 23 | 24 | static void toStringBuf (char *name, void *table) { 25 | strcpy(name, ((Table_t *)table)->name); 26 | } 27 | 28 | static char *toString (void *table) { 29 | return ((Table_t *)table)->name; 30 | } 31 | 32 | Table_t *table_by_name(const char *name) { 33 | return (Table_t *)list_findByString(&tables, toStringBuf, name); 34 | } 35 | 36 | void show_tables() { 37 | list_print(&tables, false); 38 | } 39 | 40 | List_t column_list(Table_t *table) { 41 | List_t res; 42 | Column_t *cols; 43 | cols = table->columns; 44 | /*printf("constructing column list for table %s\n", table_name);*/ 45 | list_init(&res, NULL); 46 | list_setCompFunc(&res, Column_compareByName); 47 | while (cols) { 48 | /*printf("adding column '%s'\n", cols->name);*/ 49 | list_addBack(&res, cols); 50 | cols = cols->next; 51 | } 52 | return res; 53 | } 54 | 55 | static void get_colname (char *name, void *col) { 56 | strcpy(name, ((Column_t *)col)->name); 57 | } 58 | 59 | List_t columns_in_common(Table_t *table1, Table_t *table2) { 60 | List_t cols1 = column_list(table1), 61 | cols2 = column_list(table2), 62 | res; 63 | ListNode_t *runner = cols1.front; 64 | list_init(&res, NULL); 65 | while (runner) { 66 | Column_t *col = (Column_t *)runner->data; 67 | /*printf("trying to find match for %s...\n", col->name); fflush(stdout);*/ 68 | Column_t *other_col = (Column_t *)list_findByString(&cols2, 69 | get_colname, 70 | col->name); 71 | if (other_col) { 72 | /*printf("Found matching column names: %s\n", col->name);*/ 73 | if (col->type == other_col->type) { 74 | /*printf("Types match, adding to result\n");*/ 75 | list_addBack(&res, col); 76 | } 77 | else { 78 | /*printf("Types don't match; ignoring match\n");*/ 79 | } 80 | } 81 | runner = runner->next; 82 | } 83 | return res; 84 | } 85 | 86 | List_t column_list_str(const char *table_name) { 87 | Table_t *table = list_findByString(&tables, toStringBuf, table_name); 88 | List_t res; 89 | Column_t *cols; 90 | if (!table) { 91 | fprintf(stderr, "Error: table %s was not found\n", table_name); 92 | exit(1); 93 | } 94 | cols = table->columns; 95 | /*printf("constructing column list for table %s\n", table_name);*/ 96 | list_init(&res, NULL); 97 | while (cols) { 98 | /*printf("adding column '%s'\n", cols->name);*/ 99 | list_addBack(&res, cols); 100 | cols = cols->next; 101 | } 102 | return res; 103 | } 104 | 105 | List_t columns_in_common_str(const char *table1, const char *table2) { 106 | List_t cols1 = column_list_str(table1), 107 | cols2 = column_list_str(table2), 108 | res; 109 | ListNode_t *runner = cols1.front; 110 | list_init(&res, NULL); 111 | while (runner) { 112 | Column_t *col = (Column_t *)runner->data; 113 | /*printf("trying to find match for %s...\n", col->name); fflush(stdout);*/ 114 | Column_t *other_col = (Column_t *)list_findByString(&cols2, 115 | get_colname, 116 | col->name); 117 | if (other_col) { 118 | /*printf("Found matching column names: %s\n", col->name);*/ 119 | if (col->type == other_col->type) { 120 | /*printf("Types match, adding to result\n");*/ 121 | list_addBack(&res, col); 122 | } 123 | else { 124 | /*printf("Types don't match; ignoring match\n");*/ 125 | } 126 | } 127 | runner = runner->next; 128 | } 129 | return res; 130 | } -------------------------------------------------------------------------------- /include/sra.h: -------------------------------------------------------------------------------- 1 | #ifndef __SRA_H_ 2 | #define __SRA_H_ 3 | 4 | #include "common.h" 5 | #include "expression.h" 6 | #include "create.h" 7 | #include "condition.h" 8 | #include "ra.h" 9 | #include "mock_db.h" 10 | 11 | /* 12 | SQL: 13 | select f.a as Col1, g.a as Col2 from Foo f, Foo g where Col1 != Col2; 14 | 15 | --> To SRA: 16 | Pi([(f,a,Col1), (g,a,Col2)], 17 | Sigma(Col1 != Col2, 18 | Join([(Foo,f), (Foo,g)]) 19 | ) 20 | ) 21 | 22 | --> To RA: 23 | Pi([Col1, Col2], 24 | Sigma(Col1 != Col2, 25 | Cross( 26 | Rho(Foo, f, [Col1]), 27 | Rho(Foo, g, [Col2]) 28 | ) 29 | ) 30 | ) 31 | */ 32 | 33 | /* 34 | data SRA = Table TableReference 35 | | Project SRA [Expression] 36 | | Select SRA Condition 37 | | NaturalJoin [SRA] 38 | | Join [SRA] (Maybe JoinCondition) 39 | | OuterJoin [SRA] OJType (Maybe JoinCondition) 40 | | Union SRA SRA 41 | | Except SRA SRA 42 | | Intersect SRA SRA 43 | 44 | data OJType = Left 45 | | Right 46 | | Full 47 | 48 | data ColumnReference = ColumnReference (Maybe String) String 49 | data TableReference = TableName String (Maybe String) 50 | data JoinCondition = On Condition 51 | | Using [String] 52 | 53 | */ 54 | 55 | typedef struct SRA_s SRA_t; 56 | typedef struct SRAList_s SRAList_t; 57 | typedef struct JoinCondition_s JoinCondition_t; 58 | typedef struct ProjectOption_s ProjectOption_t; 59 | 60 | enum SRAType { 61 | SRA_TABLE, 62 | SRA_PROJECT, 63 | SRA_SELECT, 64 | SRA_NATURAL_JOIN, 65 | SRA_JOIN, 66 | SRA_FULL_OUTER_JOIN, 67 | SRA_LEFT_OUTER_JOIN, 68 | SRA_RIGHT_OUTER_JOIN, 69 | SRA_UNION, 70 | SRA_EXCEPT, 71 | SRA_INTERSECT 72 | }; 73 | 74 | enum OJType { OJ_LEFT, OJ_RIGHT, OJ_FULL }; 75 | enum OrderBy {ORDER_BY_ASC, ORDER_BY_DESC}; 76 | 77 | typedef struct SRA_Table_s { 78 | TableReference_t *ref; /* TableReference_t defined in create.h */ 79 | } SRA_Table_t; 80 | 81 | typedef struct SRA_Project_s { 82 | SRA_t *sra; 83 | Expression_t *expr_list; 84 | Expression_t *order_by; 85 | int distinct; 86 | enum OrderBy asc_desc; 87 | Expression_t *group_by; 88 | } SRA_Project_t; 89 | 90 | typedef struct SRA_Select_s { 91 | SRA_t *sra; 92 | Condition_t *cond; 93 | } SRA_Select_t; 94 | 95 | typedef struct SRA_Join_s { 96 | SRA_t *sra1, *sra2; 97 | JoinCondition_t *opt_cond; 98 | } SRA_Join_t; 99 | 100 | typedef struct SRA_Binary_s { 101 | SRA_t *sra1, *sra2; 102 | } SRA_Binary_t; 103 | 104 | struct SRA_s { 105 | enum SRAType t; 106 | union { 107 | SRA_Table_t table; 108 | SRA_Project_t project; 109 | SRA_Select_t select; 110 | SRA_Join_t join; 111 | SRA_Binary_t binary; 112 | }; 113 | }; 114 | 115 | struct SRAList_s { 116 | SRA_t *sra; 117 | struct SRAList_s *next; 118 | }; 119 | 120 | enum JoinConditionType { 121 | JOIN_COND_ON, JOIN_COND_USING 122 | }; 123 | 124 | struct JoinCondition_s { 125 | enum JoinConditionType t; 126 | union { 127 | Condition_t *on; 128 | StrList_t *col_list; 129 | }; 130 | }; 131 | 132 | struct ProjectOption_s { 133 | Expression_t *order_by, *group_by; 134 | enum OrderBy asc_desc; /* not used by group by */ 135 | }; 136 | 137 | SRA_t *SRATable(TableReference_t *ref); 138 | SRA_t *SRAProject(SRA_t *sra, Expression_t *expr_list); 139 | SRA_t *SRASelect(SRA_t *sra, Condition_t *cond); 140 | SRA_t *SRANaturalJoin(SRA_t *sra1, SRA_t *sra2); 141 | SRA_t *SRAJoin(SRA_t *sra1, SRA_t *sra2, JoinCondition_t *cond); 142 | SRA_t *SRALeftOuterJoin(SRA_t *sra1, SRA_t *sra2, JoinCondition_t *cond); 143 | SRA_t *SRARightOuterJoin(SRA_t *sra1, SRA_t *sra2, JoinCondition_t *cond); 144 | SRA_t *SRAFullOuterJoin(SRA_t *sra1, SRA_t *sra2, JoinCondition_t *cond); 145 | SRA_t *SRAUnion(SRA_t *sra1, SRA_t *sra2); 146 | SRA_t *SRAExcept(SRA_t *sra1, SRA_t *sra2); 147 | SRA_t *SRAIntersect(SRA_t *sra1, SRA_t *sra2); 148 | 149 | /* the folloing two only work on SRAProject */ 150 | SRA_t *SRA_applyOption(SRA_t *sra, ProjectOption_t *option); 151 | SRA_t *SRA_makeDistinct(SRA_t *sra); 152 | 153 | ProjectOption_t *OrderBy_make(Expression_t *expr, enum OrderBy o); 154 | ProjectOption_t *GroupBy_make(Expression_t *expr); 155 | ProjectOption_t *ProjectOption_combine(ProjectOption_t *order_by, 156 | ProjectOption_t *group_by); 157 | void ProjectOption_print(ProjectOption_t *sra); 158 | 159 | JoinCondition_t *On(Condition_t *cond); 160 | JoinCondition_t *Using(StrList_t *col_list); 161 | 162 | void SRA_free(SRA_t *sra); 163 | 164 | void SRA_print(SRA_t *sra); 165 | void JoinCondition_print(JoinCondition_t *cond); 166 | void JoinCondition_free(JoinCondition_t *cond); 167 | 168 | RA_t *SRA_desugar(SRA_t *sra); 169 | 170 | #endif 171 | -------------------------------------------------------------------------------- /src/condition.c: -------------------------------------------------------------------------------- 1 | #include "../include/condition.h" 2 | 3 | void Condition_print(Condition_t *cond) { 4 | if (!cond) return; /* just in case */ 5 | switch(cond->t) { 6 | case RA_COND_EQ: 7 | Expression_print(cond->cond.comp.expr1); 8 | printf(" = "); 9 | Expression_print(cond->cond.comp.expr2); 10 | break; 11 | case RA_COND_LT: 12 | Expression_print(cond->cond.comp.expr1); 13 | printf(" < "); 14 | Expression_print(cond->cond.comp.expr2); 15 | break; 16 | case RA_COND_GT: 17 | Expression_print(cond->cond.comp.expr1); 18 | printf(" > "); 19 | Expression_print(cond->cond.comp.expr2); 20 | break; 21 | case RA_COND_LEQ: 22 | Expression_print(cond->cond.comp.expr1); 23 | printf(" <= "); 24 | Expression_print(cond->cond.comp.expr2); 25 | break; 26 | case RA_COND_GEQ: 27 | Expression_print(cond->cond.comp.expr1); 28 | printf(" >= "); 29 | Expression_print(cond->cond.comp.expr2); 30 | break; 31 | case RA_COND_AND: 32 | Condition_print(cond->cond.binary.cond1); 33 | printf(" and "); 34 | Condition_print(cond->cond.binary.cond2); 35 | break; 36 | case RA_COND_OR: 37 | Condition_print(cond->cond.binary.cond1); 38 | printf(" or "); 39 | Condition_print(cond->cond.binary.cond2); 40 | break; 41 | case RA_COND_NOT: 42 | if (cond->cond.unary.cond->t == RA_COND_EQ) { 43 | Expression_print(cond->cond.unary.cond->cond.comp.expr1); 44 | printf(" != "); 45 | Expression_print(cond->cond.unary.cond->cond.comp.expr2); 46 | } else { 47 | printf("not ("); 48 | Condition_print(cond->cond.unary.cond); 49 | printf(")"); 50 | } 51 | break; 52 | case RA_COND_IN: 53 | Expression_print(cond->cond.in.expr); 54 | printf(" in "); 55 | Literal_printList(cond->cond.in.values_list); 56 | break; 57 | default: 58 | puts("Unknown condession type"); 59 | } 60 | } 61 | 62 | Condition_t *Eq(Expression_t *expr1, Expression_t *expr2) { 63 | Condition_t *new_cond = (Condition_t *)calloc(1, sizeof(Condition_t)); 64 | new_cond->t = RA_COND_EQ; 65 | new_cond->cond.comp.expr1 = expr1; 66 | new_cond->cond.comp.expr2 = expr2; 67 | return new_cond; 68 | } 69 | 70 | Condition_t *Lt(Expression_t *expr1, Expression_t *expr2) { 71 | Condition_t *new_cond = (Condition_t *)calloc(1, sizeof(Condition_t)); 72 | new_cond->t = RA_COND_LT; 73 | new_cond->cond.comp.expr1 = expr1; 74 | new_cond->cond.comp.expr2 = expr2; 75 | return new_cond; 76 | } 77 | 78 | Condition_t *Gt(Expression_t *expr1, Expression_t *expr2) { 79 | Condition_t *new_cond = (Condition_t *)calloc(1, sizeof(Condition_t)); 80 | new_cond->t = RA_COND_GT; 81 | new_cond->cond.comp.expr1 = expr1; 82 | new_cond->cond.comp.expr2 = expr2; 83 | return new_cond; 84 | } 85 | 86 | Condition_t *Leq(Expression_t *expr1, Expression_t *expr2) { 87 | Condition_t *new_cond = (Condition_t *)calloc(1, sizeof(Condition_t)); 88 | new_cond->t = RA_COND_LEQ; 89 | new_cond->cond.comp.expr1 = expr1; 90 | new_cond->cond.comp.expr2 = expr2; 91 | return new_cond; 92 | } 93 | 94 | Condition_t *Geq(Expression_t *expr1, Expression_t *expr2) { 95 | Condition_t *new_cond = (Condition_t *)calloc(1, sizeof(Condition_t)); 96 | new_cond->t = RA_COND_GEQ; 97 | new_cond->cond.comp.expr1 = (expr1); 98 | new_cond->cond.comp.expr2 = (expr2); 99 | return new_cond; 100 | } 101 | 102 | Condition_t *And(Condition_t *cond1, Condition_t *cond2) { 103 | Condition_t *new_cond = (Condition_t *)calloc(1, sizeof(Condition_t)); 104 | new_cond->t = RA_COND_AND; 105 | new_cond->cond.binary.cond1 = cond1; 106 | new_cond->cond.binary.cond2 = cond2; 107 | return new_cond; 108 | } 109 | 110 | Condition_t *Or(Condition_t *cond1, Condition_t *cond2) { 111 | Condition_t *new_cond = (Condition_t *)calloc(1, sizeof(Condition_t)); 112 | new_cond->t = RA_COND_OR; 113 | new_cond->cond.binary.cond1 = cond1; 114 | new_cond->cond.binary.cond2 = cond2; 115 | return new_cond; 116 | } 117 | 118 | Condition_t *Not(Condition_t *cond) { 119 | Condition_t *new_cond = (Condition_t *)calloc(1, sizeof(Condition_t)); 120 | new_cond->t = RA_COND_NOT; 121 | new_cond->cond.unary.cond = cond; 122 | return new_cond; 123 | } 124 | 125 | Condition_t *In(Expression_t *expr, Literal_t *values_list) { 126 | Condition_t *new_cond = (Condition_t *)calloc(1, sizeof(Condition_t)); 127 | new_cond->t = RA_COND_IN; 128 | new_cond->cond.in.expr = expr; 129 | new_cond->cond.in.values_list = values_list; 130 | return new_cond; 131 | } 132 | 133 | void Condition_free(Condition_t *cond) { 134 | switch (cond->t) { 135 | case RA_COND_EQ: 136 | case RA_COND_LEQ: 137 | case RA_COND_GEQ: 138 | case RA_COND_GT: 139 | case RA_COND_LT: 140 | free(cond->cond.comp.expr1); 141 | free(cond->cond.comp.expr2); 142 | break; 143 | case RA_COND_AND: 144 | case RA_COND_OR: 145 | Condition_free(cond->cond.binary.cond1); 146 | Condition_free(cond->cond.binary.cond2); 147 | break; 148 | case RA_COND_NOT: 149 | Condition_free(cond->cond.unary.cond); 150 | break; 151 | case RA_COND_IN: 152 | Literal_freeList(cond->cond.in.values_list); 153 | Expression_freeList(cond->cond.in.expr); 154 | break; 155 | } 156 | free(cond); 157 | } 158 | -------------------------------------------------------------------------------- /src/create.c: -------------------------------------------------------------------------------- 1 | #include "../include/create.h" 2 | #include "../include/ra.h" 3 | 4 | static Table_t *Table_addPrimaryKey(Table_t *table, const char *col_name); 5 | 6 | Table_t *Table_addForeignKey(Table_t *table, ForeignKeyRef_t fkr) { 7 | if (table != NULL) { 8 | /* find the column that matches the cname given, and add this reference */ 9 | Column_t *col = table->columns; 10 | for (; col; col=col->next) { 11 | if (!strcmp(col->name, fkr.col_name)) { 12 | Column_addConstraint(col, ForeignKey(fkr)); 13 | return table; 14 | } 15 | } 16 | fprintf(stderr, "Error: column %s not in table\n", fkr.col_name); 17 | return table; 18 | } 19 | fprintf(stderr, "Error: table is null\n"); 20 | return table; 21 | } 22 | 23 | Table_t *Table_addKeyDecs(Table_t *table, KeyDec_t *decs) { 24 | StrList_t *slist; 25 | for (; decs; decs = decs->next) { 26 | switch (decs->t) { 27 | case KEY_DEC_PRIMARY: 28 | for (slist = decs->dec.primary_keys; slist; slist = slist->next) { 29 | if (!Table_addPrimaryKey(table, slist->str)) 30 | fprintf(stderr, "Error: column '%s' not found\n", slist->str); 31 | } 32 | break; 33 | case KEY_DEC_FOREIGN: 34 | if (!Table_addForeignKey(table, decs->dec.fkey)) 35 | fprintf(stderr, "Error: column '%s' not found\n", decs->dec.fkey.col_name); 36 | break; 37 | default: 38 | fprintf(stderr, "Unknown declaration type\n"); 39 | } 40 | } 41 | return table; 42 | } 43 | 44 | KeyDec_t *ForeignKeyDec(ForeignKeyRef_t fkr) { 45 | KeyDec_t *kdec = (KeyDec_t *)calloc(1, sizeof(KeyDec_t)); 46 | kdec->t = KEY_DEC_FOREIGN; 47 | kdec->dec.fkey = fkr; 48 | return kdec; 49 | } 50 | KeyDec_t *PrimaryKeyDec(StrList_t *col_names) { 51 | KeyDec_t *kdec = (KeyDec_t *)calloc(1, sizeof(KeyDec_t)); 52 | kdec->t = KEY_DEC_PRIMARY; 53 | kdec->dec.primary_keys = col_names; 54 | return kdec; 55 | } 56 | 57 | Table_t *Table_make(char *name, Column_t *columns, KeyDec_t *decs) { 58 | Table_t *new_table = (Table_t *)calloc(1, sizeof(Table_t)); 59 | new_table->name = name; 60 | new_table->columns = columns; 61 | Column_getOffsets(columns); 62 | return Table_addKeyDecs(new_table, decs); 63 | } 64 | 65 | static Table_t *Table_addPrimaryKey(Table_t *table, const char *col_name) { 66 | Column_t *col = table->columns; 67 | for (; col; col = col->next) { 68 | if (!strcmp(col->name, col_name)) { 69 | Column_addConstraint(col, PrimaryKey()); 70 | return table; 71 | } 72 | } 73 | return NULL; 74 | } 75 | 76 | Table_t *Table_addPrimaryKeys(Table_t *table, vector_t *col_names) { 77 | unsigned i; 78 | for (i=0; i < vector_size(col_names); ++i) { 79 | const char *col_name = (const char *)vector_getAt(col_names, i); 80 | table = Table_addPrimaryKey(table, col_name); 81 | if (!table) { 82 | fprintf(stderr, "Error: column '%s' not found\n", col_name); 83 | return NULL; 84 | } 85 | } 86 | return table; 87 | } 88 | 89 | void Table_free(void *table_vptr) { 90 | Table_t *table = (Table_t *)table_vptr; 91 | Column_freeList(table->columns); 92 | free(table->name); 93 | free(table); 94 | } 95 | 96 | void TableReference_free(TableReference_t *tref) { 97 | if (!tref) { 98 | fprintf(stderr, "Warning: TableReference_free called on null pointer\n"); 99 | return; 100 | } 101 | free(tref->table_name); 102 | /* alias is optional */ 103 | if (tref->alias) 104 | free(tref->alias); 105 | free(tref); 106 | } 107 | 108 | void Table_print(Table_t *table) { 109 | Column_t *col = table->columns; 110 | int first = 1, count = 0; 111 | char buf[100]; 112 | printf("Table %s (\n", table->name); 113 | for (; col; col = col->next) { 114 | if (first) first = 0; else printf(",\n"); 115 | printf("\t%s %s", col->name, typeToString(col->type, buf)); 116 | Constraint_printList(col->constraints); 117 | if (++count == 10) break; 118 | } 119 | printf("\n)\n"); 120 | } 121 | 122 | KeyDec_t *KeyDec_append(KeyDec_t *decs, KeyDec_t *dec) { 123 | decs->next = dec; 124 | return decs; 125 | } 126 | 127 | TableReference_t *TableReference_make(char *table_name, char *alias) { 128 | TableReference_t *ref = (TableReference_t *)calloc(1, sizeof(TableReference_t)); 129 | ref->table_name = table_name; 130 | ref->alias = alias; 131 | return ref; 132 | } 133 | 134 | Index_t *Index_make(char *name, char *table_name, char *column_name) { 135 | Index_t *idx = (Index_t *)calloc(1, sizeof(Index_t)); 136 | idx->name = name; 137 | idx->table_name = table_name; 138 | idx->column_name = column_name; 139 | return idx; 140 | } 141 | 142 | Index_t *Index_makeUnique(Index_t *idx) { 143 | idx->unique = 1; 144 | return idx; 145 | } 146 | 147 | void Index_print(Index_t *idx) { 148 | printf("Index '%s' on %s (%s)", idx->column_name, 149 | idx->table_name, 150 | idx->column_name); 151 | if (idx->unique) printf(", unique"); 152 | puts(""); 153 | } 154 | 155 | void Index_free(Index_t *idx) { 156 | free(idx->name); 157 | free(idx->column_name); 158 | free(idx->table_name); 159 | free(idx); 160 | } 161 | 162 | Create_t *Create_fromTable(Table_t *table) { 163 | Create_t *c = (Create_t *)calloc(1, sizeof(Create_t)); 164 | c->t = CREATE_TABLE; 165 | c->table = table; 166 | return c; 167 | } 168 | 169 | Create_t *Create_fromIndex(Index_t *idx) { 170 | Create_t *c = (Create_t *)calloc(1, sizeof(Create_t)); 171 | c->t = CREATE_INDEX; 172 | c->index = idx; 173 | return c; 174 | } 175 | 176 | void Create_print(Create_t *cre) { 177 | if (cre->t == CREATE_TABLE) 178 | Table_print(cre->table); 179 | else 180 | Index_print(cre->index); 181 | } 182 | void Create_free(Create_t *cre) { 183 | if (cre->t == CREATE_TABLE) 184 | Table_free(cre->table); 185 | else 186 | Index_free(cre->index); 187 | free(cre); 188 | } 189 | -------------------------------------------------------------------------------- /src/ra.c: -------------------------------------------------------------------------------- 1 | #include "../include/ra.h" 2 | 3 | void RA_print(RA_t *ra) { 4 | switch(ra->t) { 5 | case RA_TABLE: 6 | indent_print("Table(%s)", ra->table.name); 7 | break; 8 | case RA_SIGMA: 9 | indent_print("Sigma("); 10 | Condition_print(ra->sigma.cond); 11 | printf(", "); 12 | upInd(); 13 | RA_print(ra->sigma.ra); 14 | downInd(); 15 | indent_print(")"); 16 | break; 17 | case RA_PI: 18 | indent_print("Pi("); 19 | Expression_printList(ra->pi.expr_list); 20 | printf(", "); 21 | upInd(); 22 | RA_print(ra->pi.ra); 23 | downInd(); 24 | indent_print(")"); 25 | break; 26 | case RA_UNION: 27 | indent_print("Union("); 28 | upInd(); 29 | RA_print(ra->binary.ra1); 30 | indent_print(", "); 31 | RA_print(ra->binary.ra2); 32 | downInd(); 33 | indent_print(")"); 34 | break; 35 | case RA_DIFFERENCE: 36 | indent_print("Difference("); 37 | upInd(); 38 | RA_print(ra->binary.ra1); 39 | indent_print(", "); 40 | RA_print(ra->binary.ra2); 41 | downInd(); 42 | indent_print(")"); 43 | break; 44 | case RA_CROSS: 45 | indent_print("Cross("); 46 | upInd(); 47 | RA_print(ra->binary.ra1); 48 | indent_print(", \n"); 49 | RA_print(ra->binary.ra2); 50 | downInd(); 51 | indent_print(")"); 52 | break; 53 | case RA_RHO_EXPR: 54 | indent_print("RhoExpr("); 55 | Expression_print(ra->rho.to_rename); 56 | printf(", \"%s\",", ra->rho.new_name); 57 | upInd(); 58 | RA_print(ra->rho.ra); 59 | downInd(); 60 | indent_print(")"); 61 | break; 62 | case RA_RHO_TABLE: 63 | indent_print("RhoTable("); 64 | upInd(); 65 | RA_print(ra->rho.ra); 66 | downInd(); 67 | printf(", \"%s\"", ra->rho.new_name); 68 | indent_print(")"); 69 | break; 70 | default: 71 | puts("Unknown RA_t type"); 72 | } 73 | } 74 | 75 | RA_t *RA_Table (const char *name) { 76 | Table_t *tbl = table_by_name(name); 77 | if (!tbl) { 78 | fprintf(stderr, "Error: table '%s' does not exist in DB.\n", name); 79 | return NULL; 80 | } 81 | RA_t *new_ra = (RA_t *)calloc(1, sizeof(RA_t)); 82 | new_ra->t = RA_TABLE; 83 | new_ra->table.name = strdup(name); 84 | new_ra->columns = column_list(tbl); 85 | return new_ra; 86 | } 87 | 88 | RA_t *RA_Sigma (RA_t *ra, Condition_t *cond) { 89 | RA_t *new_ra = (RA_t *)calloc(1, sizeof(RA_t)); 90 | new_ra->t = RA_SIGMA; 91 | new_ra->sigma.cond = cond; 92 | new_ra->sigma.ra = ra; 93 | new_ra->columns = list_deepCopy(&ra->columns); 94 | return new_ra; 95 | } 96 | 97 | RA_t *RA_Pi (RA_t *ra, Expression_t *expr_list) { 98 | RA_t *new_ra = (RA_t *)calloc(1, sizeof(RA_t)); 99 | new_ra->t = RA_PI; 100 | new_ra->pi.ra = ra; 101 | new_ra->pi.expr_list = expr_list; 102 | new_ra->columns = list_deepCopy(&ra->columns); 103 | return new_ra; 104 | } 105 | 106 | RA_t *RA_Union (RA_t *ra1, RA_t *ra2) { 107 | RA_t *new_ra = (RA_t *)calloc(1, sizeof(RA_t)); 108 | new_ra->t = RA_UNION; 109 | new_ra->binary.ra1 = ra1; 110 | new_ra->binary.ra2 = ra2; 111 | new_ra->columns = list_union(&ra1->columns, &ra2->columns); 112 | return new_ra; 113 | } 114 | 115 | RA_t *RA_Difference (RA_t *ra1, RA_t *ra2) { 116 | RA_t *new_ra = (RA_t *)calloc(1, sizeof(RA_t)); 117 | new_ra->t = RA_DIFFERENCE; 118 | new_ra->binary.ra1 = ra1; 119 | new_ra->binary.ra2 = ra2; 120 | new_ra->columns = list_difference(&ra1->columns, &ra2->columns); 121 | return new_ra; 122 | } 123 | 124 | RA_t *RA_Cross (RA_t *ra1, RA_t *ra2) { 125 | RA_t *new_ra = (RA_t *)calloc(1, sizeof(RA_t)); 126 | new_ra->t = RA_CROSS; 127 | new_ra->binary.ra1 = ra1; 128 | new_ra->binary.ra2 = ra2; 129 | new_ra->columns = list_union(&ra1->columns, &ra2->columns); 130 | return new_ra; 131 | } 132 | 133 | RA_t *RA_RhoTable (RA_t *ra, const char *new_name) { 134 | RA_t *new_ra = (RA_t *)calloc(1, sizeof(RA_t)); 135 | new_ra->t = RA_RHO_TABLE; 136 | new_ra->rho.new_name = strdup(new_name); 137 | new_ra->columns = list_deepCopy(&ra->columns); 138 | return new_ra; 139 | } 140 | 141 | RA_t *RA_RhoExpr (RA_t *ra, Expression_t *expr, const char *new_name) { 142 | RA_t *new_ra = (RA_t *)calloc(1, sizeof(RA_t)); 143 | new_ra->t = RA_RHO_EXPR; 144 | new_ra->rho.to_rename = expr; 145 | new_ra->rho.new_name = strdup(new_name); 146 | new_ra->columns = list_deepCopy(&ra->columns); 147 | return new_ra; 148 | } 149 | 150 | void RA_free(RA_t *ra) { 151 | switch(ra->t) { 152 | case RA_SIGMA: 153 | Condition_free(ra->sigma.cond); 154 | RA_free(ra->sigma.ra); 155 | break; 156 | case RA_PI: 157 | RA_free(ra->pi.ra); 158 | Expression_freeList(ra->pi.expr_list); 159 | break; 160 | case RA_UNION: 161 | case RA_DIFFERENCE: 162 | case RA_CROSS: 163 | RA_free(ra->binary.ra1); 164 | RA_free(ra->binary.ra2); 165 | break; 166 | case RA_RHO_EXPR: 167 | RA_free(ra->rho.ra); 168 | free(ra->rho.new_name); 169 | Expression_free(ra->rho.to_rename); 170 | break; 171 | case RA_RHO_TABLE: 172 | RA_free(ra->rho.ra); 173 | free(ra->rho.new_name); 174 | break; 175 | case RA_TABLE: 176 | free(ra->table.name); 177 | break; 178 | } 179 | free(ra); 180 | } 181 | 182 | #ifdef RA_TEST 183 | 184 | int main(int argc, char const *argv[]) 185 | { 186 | RA_t *ra1 = Pi(Sigma(Table("bazzle"), And(Eq("foo", "bar"), Lt("popo", "toto"))), 187 | 3, "foo", "bar", "baz"), 188 | *ra2 = Pi(Rho(Sigma(Cross(Rho(Table("Foo"), "f", 1, "Col1"), 189 | Rho(Table("Foo"), "g", 1, "Col2")),Not(Eq("Col1","Col2")) 190 | ),"res", 2, "Col1", "Col2"),2, "Col1", "Col2"); 191 | RA_print(ra1); 192 | RA_print(ra2); 193 | RA_free(ra1); 194 | RA_free(ra2); 195 | return 0; 196 | } 197 | 198 | #endif 199 | -------------------------------------------------------------------------------- /src/column.c: -------------------------------------------------------------------------------- 1 | #include "../include/column.h" 2 | #include "../include/condition.h" 3 | 4 | static ssize_t size_constraint = -1; 5 | 6 | Constraint_t *NotNull(void) { 7 | Constraint_t *con = (Constraint_t *)calloc(1, sizeof(Constraint_t)); 8 | con->t = CONS_NOT_NULL; 9 | return con; 10 | } 11 | 12 | Constraint_t *AutoIncrement(void) { 13 | Constraint_t *con = (Constraint_t *)calloc(1, sizeof(Constraint_t)); 14 | con->t = CONS_AUTO_INCREMENT; 15 | return con; 16 | } 17 | 18 | Constraint_t *PrimaryKey(void) { 19 | Constraint_t *con = (Constraint_t *)calloc(1, sizeof(Constraint_t)); 20 | con->t = CONS_PRIMARY_KEY; 21 | return con; 22 | } 23 | 24 | Constraint_t *ForeignKey(ForeignKeyRef_t fkr) { 25 | Constraint_t *con = (Constraint_t *)calloc(1, sizeof(Constraint_t)); 26 | con->t = CONS_FOREIGN_KEY; 27 | con->constraint.ref = fkr; 28 | return con; 29 | } 30 | 31 | Constraint_t *Default(Literal_t *val) { 32 | Constraint_t *con = (Constraint_t *)calloc(1, sizeof(Constraint_t)); 33 | con->t = CONS_DEFAULT; 34 | con->constraint.default_val = val; 35 | return con; 36 | } 37 | 38 | Constraint_t *Unique(void) { 39 | Constraint_t *con = (Constraint_t *)calloc(1, sizeof(Constraint_t)); 40 | con->t = CONS_UNIQUE; 41 | return con; 42 | } 43 | 44 | Constraint_t *Check(Condition_t *cond) { 45 | Constraint_t *con = (Constraint_t *)calloc(1, sizeof(Constraint_t)); 46 | con->t = CONS_CHECK; 47 | con->constraint.check = cond; 48 | return con; 49 | } 50 | 51 | Constraint_t *ColumnSize(unsigned size) { 52 | Constraint_t *con = (Constraint_t *)calloc(1, sizeof(Constraint_t)); 53 | con->t = CONS_SIZE; 54 | con->constraint.size = size; 55 | return con; 56 | } 57 | 58 | static void deleteConstraint_ts(Constraint_t *constraint) { 59 | if (constraint) { 60 | Constraint_t *next = constraint->next; 61 | switch (constraint->t) { 62 | case CONS_DEFAULT: 63 | Literal_free(constraint->constraint.default_val); 64 | break; 65 | case CONS_CHECK: 66 | Condition_free(constraint->constraint.check); 67 | break; 68 | default: 69 | break; 70 | } 71 | deleteConstraint_ts(next); 72 | } 73 | } 74 | 75 | void Column_freeList(Column_t *column) { 76 | if (column) { 77 | Column_t *next = column->next; 78 | free(column->name); 79 | deleteConstraint_ts(column->constraints); 80 | free(column); 81 | Column_freeList(next); 82 | } 83 | } 84 | 85 | size_t Column_getSize(Column_t *col) { 86 | Constraint_t *cons = col->constraints; 87 | while (cons) { 88 | if (cons->t == CONS_SIZE) 89 | return cons->constraint.size; 90 | cons = cons->next; 91 | } 92 | switch (col->type) { 93 | case TYPE_CHAR: return sizeof(char); 94 | case TYPE_DOUBLE: return sizeof(double); 95 | case TYPE_INT: return sizeof(int); 96 | case TYPE_TEXT: return 250; /* default text length */ 97 | } 98 | } 99 | 100 | static void Column_getOffsets_r(Column_t *cols, size_t offset) { 101 | if (!cols) return; 102 | cols->offset = offset; 103 | Column_getOffsets_r(cols->next, Column_getSize(cols)); 104 | } 105 | 106 | void Column_getOffsets(Column_t *cols) { 107 | Column_getOffsets_r(cols, 0); 108 | } 109 | 110 | void Constraint_printList(Constraint_t *constraints) { 111 | int first = 1; 112 | if (constraints) { 113 | printf(" ["); 114 | for (; constraints; constraints = constraints->next) { 115 | if (first) { 116 | first = 0; 117 | } else { 118 | printf(", "); 119 | } 120 | Constraint_print(constraints); 121 | } 122 | printf("]"); 123 | } 124 | } 125 | 126 | ForeignKeyRef_t ForeignKeyRef_makeFull(const char *cname, ForeignKeyRef_t fkey) { 127 | fkey.col_name = cname; 128 | return fkey; 129 | } 130 | 131 | ForeignKeyRef_t ForeignKeyRef_make(const char *foreign_tname, 132 | const char *foreign_cname) { 133 | ForeignKeyRef_t fkey; 134 | fkey.col_name = NULL; 135 | fkey.table_name = foreign_tname; 136 | fkey.table_col_name = foreign_cname; 137 | return fkey; 138 | } 139 | 140 | Column_t *Column(const char *name, enum data_type type, Constraint_t *constraints) { 141 | Column_t *new_column = (Column_t *)calloc(1, sizeof(Column_t)); 142 | new_column->name = strdup(name); 143 | new_column->type = type; 144 | new_column->constraints = constraints; 145 | /* if the parser found a size constraint, then size_constraitn will be > 0 */ 146 | if (size_constraint > 0) { 147 | Constraint_append(new_column->constraints, ColumnSize(size_constraint)); 148 | size_constraint = -1; 149 | } 150 | return new_column; 151 | } 152 | 153 | Column_t *Column_addConstraint(Column_t *column, Constraint_t *constraints) { 154 | column->constraints = Constraint_append(column->constraints, constraints); 155 | return column; 156 | } 157 | 158 | Constraint_t *Constraint_append(Constraint_t *constraints, Constraint_t *constraint) { 159 | if (constraints == NULL) 160 | constraints = constraint; 161 | else 162 | constraints->next = constraint; 163 | return constraints; 164 | } 165 | 166 | void Constraint_print(void *constraint_voidp) { 167 | Constraint_t *constraint = (Constraint_t *)constraint_voidp; 168 | switch(constraint->t) { 169 | case CONS_DEFAULT: 170 | printf("Default: "); 171 | Literal_print(constraint->constraint.default_val); 172 | break; 173 | case CONS_PRIMARY_KEY: 174 | printf("Primary Key"); 175 | break; 176 | case CONS_UNIQUE: 177 | printf("Unique"); 178 | break; 179 | case CONS_FOREIGN_KEY: 180 | printf("Foreign key (%s, %s)", constraint->constraint.ref.table_name, 181 | constraint->constraint.ref.table_col_name); 182 | break; 183 | case CONS_AUTO_INCREMENT: 184 | printf("Auto increment"); 185 | break; 186 | case CONS_NOT_NULL: 187 | printf("Not null"); 188 | break; 189 | case CONS_CHECK: 190 | printf("Check: "); 191 | Condition_print(constraint->constraint.check); 192 | break; 193 | case CONS_SIZE: 194 | printf("Size: %u", constraint->constraint.size); 195 | break; 196 | default: 197 | printf("Unknown constraint type"); 198 | } 199 | } 200 | 201 | void Column_setSize(ssize_t size) { 202 | size_constraint = size; 203 | } 204 | 205 | static Column_t *app_col(Column_t *col1, Column_t *col2) { 206 | col1->next = col2; 207 | return col1; 208 | } 209 | 210 | Column_t *Column_append(Column_t *col1, Column_t *col2) { 211 | if (!col1) return col2; 212 | return app_col(col1, Column_append(col1->next, col2)); 213 | } 214 | 215 | ColumnReference_t *ColumnReference_make(const char *tname, const char *cname) { 216 | ColumnReference_t *ref = (ColumnReference_t *)calloc(1, sizeof(ColumnReference_t)); 217 | if (tname) ref->tableName = strdup(tname); 218 | if (cname) ref->columnName = strdup(cname); 219 | return ref; 220 | } 221 | 222 | int Column_compareByName(const void *c1, const void *c2) { 223 | return strcmp(((Column_t *)c1)->name, ((Column_t *)c2)->name); 224 | } 225 | 226 | void *Column_copy(void *col) { 227 | Column_t *copy = (Column_t *)malloc(sizeof(Column_t)); 228 | memcpy(copy, col, sizeof(Column_t)); 229 | copy->name = strdup(((Column_t *)col)->name); 230 | copy->next = NULL; /* just in case */ 231 | return copy; 232 | } 233 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # chiDB SQL Compiler Front-End 2 | 3 | This is a SQL Parser and compiler frontent which can parse a reasonably large subset of SQL. The parser is generated by lex and yacc, and all other code is in C. The parser generates an abstract syntax tree (AST) representation of SQL, which is based on Relational Algebra, extended to fit closer with SQL. This extended relational algebra is termed SRA (sugared relational algebra), as it itself can be compiled into more or less pure relational algebra. 4 | 5 | Since relational algebra primarily deals with only queries, there are separate data structures to deal with `Create Table`, `Create Index`, `Insert Into`, and `Delete From` commands (and possibly other things in the future). These don't use RA, but they do, for example, use the Expression part of the abstract syntax tree. 6 | 7 | ## Methodologies 8 | 9 | The parser is is generated by Lex (a lexer generator) and Yacc (a parser generator). All other code is written in C, along with prototypes in Haskell. 10 | 11 | ### Lex 12 | 13 | The lexer contains a series of regular expressions defining tokens of the language and associating them with instructions to be performed for when a given regular expression is found. The Lex utility converts a Lex file into C code which will scan an input for the next token and perform whatever instructions are to be performed when that token is found. These can be as simple as returning some integral value indicating what type of token it is (which is drawn out of an enum generated by yacc), or more complicated instructions such as dealing with comments, converting escape sequences, or storing the value of a constant expression (like an integer or string) or name of a variable. 14 | 15 | ### Yacc 16 | 17 | Yacc is used to generate the parser. A yacc file is similar to a lex file in that it has a series of definitions of structures, and instructions for when those structures are encountered. However, in Yacc the structures are grammatical rules, and the instructions that accompany them are usually to build a parse tree (abstract syntax tree), which is a representation of the language in data structures. Yacc allows us to rapidly construct a correct and efficient parser, which gives us detailed error messages (either in parsing or in writing the parser itself), and saves us lots of time compared to a hand-written parser. 18 | 19 | ## Sugared Relational Algebra 20 | 21 | In the ChiDB SQL parser, the instructions in the Yacc file produce a representation which we call Sugared Relational Algebra (SRA). This is an extended form of relational algebra; with several differences. For example: 22 | 23 | * it contains as primitives multiple join types (Inner Join, Full/Left/Right outer join, Natural Join) and Intersection 24 | 25 | * it has no rho operator, because all renaming and aliasing is contained alongside the expressions. For example in SRA, a `Project` structure has a list of expressions which can optionally have aliases, but in RA, a `Pi` structure has only expressions, and any aliases must be done with a Rho operator. 26 | 27 | * SRA is also allowed to use `*` as in SQL to stand for "all columns in the table". The step which translates from SRA to RA will expand all `*`s into the actual list of columns. 28 | 29 | The translation from SRA to RA is called desugaring. Here's an example. Say we have a table `t` which has columns `w`, `x`, and `y`: 30 | 31 | ``` 32 | SQL: 33 | SELECT *, x+y as z from t; 34 | 35 | SRA: 36 | Project([*, (Add(x, y), z)], 37 | Table(t)) 38 | 39 | RA: 40 | Pi([w, x, y, z], 41 | Rho(Add(x,y), z, 42 | Pi([w, x, y, Add(x,y)], 43 | Table(t)))) 44 | ``` 45 | 46 | A more complicated example: 47 | 48 | ``` 49 | SQL: 50 | select f.a as Col1, g.a as Col2 from Foo f, Foo g where Col1 != Col2; 51 | 52 | SRA: 53 | Project([(f,a,Col1), (g,a,Col2)], 54 | Select(Col1 != Col2, 55 | Join( 56 | Table(Foo,f), 57 | Table(Foo,g) 58 | ) 59 | ) 60 | ) 61 | 62 | Pi([Col1, Col2], 63 | Sigma(Col1 != Col2, 64 | Cross( 65 | Rho(a, Col1, 66 | RhoTable(f, 67 | Table(Foo) 68 | ) 69 | ), 70 | Rho(a, Col2, 71 | RhoTable(g, 72 | Table(Foo) 73 | ) 74 | ) 75 | ) 76 | ) 77 | ) 78 | ``` 79 | 80 | ## Current Status 81 | 82 | Currently, we have a good deal of machinery in place. The parser and lexer are finished, and all of the data structures for both SRA and RA are written, along with constructors, destructors, pretty printers, etc. There is a doubly-linked list library which is quite robust (though not fully thread-safe, but this could be reasonably easily accomplished). There is also a vector library which might be useful, for example, for string-building, or if a vector-based instead of list-based representation of columns, expressions, etc. is desired. 83 | 84 | ## Future Directions 85 | 86 | The largest thing missing at this point is the desugarer. However, we have one written up in the Haskell language (`Desugar.hs`, `SRA.hs`, examples are in `Tests.hs`), and the translation of this code into C should be fairly straightforward. Haskell allows us to represent and manipulate structures of RA and SRA with ease, conciseness and correctness, and I humbly recommend any modification of the code to be prototyped in Haskell prior to coding it in C. 87 | 88 | Finally, certain things like `ORDER BY`, `GROUP BY`, `SELECT DISTINCT`, sized types, etc are supported by the parser, but have little or nothing in the underlying C code to apply these. The implementation of these and other features are left to future programmers, who may wish to simply disallow them and throw errors when they are used. 89 | 90 | ## Installation and Usage 91 | 92 | This has only been run on Mac OS X, although it should work without incident on Linux as well. Windows will presumably require installation of Lex and Yacc, along with some meddling with the makefile (if indeed make can be run on Windows), to account for different filename conventions. 93 | 94 | Clone into the git repository: 95 | 96 | ``` 97 | > git clone https://github.com/thinkpad20/sql.git 98 | ``` 99 | 100 | Compile: 101 | 102 | ``` 103 | > cd sql 104 | > make 105 | ``` 106 | 107 | See some example tests with 108 | 109 | ``` 110 | > make test 111 | ``` 112 | 113 | Parse an arbitrary SQL file with 114 | 115 | ``` 116 | > bin/sql_parser 117 | ``` 118 | 119 | ## Condensed chiSQL grammar 120 | 121 | Here's a summary of the SQL subset we parse in chiSQL: 122 | 123 | ``` 124 | sql_queries ::= ((create_table|insert_into|delete_from|select)? ';')+ 125 | 126 | create_table ::= CREATE TABLE table_name '(' column_dec_list ')' 127 | 128 | column_dec_list ::= column_dec (',' column_dec)* 129 | 130 | column_dec ::= column_name type ('(' INT_LITERAL ')')? (constraint)* | key_dec 131 | 132 | type ::= INT | DOUBLE | CHAR | VARCHAR | TEXT 133 | 134 | constraint ::= NOT NULL | UNIQUE | PRIMARY KEY 135 | | FOREIGN KEY REFERENCES table_name ('(' column_name ')')? 136 | | DEFAULT (literal_value | AUTO INCREMENT) 137 | | CHECK bool_expression 138 | 139 | key_dec ::= PRIMARY KEY '(' column_names_list ')' 140 | | FOREIGN KEY '(' column_name ')' REFERENCES table_name ('(' column_name ')')? 141 | 142 | insert_into ::= INSERT INTO table_name 143 | ('(' column_name (',' column_name)* ')')? 144 | VALUES '(' literal_value (',' literal_value)* ')' 145 | 146 | literal_value ::= INT_LITERAL | DOUBLE_LITERAL | STRING_LITERAL 147 | 148 | delete_from ::= DELETE FROM table_name where_condition 149 | 150 | select ::= select_statement ((UNION | INTERSECT | EXCEPT) select_statement)* 151 | 152 | select_statement ::= SELECT (DISTINCT)? expression_list FROM table (select_constraint)* 153 | | '(' select_statement ')' 154 | 155 | select_constraint ::= ON bool_expression 156 | | USING '(' column_names_list ')' 157 | | WHERE bool_expression 158 | | ORDER BY column_name (ASC | DESC)? 159 | 160 | bool_expression ::= bool_term ((AND | OR) bool_term)* 161 | 162 | bool_term ::= expression ('=' | '>' | '<' | GEQ | LEQ | NEQ) expression 163 | | expression IN '(' select ')' 164 | | '(' bool_expression ')' 165 | | NOT bool_term 166 | 167 | expression_list ::= expression (',' expression)* 168 | 169 | expression ::= term (('+'|'-'|'*'|'/') term)* 170 | 171 | term ::= literal_value 172 | | (table_name '.')? (column_name | '*' | NULL) 173 | | '(' expression ')' 174 | | (COUNT | SUM | AVG | MIN | MAX) '(' expression ')' 175 | | '-' term 176 | 177 | column_name ::= IDENTIFIER 178 | 179 | table_name ::= IDENTIFIER 180 | 181 | table ::= table_name ((AS)? IDENTIFIER)? ((',' | join) table_name)* 182 | 183 | join ::= (CROSS | INNER | (LEFT | RIGHT) (OUTER)? | NATURAL)? JOIN 184 | ``` -------------------------------------------------------------------------------- /haskell/Desugar.hs: -------------------------------------------------------------------------------- 1 | module Desugar where 2 | 3 | import SRA 4 | import Data.List 5 | 6 | desugar :: TableMap -> SRA -> RA 7 | -- desugaring a table name means looking up the table, which 8 | -- will be a list of (name, type) pairs, and creating a list 9 | -- of expressions from those pairs, where each expression is 10 | -- a single column (toExpr) 11 | desugar tables (TableName name Nothing) = case tLookup name tables of 12 | Just cols -> Table name cols 13 | Nothing -> error $ "No table named " ++ name 14 | 15 | desugar tables (TableName name (Just n)) = 16 | -- if there is a table alias given, we wrap our table in a Rho 17 | -- operator. 18 | RhoTable n $ desugar tables (TableName name Nothing) 19 | 20 | desugar tables (Project nes sra) = 21 | -- desugaring a projection, we need to get the list of expressions 22 | -- we want to project, and then apply a rho operator for each one 23 | -- in the list that has an alias. We also need to check for *s. 24 | let 25 | ra = desugar tables sra 26 | -- get whatever we need to project, meaning fst of each (String, Type) 27 | toProject = (map fst nes) 28 | expand :: RA -> [Expression] 29 | -- expand returns a list of all of the expressions in an RA 30 | expand ra = map (\(n,_) -> Col n Nothing) $ getCols ra 31 | -- starExpand replaces any stars in a list with their expressions 32 | starExpand [] = [] 33 | -- later we'll specifically address t.* vs u.* 34 | starExpand ((Col "*" _):es) = expand ra ++ starExpand es 35 | starExpand (e:es) = e : starExpand es 36 | -- build the full star-expanded Pi statement 37 | project = Pi (starExpand toProject) ra 38 | doRename :: [NamedExpr] -> RA -> RA 39 | -- if we have a renaming, we need to apply a rho operator 40 | doRename ((e, Just n):nes) ra = Rho e n (doRename nes ra) 41 | -- if we have an expression which is not renamed, don't add anything 42 | doRename ((_, Nothing):nes) ra = doRename nes ra 43 | -- if no more expressions to do, return RA 44 | doRename [] ra = ra 45 | in 46 | doRename nes project 47 | 48 | -- desugaring a select operator is simple 49 | desugar tables (Select cond sra) = Sigma cond (desugar tables sra) 50 | 51 | desugar tables (Join Inner l r jc) = 52 | -- for an Inner join, it's a cross product but it might have a join 53 | -- condition specified, which would mean a Sigma operator. 54 | let (raL, raR) = (desugar tables l, desugar tables r) in 55 | case jc of 56 | Nothing -> Cross raL raR 57 | Just cond -> Sigma cond $ Cross raL raR 58 | 59 | desugar tables (NaturalJoin l r) = 60 | -- we have this formula from wikipedia for the natural join: 61 | -- L ⋈ R = 62 | -- π([l1,..lN ∪ r1,..,rN], 63 | -- σ(l.a1 = r.a1 ^ l.a2 = r.a2 ^ ... ^ r.aN = l.aN, 64 | -- (L × R))) 65 | -- in other words, we find all of the columns that they have in common, 66 | -- and select on equality on those columns, and then project the union of 67 | -- columns (so no repetition) 68 | let 69 | (raL, raR) = (desugar tables l, desugar tables r) 70 | cross = (Cross raL raR) 71 | (colsL, colsR) = (getCols raL, getCols raR) 72 | -- get list of columns in both and those in either 73 | inBoth = colsL `intersect` colsR 74 | inEither = colsL `union` colsR 75 | -- to construct the equalities, we need to perform this transformation 76 | -- Col "foo" Nothing -> 77 | -- Compare "=" (Col "foo" (Just "l")) (Col "foo" (Just "r")) 78 | -- this means that we need to know the names of the L and R tables 79 | (lName, rName) = (getName raL, getName raR) 80 | trans (n, _) = Compare "=" (Col n $ Just lName) (Col n $ Just rName) 81 | eqs = map trans inBoth 82 | in 83 | -- if no columns in common, then it's just a cross product 84 | if inBoth == [] then cross 85 | -- otherwise, AND all of the equality statements together, put it in a 86 | -- Sigma statement, and project whichever columns are in either 87 | else Pi (map colToExpr $ inEither) 88 | (Sigma (foldr1 (BinaryCond "and") eqs) cross) 89 | 90 | -- for left/right outer joins, we have some magic juju identities... 91 | desugar tables (Join LeftOuter l r jc) = 92 | -- Wikipedia states that L (left outer join) R can be expressed as 93 | -- L loj R = (L ⋈ R) ∪ ((L - π([l1,..,lN], L ⋈ R))×{(ω,..,ω)}) 94 | -- where [l1,..,lN] are attributes of L, and ωs are a relation consisting 95 | -- of the columns which are in R but not in L. 96 | let 97 | (raL, raR) = (desugar tables l, desugar tables r) 98 | (colsL, colsR) = (getCols raL, getCols raR) 99 | exprsL = map colToExpr colsL 100 | inRightOnly = colsR \\ colsL -- (\\) is set difference 101 | ω = Table "ω" inRightOnly 102 | natJoin = (desugar tables $ NaturalJoin l r) 103 | res = Union natJoin (Cross (Diff (raL) (Pi exprsL natJoin)) ω) 104 | in 105 | case jc of Nothing -> res 106 | Just cond -> Sigma cond res 107 | 108 | -- right outer same as above but swapped 109 | desugar tables (Join RightOuter l r jc) = 110 | let 111 | (raL, raR) = (desugar tables l, desugar tables r) 112 | (colsL, colsR) = (getCols raL, getCols raR) 113 | exprsR = map colToExpr colsR 114 | inLeftOnly = colsL \\ colsR 115 | ω = Table "ω" inLeftOnly 116 | natJoin = (desugar tables $ NaturalJoin l r) 117 | res = Union natJoin (Cross (Diff (raR) (Pi exprsR natJoin)) ω) 118 | in 119 | case jc of Nothing -> res 120 | Just cond -> Sigma cond res 121 | 122 | -- full outer join can be defined as (L foj R) = (L loj R) U (L roj R) 123 | desugar tables (Join FullOuter l r jc) = 124 | let 125 | oj typ = desugar tables (Join typ l r Nothing) 126 | res = (oj LeftOuter) `Union` (oj RightOuter) 127 | in 128 | case jc of Nothing -> res 129 | Just cond -> Sigma cond res 130 | 131 | desugar tables (SRAUnion l r) = (desugar tables l) `Union` (desugar tables r) 132 | desugar tables (Intersect l r) = 133 | let (raL, raR) = (desugar tables l, desugar tables r) in 134 | (raL `Union` raR) `Diff` (raL `Diff` raR) `Diff` (raR `Diff` raL) 135 | desugar tables (Except l r) = (desugar tables l) `Diff` (desugar tables r) 136 | 137 | 138 | -- convenience function which creates an expression out of a column 139 | colToExpr :: (String, Type) -> Expression 140 | colToExpr (n,_) = Col n Nothing 141 | 142 | getName :: RA -> String 143 | -- throws an error if RA is not either a Table or a RhoTable 144 | getName (Table name _) = name 145 | getName (RhoTable name _) = name 146 | getName _ = error "Every derived table must have its own alias" 147 | 148 | getCols :: RA -> [Column] 149 | -- getCols will return a list of (String, Type) from any table; 150 | -- a.k.a. the names and types of the columns to be found in this table. 151 | 152 | -- in the simplest case, we already know the answer 153 | getCols (Table name cols) = cols 154 | 155 | -- when we have a project, we are specifying the names of columns to be 156 | -- carried through. We simply use the show function to get a string 157 | -- representation of the expression, and we use the getType function to 158 | -- find the type; from this we can construct a (String, Type) tuple, which 159 | -- is a column. Table name qualifiers will be kept. 160 | getCols (Pi exprs ra) = map toCols exprs where 161 | toCols expr = (show expr, getType expr ra) 162 | 163 | -- to get the columns of a renamed table, we first get the columns 164 | -- of its subtable. Then, say we're renaming x+y to z. We find the column 165 | -- called x+y and store it instead as z. Others, we don't touch. 166 | getCols (Rho e s ra) = 167 | let 168 | cols = getCols ra 169 | rename :: Column -> Column 170 | rename (cName, t) = if cName == show e then (show e, t) else (cName, t) 171 | in 172 | map rename cols 173 | 174 | -- renaming a table doesn't change the name of its columns, so we leave it 175 | getCols (RhoTable _ ra) = getCols ra 176 | 177 | -- similarly with a selection 178 | getCols (Sigma _ ra) = getCols ra 179 | 180 | -- in a union or difference, we require the columns be the same, so we make 181 | -- sure that's the case and if so, we return it. 182 | getCols (Union l r) = 183 | let (colsL, colsR) = (getCols l, getCols r) in 184 | if colsL == colsR then colsL 185 | else error $ "Mismatched attributes: " ++ show (getCols l) ++ 186 | " != " ++ show (getCols r) 187 | 188 | -- difference is same as union here. 189 | getCols (Diff l r) = getCols (Union l r) 190 | 191 | -- with a cross product, the columns will be concatenated. 192 | getCols (Cross l r) = getCols l ++ getCols r 193 | 194 | 195 | getType :: Expression -> RA -> Type 196 | -- Small type checking function for getCols; also will report type 197 | -- mismatch errors or non-existent columns. 198 | getType (Col name _) ra = case lookup name (getCols ra) of 199 | -- todo: examine cases where table name is specified 200 | Just t -> t 201 | Nothing -> error $ "Column " ++ name ++ " does not exist" 202 | getType (Neg e) ra = case getType e ra of 203 | Int -> Int 204 | String -> error "Can't apply (-) to a string" 205 | getType (Binary "||" e1 e2) ra = 206 | case (getType e1 ra, getType e2 ra) of 207 | (String, String) -> String 208 | otherwise -> error "Can't concat anything but strings" 209 | getType (Binary op e1 e2) ra = 210 | let (t1, t2) = (getType e1 ra, getType e2 ra) in 211 | if t1 == t2 then t1 else error $ "Mismatched types in " ++ op -------------------------------------------------------------------------------- /src/expression.c: -------------------------------------------------------------------------------- 1 | #include "../include/expression.h" 2 | 3 | Expression_t *Term(const char *str) { 4 | Expression_t *new_expr = (Expression_t *)calloc(1, sizeof(Expression_t)); 5 | new_expr->t = EXPR_TERM; 6 | new_expr->expr.term.t = TERM_ID; 7 | new_expr->expr.term.id = strdup(str); 8 | return new_expr; 9 | } 10 | 11 | Expression_t *TermLiteral(Literal_t *val) { 12 | Expression_t *new_expr = (Expression_t *)calloc(1, sizeof(Expression_t)); 13 | new_expr->t = EXPR_TERM; 14 | new_expr->expr.term.t = TERM_LITERAL; 15 | new_expr->expr.term.val = val; 16 | return new_expr; 17 | } 18 | 19 | Expression_t *TermNull(void) { 20 | Expression_t *new_expr = (Expression_t *)calloc(1, sizeof(Expression_t)); 21 | new_expr->t = EXPR_TERM; 22 | new_expr->expr.term.t = TERM_NULL; 23 | return new_expr; 24 | } 25 | 26 | Expression_t *TermColumnReference(ColumnReference_t *ref) { 27 | Expression_t *new_expr = (Expression_t *)calloc(1, sizeof(Expression_t)); 28 | new_expr->t = EXPR_TERM; 29 | new_expr->expr.term.t = TERM_COLREF; 30 | new_expr->expr.term.ref = ref; 31 | return new_expr; 32 | } 33 | 34 | Expression_t *TermFunction(int functype, Expression_t *expr) { 35 | Expression_t *new_expr = (Expression_t *)calloc(1, sizeof(Expression_t)); 36 | new_expr->t = EXPR_TERM; 37 | new_expr->expr.term.t = TERM_FUNC; 38 | new_expr->expr.term.f.t = functype; 39 | new_expr->expr.term.f.expr = expr; 40 | return new_expr; 41 | } 42 | 43 | static void Term_print(ExprTerm term) { 44 | switch (term.t) { 45 | case TERM_ID: 46 | printf("%s", term.id); 47 | break; 48 | case TERM_LITERAL: 49 | Literal_print(term.val); 50 | break; 51 | case TERM_NULL: 52 | printf("NULL"); 53 | break; 54 | case TERM_COLREF: 55 | if (term.ref->tableName) 56 | printf("%s.", term.ref->tableName); 57 | printf("%s", term.ref->columnName); 58 | break; 59 | case TERM_FUNC: 60 | switch (term.f.t) { 61 | case FUNC_AVG: 62 | printf("AVG("); 63 | Expression_print(term.f.expr); 64 | printf(")"); 65 | break; 66 | case FUNC_COUNT: 67 | printf("COUNT("); 68 | Expression_print(term.f.expr); 69 | printf(")"); 70 | break; 71 | case FUNC_MAX: 72 | printf("MAX("); 73 | Expression_print(term.f.expr); 74 | printf(")"); 75 | break; 76 | case FUNC_MIN: 77 | printf("MIN("); 78 | Expression_print(term.f.expr); 79 | printf(")"); 80 | break; 81 | case FUNC_SUM: 82 | printf("SUM("); 83 | Expression_print(term.f.expr); 84 | printf(")"); 85 | break; 86 | default: 87 | printf("Unknown function"); 88 | } 89 | break; 90 | default: 91 | printf("Unknown term type"); 92 | } 93 | } 94 | 95 | static void Term_free(ExprTerm term) { 96 | switch (term.t) { 97 | case TERM_ID: 98 | free(term.id); 99 | break; 100 | case TERM_LITERAL: 101 | Literal_free(term.val); 102 | break; 103 | case TERM_NULL: 104 | break; 105 | case TERM_COLREF: 106 | if (term.ref->tableName) 107 | free(term.ref->tableName); 108 | free(term.ref->columnName); 109 | break; 110 | case TERM_FUNC: 111 | switch (term.f.t) { 112 | case FUNC_AVG: 113 | Expression_free(term.f.expr); 114 | break; 115 | case FUNC_COUNT: 116 | Expression_free(term.f.expr); 117 | break; 118 | case FUNC_MAX: 119 | Expression_free(term.f.expr); 120 | break; 121 | case FUNC_MIN: 122 | Expression_free(term.f.expr); 123 | break; 124 | case FUNC_SUM: 125 | Expression_free(term.f.expr); 126 | break; 127 | default: 128 | printf("Can't delete unknown function\n"); 129 | } 130 | default: 131 | printf("Can't delete, unknown term type"); 132 | } 133 | } 134 | 135 | Expression_t *Binary(Expression_t *expr1, Expression_t *expr2, enum ExprType t) { 136 | Expression_t *expr = (Expression_t *)calloc(1, sizeof(Expression_t)); 137 | expr->t = t; 138 | expr->expr.binary.expr1 = expr1; 139 | expr->expr.binary.expr2 = expr2; 140 | return expr; 141 | } 142 | 143 | Expression_t *Plus(Expression_t *expr1, Expression_t *expr2) { 144 | return Binary(expr1, expr2, EXPR_PLUS); 145 | } 146 | 147 | Expression_t *Minus(Expression_t *expr1, Expression_t *expr2) { 148 | return Binary(expr1, expr2, EXPR_MINUS); 149 | } 150 | 151 | Expression_t *Multiply(Expression_t *expr1, Expression_t *expr2) { 152 | return Binary(expr1, expr2, EXPR_MULTIPLY); 153 | } 154 | 155 | Expression_t *Divide(Expression_t *expr1, Expression_t *expr2) { 156 | return Binary(expr1, expr2, EXPR_DIVIDE); 157 | } 158 | 159 | Expression_t *Concat(Expression_t *expr1, Expression_t *expr2) { 160 | return Binary(expr1, expr2, EXPR_CONCAT); 161 | } 162 | 163 | Expression_t *Neg(Expression_t *expr) { 164 | Expression_t *new_expr = (Expression_t *)calloc(1, sizeof(Expression_t)); 165 | new_expr->t = EXPR_NEG; 166 | new_expr->expr.unary.expr = expr; 167 | return new_expr; 168 | } 169 | 170 | enum TermType Expression_type(Expression_t *expr) { 171 | enum TermType t1, t2; 172 | switch (expr->t) { 173 | case EXPR_TERM: 174 | return expr->expr.term.t; 175 | case EXPR_NEG: 176 | return Expression_type(expr->expr.unary.expr); 177 | default: 178 | t1 = Expression_type(expr->expr.binary.expr1); 179 | t2 = Expression_type(expr->expr.binary.expr2); 180 | if (t1 != t2) { 181 | fprintf(stderr, "Type mismatch on binary expression:\n"); 182 | Expression_print(expr->expr.binary.expr1); 183 | fprintf(stderr, "\nis not the same type as\n"); 184 | Expression_print(expr->expr.binary.expr2); 185 | exit(1); 186 | } 187 | return t1; 188 | } 189 | } 190 | 191 | void Expression_print(Expression_t *expr) { 192 | if (expr->t != EXPR_TERM) printf("("); 193 | switch (expr->t) { 194 | case EXPR_CONCAT: 195 | Expression_print(expr->expr.binary.expr1); 196 | printf(" || "); 197 | Expression_print(expr->expr.binary.expr2); 198 | break; 199 | case EXPR_PLUS: 200 | Expression_print(expr->expr.binary.expr1); 201 | printf(" + "); 202 | Expression_print(expr->expr.binary.expr2); 203 | break; 204 | case EXPR_MINUS: 205 | Expression_print(expr->expr.binary.expr1); 206 | printf(" - "); 207 | Expression_print(expr->expr.binary.expr2); 208 | break; 209 | case EXPR_MULTIPLY: 210 | Expression_print(expr->expr.binary.expr1); 211 | printf(" * "); 212 | Expression_print(expr->expr.binary.expr2); 213 | break; 214 | case EXPR_DIVIDE: 215 | Expression_print(expr->expr.binary.expr1); 216 | printf(" / "); 217 | Expression_print(expr->expr.binary.expr2); 218 | break; 219 | case EXPR_NEG: 220 | printf("-"); 221 | Expression_print(expr->expr.unary.expr); 222 | break; 223 | case EXPR_TERM: 224 | Term_print(expr->expr.term); 225 | break; 226 | default: 227 | printf("(Unknown expression type '%d')", expr->t); 228 | } 229 | if (expr->t != EXPR_TERM) printf(")"); 230 | if (expr->alias) printf(" as %s", expr->alias); 231 | } 232 | 233 | static Expression_t *app_exp(Expression_t *e1, Expression_t *e2) { 234 | e1->next = e2; 235 | return e1; 236 | } 237 | 238 | Expression_t *append_expression(Expression_t *e1, Expression_t *e2) { 239 | if (!e1) return e2; 240 | return app_exp(e1, append_expression(e1->next, e2)); 241 | } 242 | 243 | void Expression_printList (Expression_t *expr) { 244 | int first = 1; 245 | printf("["); 246 | while (expr) { 247 | if (first) first = 0; else printf(", "); 248 | Expression_print(expr); 249 | expr = expr->next; 250 | } 251 | printf("]"); 252 | } 253 | 254 | Expression_t *add_alias(Expression_t *expr, const char *alias) { 255 | if (alias) expr->alias = strdup(alias); 256 | return expr; 257 | } 258 | 259 | void Expression_free(Expression_t *expr) { 260 | switch (expr->t) { 261 | case EXPR_CONCAT: 262 | Expression_free(expr->expr.binary.expr1); 263 | Expression_free(expr->expr.binary.expr2); 264 | break; 265 | case EXPR_PLUS: 266 | Expression_free(expr->expr.binary.expr1); 267 | Expression_free(expr->expr.binary.expr2); 268 | break; 269 | case EXPR_MINUS: 270 | Expression_free(expr->expr.binary.expr1); 271 | Expression_free(expr->expr.binary.expr2); 272 | break; 273 | case EXPR_MULTIPLY: 274 | Expression_free(expr->expr.binary.expr1); 275 | Expression_free(expr->expr.binary.expr2); 276 | break; 277 | case EXPR_DIVIDE: 278 | Expression_free(expr->expr.binary.expr1); 279 | Expression_free(expr->expr.binary.expr2); 280 | break; 281 | case EXPR_NEG: 282 | Expression_free(expr->expr.unary.expr); 283 | break; 284 | case EXPR_TERM: 285 | Term_free(expr->expr.term); 286 | break; 287 | default: 288 | printf("Can't delete unknown expression type '%d')", expr->t); 289 | } 290 | if (expr->alias) free(expr->alias); 291 | free(expr); 292 | } 293 | 294 | void Expression_freeList(Expression_t *expr) { 295 | 296 | } 297 | 298 | /*#define EXPRESSION_TEST*/ 299 | #ifdef EXPRESSION_TEST 300 | int main(int argc, char const *argv[]) 301 | { 302 | Expression_t *a = Term("a"), 303 | *b = Term("b"), 304 | *c = TermLiteral(litInt(5)), 305 | *plus = Plus(a,b), 306 | *mult = Multiply(plus, c); 307 | Expression_print(mult); 308 | append_expression(mult, plus); 309 | puts(""); 310 | Expression_printList(mult); 311 | puts(""); 312 | return 0; 313 | } 314 | #endif 315 | -------------------------------------------------------------------------------- /src/yacc/sql.y: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | #include 4 | #include 5 | #include "../include/common.h" 6 | #include "../include/create.h" 7 | #include "../include/vector.h" 8 | #include "../include/literal.h" 9 | #include "../include/insert.h" 10 | #include "../include/ra.h" 11 | #include "../include/sra.h" 12 | #include "../include/condition.h" 13 | #include "../include/expression.h" 14 | #include "../include/delete.h" 15 | #include "../include/mock_db.h" 16 | 17 | #define YYERROR_VERBOSE 18 | 19 | void yyerror(const char *s); 20 | int yylex(void); 21 | extern int yylineno; 22 | #define YYDEBUG 0 23 | int yydebug=0; 24 | int to_print = 0; 25 | int num_stmts = 0; 26 | 27 | %} 28 | 29 | %union { 30 | double dval; 31 | int ival; 32 | char *strval; 33 | Literal_t *lval; 34 | Constraint_t *constr; 35 | ForeignKeyRef_t fkeyref; 36 | Column_t *col; 37 | KeyDec_t *kdec; 38 | StrList_t *slist; 39 | Insert_t *ins; 40 | Condition_t *cond; 41 | Expression_t *expr; 42 | ColumnReference_t *colref; 43 | Delete_t *del; 44 | SRA_t *sra; 45 | ProjectOption_t *opt; 46 | TableReference_t *tref; 47 | Table_t *tbl; 48 | JoinCondition_t *jcond; 49 | Index_t *idx; 50 | Create_t *cre; 51 | } 52 | 53 | %token CREATE TABLE INSERT INTO SELECT FROM WHERE FULL 54 | %token PRIMARY FOREIGN KEY DEFAULT CHECK NOT TOKEN_NULL 55 | %token AND OR NEQ GEQ LEQ REFERENCES ORDER BY DELETE 56 | %token AS INT DOUBLE CHAR VARCHAR TEXT USING CONSTRAINT 57 | %token JOIN INNER OUTER LEFT RIGHT NATURAL CROSS UNION 58 | %token VALUES AUTO_INCREMENT ASC DESC UNIQUE IN ON 59 | %token COUNT SUM AVG MIN MAX INTERSECT EXCEPT DISTINCT 60 | %token CONCAT TRUE FALSE CASE WHEN DECLARE BIT GROUP 61 | %token INDEX 62 | %token IDENTIFIER 63 | %token STRING_LITERAL 64 | %token DOUBLE_LITERAL 65 | %token INT_LITERAL 66 | 67 | %type column_type bool_op comp_op select_combo 68 | %type function_name opt_distinct join opt_unique 69 | %type column_name table_name opt_alias 70 | %type index_name column_name_or_star 71 | %type column_names_list opt_column_names 72 | %type opt_constraints constraints constraint 73 | %type literal_value values_list in_statement 74 | %type references_stmt 75 | %type column_dec column_dec_list 76 | %type key_dec opt_key_dec_list key_dec_list 77 | %type insert_into 78 | %type condition bool_term where_condition opt_where_condition 79 | %type expression mulexp primary expression_list term 80 | %type column_reference 81 | %type delete_from 82 | %type select select_statement table 83 | %type order_by group_by opt_options 84 | %type table_ref 85 | %type create_table 86 | %type join_condition opt_join_condition 87 | %type create_index 88 | %type create 89 | 90 | %start sql_queries 91 | 92 | %% 93 | 94 | sql_queries 95 | : sql_query 96 | | sql_queries sql_query 97 | ; 98 | 99 | sql_query 100 | : sql_line ';' { /*printf("parsed %d valid SQL statements\n", ++num_stmts);*/ } 101 | ; 102 | 103 | sql_line 104 | : create { /*Create_print($1);*/ } 105 | | select { SRA_print($1); puts(""); } 106 | | insert_into { Insert_print($1); } 107 | | delete_from { Delete_print($1); } 108 | | /* empty */ 109 | ; 110 | 111 | create 112 | : create_table { $$ = Create_fromTable($1); } 113 | | create_index { $$ = Create_fromIndex($1); } 114 | ; 115 | 116 | create_index 117 | : CREATE opt_unique INDEX index_name ON table_name '(' column_name ')' 118 | { 119 | $$ = Index_make($4, $6, $8); 120 | if ($2 == UNIQUE) $$ = Index_makeUnique($$); 121 | } 122 | ; 123 | 124 | opt_unique 125 | : UNIQUE { $$ = UNIQUE; } 126 | | /* empty */ { $$ = 0; } 127 | ; 128 | 129 | index_name 130 | : IDENTIFIER 131 | ; 132 | 133 | create_table 134 | : CREATE TABLE table_name '(' column_dec_list opt_key_dec_list ')' 135 | { 136 | $$ = Table_make($3, $5, $6); 137 | add_table($$); 138 | } 139 | ; 140 | 141 | column_dec_list 142 | : column_dec 143 | | column_dec_list ',' column_dec { $$ = Column_append($1, $3); } 144 | ; 145 | 146 | column_dec 147 | : column_name column_type opt_constraints 148 | { 149 | /*printf("column '%s' (%d)\n", $1, $2);*/ 150 | $$ = Column($1, $2, $3); 151 | } 152 | ; 153 | 154 | column_type 155 | : INT { $$ = TYPE_INT; } 156 | | DOUBLE { $$ = TYPE_DOUBLE; } 157 | | CHAR { $$ = TYPE_CHAR; } 158 | | VARCHAR { $$ = TYPE_TEXT; } 159 | | TEXT { $$ = TYPE_TEXT; } 160 | | column_type '(' INT_LITERAL ')' 161 | { 162 | $$ = $1; 163 | if ($3 <= 0) { 164 | fprintf(stderr, "Error: sizes must be greater than 0 (line %d).\n", yylineno); 165 | exit(1); 166 | } 167 | Column_setSize($3); 168 | } 169 | ; 170 | 171 | opt_key_dec_list 172 | : ',' key_dec_list {$$ = $2;} 173 | | /* empty */ {$$ = NULL; } 174 | ; 175 | 176 | key_dec_list 177 | : key_dec 178 | | key_dec_list ',' key_dec { $$ = KeyDec_append($1, $3); } 179 | ; 180 | 181 | key_dec 182 | : PRIMARY KEY '(' column_names_list ')' 183 | { $$ = PrimaryKeyDec($4); } 184 | | FOREIGN KEY '(' column_name ')' references_stmt 185 | {$$ = ForeignKeyDec(ForeignKeyRef_makeFull($4, $6)); } 186 | 187 | references_stmt 188 | : REFERENCES table_name { $$ = ForeignKeyRef_make($2, NULL); } 189 | | REFERENCES table_name '(' column_name ')' { $$ = ForeignKeyRef_make($2, $4); } 190 | ; 191 | 192 | opt_constraints 193 | : constraints 194 | | /* empty */ { $$ = NULL; } 195 | ; 196 | 197 | constraints 198 | : constraint { $$ = Constraint_append(NULL, $1); 199 | /*printf("new constraint:"); 200 | Constraint_print($1); 201 | printf("created a vector of constraints\n"); 202 | Constraint_printList($$);*/ 203 | } 204 | | constraint constraints { $$ = Constraint_append($2, $1); 205 | /*printf("appended a constraint\n"); 206 | Constraint_printList($$);*/ 207 | } 208 | ; 209 | 210 | constraint 211 | : NOT TOKEN_NULL { $$ = NotNull(); } 212 | | UNIQUE { $$ = Unique(); } 213 | | PRIMARY KEY { $$ = PrimaryKey(); } 214 | | FOREIGN KEY references_stmt { $$ = ForeignKey($3); } 215 | | DEFAULT literal_value { $$ = Default($2); } 216 | | AUTO_INCREMENT { $$ = AutoIncrement(); } 217 | | CHECK condition { $$ = Check($2); } 218 | ; 219 | 220 | select 221 | : select_statement 222 | | select select_combo select_statement 223 | { 224 | $$ = ($2 == UNION) ? SRAUnion($1, $3) : 225 | ($2 == INTERSECT) ? SRAIntersect($1, $3) : 226 | SRAExcept($1, $3); 227 | } 228 | ; 229 | 230 | select_combo 231 | : UNION {$$ = UNION;} 232 | | INTERSECT {$$ = INTERSECT;} 233 | | EXCEPT {$$ = EXCEPT;} 234 | ; 235 | 236 | select_statement 237 | : SELECT opt_distinct expression_list FROM table opt_where_condition opt_options 238 | { 239 | if ($6 != NULL) 240 | $$ = SRAProject(SRASelect($5, $6), $3); 241 | else 242 | $$ = SRAProject($5, $3); 243 | if ($7 != NULL) 244 | $$ = SRA_applyOption($$, $7); 245 | if ($2 == DISTINCT) 246 | $$ = SRA_makeDistinct($$); 247 | } 248 | | '(' select_statement ')' { $$ = $2; } 249 | ; 250 | 251 | opt_distinct 252 | : DISTINCT { $$ = DISTINCT;} 253 | | /* empty */ { $$ = 0; } 254 | ; 255 | 256 | opt_options 257 | : order_by {$$ = $1; } 258 | | group_by {$$ = $1; } 259 | | order_by group_by {$$ = ProjectOption_combine($1, $2);} 260 | | group_by order_by {$$ = ProjectOption_combine($1, $2);} 261 | | /* empty */ { $$ = NULL; } 262 | ; 263 | 264 | opt_where_condition 265 | : where_condition {$$ = $1;} 266 | | /* empty */ {$$ = NULL;} 267 | ; 268 | 269 | where_condition 270 | : WHERE condition { $$ = $2; } 271 | ; 272 | 273 | group_by 274 | : GROUP BY expression { $$ = GroupBy_make($3); } 275 | ; 276 | 277 | order_by 278 | : ORDER BY expression { $$ = OrderBy_make($3, ORDER_BY_ASC); } 279 | | ORDER BY expression ASC { $$ = OrderBy_make($3, ORDER_BY_ASC); } 280 | | ORDER BY expression DESC { $$ = OrderBy_make($3, ORDER_BY_DESC); } 281 | ; 282 | 283 | condition 284 | : bool_term { $$ = $1; /*printf("Found condition: \n"); Condition_print($$); puts(""); */} 285 | | bool_term bool_op condition 286 | { 287 | $$ = ($2 == AND) ? And($1, $3) : Or($1, $3); 288 | /* printf("Found condition: \n"); Condition_print($$); puts(""); */ 289 | } 290 | ; 291 | 292 | bool_term 293 | : expression comp_op expression 294 | { 295 | $$ = ($2 == '=') ? Eq($1, $3) : 296 | ($2 == '>') ? Gt($1, $3) : 297 | ($2 == '<') ? Lt($1, $3) : 298 | ($2 == GEQ) ? Leq($1, $3) : 299 | ($2 == LEQ) ? Geq($1, $3) : 300 | Not(Eq($1, $3)); 301 | } 302 | | expression in_statement { $$ = In($1, $2); } 303 | | '(' condition ')' { $$ = $2; } 304 | | NOT bool_term { $$ = Not($2); } 305 | ; 306 | 307 | in_statement 308 | : IN '(' values_list ')' { $$ = $3; } 309 | | IN '(' select ')' 310 | { 311 | fprintf(stderr, "****WARNING: IN SELECT statement not yet supported\n"); 312 | } 313 | ; 314 | 315 | bool_op 316 | : AND { $$ = AND; } 317 | | OR { $$ = OR; } 318 | ; 319 | 320 | comp_op 321 | : '=' { $$ = '='; } 322 | | '>' { $$ = '>'; } 323 | | '<' { $$ = '<'; } 324 | | GEQ { $$ = GEQ; } 325 | | LEQ { $$ = LEQ; } 326 | | NEQ { $$ = NEQ; } 327 | ; 328 | 329 | expression_list 330 | : expression opt_alias { $$ = add_alias($1, $2); } 331 | | expression_list ',' expression opt_alias { $$ = append_expression($1, add_alias($3, $4)); } 332 | ; 333 | 334 | expression 335 | : expression '+' mulexp { $$ = Plus($1, $3); } 336 | | expression '-' mulexp { $$ = Minus($1, $3); } 337 | | mulexp { $$ = $1; } 338 | ; 339 | 340 | mulexp 341 | : mulexp '*' primary { $$ = Multiply($1, $3); } 342 | | mulexp '/' primary { $$ = Divide($1, $3); } 343 | | mulexp CONCAT primary { $$ = Concat($1, $3); } 344 | | primary { $$ = $1; } 345 | ; 346 | 347 | primary 348 | : '(' expression ')' { $$ = $2; } 349 | | '-' primary { $$ = Neg($2); } 350 | | term { $$ = $1; } 351 | ; 352 | 353 | term 354 | : literal_value { $$ = TermLiteral($1); } 355 | | TOKEN_NULL { $$ = TermNull(); } 356 | | column_reference { $$ = TermColumnReference($1); } 357 | | function_name '(' expression ')' { $$ = TermFunction($1, $3); } 358 | ; 359 | 360 | column_reference 361 | : column_name_or_star { $$ = ColumnReference_make(NULL, $1); } 362 | | table_name '.' column_name_or_star 363 | { $$ = ColumnReference_make($1, $3); } 364 | ; 365 | 366 | opt_alias 367 | : AS IDENTIFIER { $$ = $2; } 368 | | IDENTIFIER 369 | | /* empty */ { $$ = NULL; } 370 | ; 371 | 372 | function_name 373 | : COUNT { $$ = FUNC_COUNT; } 374 | | SUM { $$ = FUNC_SUM; } 375 | | AVG { $$ = FUNC_AVG; } 376 | | MIN { $$ = FUNC_MIN; } 377 | | MAX{ $$ = FUNC_MAX; } 378 | ; 379 | 380 | column_name_or_star 381 | : '*' { $$ = strdup("*"); } 382 | | column_name 383 | ; 384 | 385 | column_name 386 | : IDENTIFIER 387 | ; 388 | 389 | table_name 390 | : IDENTIFIER 391 | ; 392 | 393 | table 394 | : table_ref { $$ = SRATable($1); } 395 | | table default_join table_ref opt_join_condition { $$ = SRAJoin($1, SRATable($3), $4); } 396 | | table join table_ref opt_join_condition 397 | { 398 | switch ($2) { 399 | case SRA_NATURAL_JOIN: 400 | $$ = SRANaturalJoin($1, SRATable($3)); 401 | if ($4) { 402 | fprintf(stderr, 403 | "Line %d: WARNING: a NATURAL join cannot have an ON " 404 | "or USING clause. This will be ignored.\n", yylineno); 405 | } 406 | break; 407 | case SRA_LEFT_OUTER_JOIN: 408 | $$ = SRALeftOuterJoin($1, SRATable($3), $4); break; 409 | case SRA_RIGHT_OUTER_JOIN: 410 | $$ = SRARightOuterJoin($1, SRATable($3), $4); break; 411 | case SRA_FULL_OUTER_JOIN: 412 | $$ = SRAFullOuterJoin($1, SRATable($3), $4); break; 413 | } 414 | } 415 | ; 416 | 417 | opt_join_condition 418 | : join_condition 419 | | /* empty */ { $$ = NULL; } 420 | ; 421 | 422 | join_condition 423 | : ON condition { $$ = On($2); } 424 | | USING '(' column_names_list ')' { $$ = Using($3); } 425 | ; 426 | 427 | table_ref 428 | : table_name opt_alias { $$ = TableReference_make($1, $2);} 429 | ; 430 | 431 | join 432 | : LEFT opt_outer JOIN {$$ = SRA_LEFT_OUTER_JOIN; } 433 | | RIGHT opt_outer JOIN { $$ = SRA_RIGHT_OUTER_JOIN; } 434 | | FULL opt_outer JOIN { $$ = SRA_FULL_OUTER_JOIN; } 435 | | NATURAL JOIN { $$ = SRA_NATURAL_JOIN; } 436 | ; 437 | 438 | default_join 439 | : ',' | JOIN | CROSS JOIN | INNER JOIN 440 | ; 441 | 442 | opt_outer 443 | : OUTER 444 | | /* empty */ 445 | ; 446 | 447 | insert_into 448 | : INSERT INTO table_name opt_column_names VALUES '(' values_list ')' 449 | { 450 | $$ = Insert_make(RA_Table($3), $4, $7); 451 | } 452 | ; 453 | 454 | opt_column_names 455 | : '(' column_names_list ')' { $$ = $2; } 456 | | /* empty */ { $$ = NULL; } 457 | ; 458 | 459 | column_names_list 460 | : column_name { $$ = StrList_make($1); } 461 | | column_names_list ',' column_name { $$ = StrList_append($1, StrList_make($3)); } 462 | ; 463 | 464 | values_list 465 | : literal_value 466 | | values_list ',' literal_value 467 | { 468 | $$ = Literal_append($1, $3); 469 | 470 | } 471 | ; 472 | 473 | literal_value 474 | : INT_LITERAL { $$ = litInt($1); } 475 | | DOUBLE_LITERAL { $$ = litDouble($1); } 476 | | STRING_LITERAL 477 | { 478 | if (strlen($1) == 1) 479 | $$ = litChar($1[0]); 480 | else 481 | $$ = litText($1); 482 | } 483 | ; 484 | 485 | delete_from 486 | : DELETE FROM table_name where_condition 487 | { 488 | $$ = Delete_make($3, $4); 489 | } 490 | ; 491 | 492 | %% 493 | 494 | void yyerror(const char *s) { 495 | fprintf(stderr, "%s (line %d)\n", s, yylineno); 496 | } 497 | 498 | List_t *tables = NULL; 499 | 500 | int main(int argc, char **argv) { 501 | int i; 502 | puts("Welcome to chiSQL! :)"); 503 | puts("calling init"); 504 | mock_db_init(); 505 | for (i=1; i 1 ? "cols" : "col"); 527 | 528 | puts("Thanks for using chiSQL :)\n"); 529 | return 0; 530 | } -------------------------------------------------------------------------------- /src/list.c: -------------------------------------------------------------------------------- 1 | #include "../include/list.h" 2 | 3 | 4 | void printNode(ListNode_t *node); 5 | void changeSize(List_t *list, size_t change); 6 | void setFront(List_t *list, ListNode_t *node); 7 | void setBack(List_t *list, ListNode_t *node); 8 | void changeSize(List_t *l, size_t change); 9 | void setFront(List_t *list, ListNode_t *node); 10 | void setBack(List_t *list, ListNode_t *node); 11 | void list_addNode(List_t *list, ListNode_t *node, ListNode_t *prev, ListNode_t *next); 12 | 13 | void list_init(List_t *list, void (*del) (void *)) { 14 | memset(list, 0, sizeof(List_t)); 15 | pthread_mutex_init(&list->lock, NULL); 16 | list->del = del; 17 | } 18 | 19 | void setFront(List_t *list, ListNode_t *node) { 20 | list->front = node; 21 | if (node) node->prev = NULL; 22 | } 23 | 24 | void setBack(List_t *list, ListNode_t *node) { 25 | list->back = node; 26 | if (node) node->next = NULL; 27 | } 28 | 29 | void *list_removeNode(List_t *list, ListNode_t *node) { 30 | pthread_mutex_lock (&list->lock); 31 | 32 | /* if there's a previous pointer on this node, then set it to the next */ 33 | if (node->prev) node->prev->next = node->next; 34 | /* if there isn't, then this was the front of the list */ 35 | else setFront(list, node->next); 36 | 37 | /* if there's a next pointer, then set it to the previous */ 38 | if (node->next) node->next->prev = node->prev; 39 | /* if there isn't, then this was the back of the list */ 40 | else setBack(list, node->prev); 41 | 42 | void *data = node->data; 43 | free(node); 44 | 45 | list->size--; 46 | 47 | if (list->size == 1) { 48 | if (list->front) 49 | list->back = list->front; 50 | else 51 | list->front = list->back; 52 | } 53 | 54 | pthread_mutex_unlock(&list->lock); 55 | 56 | return data; 57 | } 58 | 59 | void list_removeNodeDelete(List_t *list, ListNode_t *node) { 60 | void *data = list_removeNode(list, node); 61 | if (list->del) list->del(data); 62 | } 63 | 64 | void list_addBetween(List_t *list, void *data, ListNode_t *prev, ListNode_t *next) { 65 | ListNode_t *node = listNode_init(data, prev, next); 66 | if (node) list_addNode(list, node, prev, next); 67 | } 68 | 69 | 70 | bool list_addInOrder(List_t *l, void *data){ 71 | ListNode_t *curr; 72 | 73 | /* If no compare function, return false */ 74 | if (!l->compare) return false; 75 | curr = l->front; 76 | while (curr) { 77 | if (l->compare(data,curr->data) < 0){ 78 | list_addBetween(l, data, curr->prev, curr); 79 | return true; 80 | } 81 | curr = curr->next; 82 | } /* finished w/o adding anything, biggest, add to end */ 83 | return list_addBack(l,data); 84 | } 85 | 86 | void list_addNode(List_t *list, ListNode_t *node, ListNode_t *prev, ListNode_t *next) { 87 | pthread_mutex_lock (&list->lock); 88 | 89 | if (prev) 90 | prev->next = node; 91 | else 92 | setFront(list, node); 93 | 94 | if (next) 95 | next->prev = node; 96 | else 97 | setBack(list, node); 98 | 99 | list->size++; 100 | pthread_mutex_unlock(&list->lock); 101 | } 102 | 103 | void changeSize(List_t *list, size_t change) { 104 | pthread_mutex_lock (&list->lock); 105 | list->size += change; 106 | pthread_mutex_unlock(&list->lock); 107 | } 108 | 109 | void *list_removeFront(List_t *list) { 110 | if (list->size > 0) 111 | return list_removeNode(list, list->front); 112 | return NULL; 113 | } 114 | 115 | void list_destroy(List_t *list) { 116 | ListNode_t *p = list->front, *tmp; 117 | while (p) { 118 | if (list->del) { list->del(p->data); } 119 | tmp = p; 120 | p = p->next; 121 | free(tmp); 122 | } 123 | } 124 | 125 | ListNode_t *listNode_init(void *data, ListNode_t *prev, ListNode_t *next) { 126 | ListNode_t *node = (ListNode_t *)malloc(sizeof(ListNode_t)); 127 | if (node) { 128 | node->data = data; 129 | node->next = next; 130 | node->prev = prev; 131 | } 132 | return node; 133 | } 134 | 135 | bool list_addBack(List_t *list, void *data) { 136 | ListNode_t *node = listNode_init(data, list->back, NULL); 137 | 138 | if (node) { 139 | list_addNode(list, node, list->back, NULL); 140 | return true; 141 | } 142 | return false; 143 | } 144 | 145 | bool list_addFront(List_t *list, void *data) { 146 | ListNode_t *node = listNode_init(data, NULL, list->front); 147 | if (node) { 148 | list_addNode(list, node, NULL, list->back); 149 | return true; 150 | } 151 | return false; 152 | } 153 | 154 | void *list_removeBack(List_t *list) { 155 | if (list->size > 0) 156 | return list_removeNode(list, list->back); 157 | return NULL; 158 | } 159 | 160 | void *list_findByInt(List_t *list, int (*toInt) (void *), int i) { 161 | ListNode_t *node = list->front; 162 | while (node) { 163 | if (i == toInt(node->data)) 164 | return node->data; 165 | node = node->next; 166 | } 167 | return NULL; 168 | } 169 | 170 | bool list_removeByInt(List_t *list, int (*toInt) (void *), int i) { 171 | ListNode_t *node = list->front, *prev = NULL; 172 | while (node) { 173 | if (i == toInt(node->data)) { 174 | if (prev) 175 | prev->next = node->next; 176 | else 177 | list->front = node->next; 178 | if (!node->next) 179 | list->back = prev; 180 | if (list->del) 181 | list->del(node->data); 182 | list->size--; 183 | free(node); 184 | return true; 185 | } 186 | prev = node; 187 | node = node->next; 188 | } 189 | return false; 190 | } 191 | 192 | void *list_findByString(List_t *list, void (*toString) (char *,void *), const char *str) { 193 | char buf[1024]; 194 | ListNode_t *node = list->front; 195 | if (!str) return NULL; 196 | while (node) { 197 | toString(buf, node->data); 198 | if (!strcmp(str, buf)) 199 | return node->data; 200 | node = node->next; 201 | } 202 | return NULL; 203 | } 204 | 205 | void printNode(ListNode_t *node) { 206 | printf("Node %p, prev = %p, next = %p, data = %p\n", node, node->prev, node->next, node->data); 207 | } 208 | 209 | void list_printCustom(List_t *list, char * (*toString) (void *), bool freeAfter) { 210 | ListNode_t *node = list->front; 211 | printf("List_t of size %lu, front = %p, back = %p\n", list->size, list->front, list->back); 212 | while (node) { 213 | printNode(node); 214 | if (toString) { 215 | char *str = toString(node->data); 216 | printf("string = %s\n", str); 217 | if (freeAfter) free(str); 218 | } 219 | node = node->next; 220 | } 221 | } 222 | 223 | void list_print(List_t *list, bool verbose) { 224 | ListNode_t *node = list->front; 225 | if (verbose) 226 | printf("List_t of size %lu, front = %p, back = %p\n", 227 | list->size, list->front, list->back); 228 | while (node) { 229 | if (verbose) 230 | printNode(node); 231 | if (list->toString) { 232 | char *str = list->toString(node->data); 233 | printf("%s\n", str); 234 | } else if (list->print) { 235 | list->print(node->data); 236 | } else { 237 | printf("(no print function defined)\n"); 238 | } 239 | node = node->next; 240 | } 241 | } 242 | 243 | bool list_removeByString(List_t *list, void (*toString) (char *,void *), const char *str) { 244 | ListNode_t *node = list->front, *prev = NULL; 245 | char buf[50]; 246 | while (node) { 247 | toString(buf, node->data); 248 | if (!strcmp(str, buf)) { 249 | if (prev) 250 | prev->next = node->next; 251 | else 252 | list->front = node->next; 253 | if (!node->next) 254 | list->back = prev; 255 | if (list->del) 256 | list->del(node->data); 257 | list->size--; 258 | free(node); 259 | return true; 260 | } 261 | prev = node; 262 | node = node->next; 263 | } 264 | return false; 265 | } 266 | 267 | void *list_findByPointer(List_t *list, void *data) { 268 | ListNode_t *cur = list->front; 269 | while (cur) { 270 | if (cur->data == data) 271 | return data; 272 | cur = cur->next; 273 | } 274 | return NULL; 275 | } 276 | 277 | void *list_removeByPointer(List_t *list, void *ptr) { 278 | ListNode_t *cur = list->front; 279 | while (cur) { 280 | if (cur->data == ptr) 281 | return list_removeNode(list, cur); 282 | cur = cur->next; 283 | } 284 | return NULL; 285 | } 286 | 287 | 288 | void list_removeByPointerFree(List_t *list, void *ptr) { 289 | void *data = list_removeByPointer(list, ptr); 290 | if (data && list->del) 291 | list->del(data); 292 | } 293 | 294 | void list_setPrintFunc(List_t *l, void (*print) (void *)) { 295 | l->print = print; 296 | } 297 | 298 | void list_filter(List_t *l, bool (*pred) (void *)) { 299 | ListNode_t *node = l->front; 300 | while (node) { 301 | ListNode_t *next = node->next; 302 | if (!pred(node->data)) { 303 | list_removeNode(l, node); 304 | } 305 | node = next; 306 | } 307 | } 308 | 309 | void list_filterDelete(List_t *l, bool (*pred) (void *)) { 310 | ListNode_t *node = l->front; 311 | while (node) { 312 | ListNode_t *next = node->next; 313 | if (!pred(node->data)) { 314 | list_removeNodeDelete(l, node); 315 | } 316 | node = next; 317 | } 318 | } 319 | 320 | void list_filterNew(List_t *l, bool (*pred) (void *)) { 321 | List_t res; 322 | ListNode_t *node; 323 | memcpy(&res, l, sizeof(List_t)); /* for convenience */ 324 | res.size = 0; /* reset size and front/back pointers */ 325 | res.front = res.back = NULL; 326 | assert(l->copy && "copy method must be defined"); 327 | for (node = l->front; node; node = node->next) { 328 | if (pred(node->data)) { 329 | list_addBack(&res, l->copy(node)); 330 | } 331 | } 332 | } 333 | 334 | void list_map(List_t *l, void *(*f) (void *)) { 335 | ListNode_t *node = l->front; 336 | while (node) { 337 | node->data = f(node->data); 338 | node = node->next; 339 | } 340 | } 341 | 342 | void list_mapDelete(List_t *l, void *(*f) (void *)) { 343 | ListNode_t *node = l->front; 344 | while (node) { 345 | void *tmp = node->data; 346 | node->data = f(node->data); 347 | if (l->del) l->del(tmp); 348 | node = node->next; 349 | } 350 | } 351 | 352 | List_t list_mapNew(List_t *l, void *(*f) (void *)) { 353 | List_t res; 354 | ListNode_t *node; 355 | memcpy(&res, l, sizeof(List_t)); /* for convenience */ 356 | res.size = 0; /* reset size and front/back pointers */ 357 | res.front = res.back = NULL; 358 | for (node = l->front; node; node = node->next) { 359 | list_addBack(&res, f(node->data)); 360 | } 361 | return res; 362 | } 363 | 364 | void list_sort(List_t *l) { 365 | if (!l->compare) { 366 | fprintf(stderr, "Error: no compare function defined. Cannot sort.\n"); 367 | return; 368 | } 369 | if (!l->elem_size) { 370 | fprintf(stderr, "Error: element size is not defined. Cannot sort.\n"); 371 | return; 372 | } 373 | if (l->size > 0) { 374 | /* copy all elements into an array */ 375 | void **arr = calloc(l->size, sizeof(void *)); 376 | ListNode_t *node = l->front; 377 | size_t idx = 0; 378 | while (node) { 379 | ListNode_t *next = node->next; 380 | arr[idx++] = node->data; 381 | list_removeNode(l, node); /* frees the node, not the data*/ 382 | node = next; 383 | } 384 | 385 | assert(l->size == 0 && "List is not empty for some reason"); 386 | /* use quicksort to sort the array */ 387 | qsort(arr, l->size, l->elem_size, l->compare); 388 | 389 | /* copy the elements back */ 390 | for (idx = 0; idx < l->size; ++idx) { 391 | list_addBack(l, arr[idx]); 392 | } 393 | } 394 | } 395 | 396 | List_t list_union(List_t *l1, List_t *l2) { 397 | if (!l1->copy || l1->copy != l2->copy) { 398 | fprintf(stderr, "Error: copy function not defined. Can't perform union\n"); 399 | exit(1); 400 | } 401 | if (!l1->compare || (l1->compare != l2->compare)) { 402 | fprintf(stderr, "Error: compare not defined, or not the same comparison" 403 | "function. Can't perform union\n"); 404 | exit(1); 405 | } else if (l1->size == 0) { 406 | return list_deepCopy(l2); 407 | } else if (l2->size == 0) { 408 | return list_deepCopy(l1); 409 | } else { 410 | /* this isn't a very efficient algorithm, but it gets the job done. If 411 | you want something better, go write a hash table :) 412 | */ 413 | List_t res = list_deepCopy(l1); 414 | res.name = NULL; /* take away name, just in case */ 415 | ListNode_t *node; 416 | /* scan through all of the nodes in second list and take any that 417 | aren't already in our result 418 | */ 419 | for (node = l2->front; node; node = node->next) { 420 | ListNode_t *node2; 421 | bool found = false; 422 | for (node2 = res.front; node2; node2 = node2->next) { 423 | /* if compare == 0, we already have it */ 424 | if (!l2->compare(node->data, node2->data)) { 425 | found = true; 426 | break; 427 | } 428 | } 429 | /* if we didn't find it, add it */ 430 | if (!found) 431 | list_addBack(&res, l2->copy(node->data)); 432 | } 433 | return res; 434 | } 435 | } 436 | 437 | List_t list_intersection(List_t *l1, List_t *l2) { 438 | if (!l1->copy || l1->copy != l2->copy) { 439 | fprintf(stderr, "Error: copy function not defined. Can't " 440 | "perform intersection\n"); 441 | exit(1); 442 | } else if (!l1->compare || (l1->compare != l2->compare)) { 443 | fprintf(stderr, "Error: compare not defined, or not the same comparison " 444 | "function. Can't perform intersection\n"); 445 | exit(1); 446 | } else if (l1->size == 0 || l2->size == 0) { 447 | List_t res; 448 | list_init(&res, l1->del); 449 | return res; 450 | } else { 451 | List_t res; 452 | ListNode_t *node; 453 | list_init(&res, l1->del); 454 | res.compare = l1->compare; 455 | res.print = l1->print; 456 | res.copy = l1->copy; 457 | res.toString = l1->toString; 458 | /* scan through all of the nodes in first list and take any that 459 | are also in second list. once again, not very efficient.... 460 | */ 461 | for (node = l1->front; node; node = node->next) { 462 | ListNode_t *node2; 463 | for (node2 = res.front; node2; node2 = node2->next) { 464 | /* if compare == 0, it's a match so we add it */ 465 | if (!l2->compare(node->data, node2->data)) { 466 | list_addBack(&res, l2->copy(node->data)); 467 | } 468 | } 469 | } 470 | return res; 471 | } 472 | } 473 | 474 | List_t list_difference(List_t *l1, List_t *l2) { 475 | if (!l1->copy || l1->copy != l2->copy) { 476 | fprintf(stderr, "Error: copy function not defined. Can't " 477 | "perform difference\n"); 478 | exit(1); 479 | } else if (!l1->compare || (l1->compare != l2->compare)) { 480 | fprintf(stderr, "Error: compare not defined, or not the same comparison " 481 | "function. Can't perform intersection\n"); 482 | exit(1); 483 | } else { 484 | /* copy the first list */ 485 | List_t res = list_deepCopy(l1); 486 | ListNode_t *node; 487 | /* find all of the pairs and remove them */ 488 | for (node = l1->front; node; node = node->next) { 489 | ListNode_t *node2; 490 | for (node2 = res.front; node2; node2 = node2->next) { 491 | /* if compare == 0, it's a match so we remove it */ 492 | if (!l2->compare(node->data, node2->data)) { 493 | list_removeNode(&res, node); 494 | } 495 | } 496 | } 497 | return res; 498 | } 499 | } 500 | 501 | List_t list_deepCopy(List_t *l) { 502 | if (!l->copy) { 503 | fprintf(stderr, "Error: no copy function defined. Can't deepCopy\n"); 504 | exit(1); 505 | } else { 506 | List_t res; 507 | ListNode_t *node = l->front; 508 | list_init(&res, l->del); 509 | res.copy = l->copy; 510 | res.toString = l->toString; 511 | res.print = l->print; 512 | res.elem_size = l->elem_size; 513 | res.name = l->name; 514 | while (node) { 515 | list_addBack(&res, l->copy(node->data)); 516 | node = node->next; 517 | } 518 | assert(res.size == l->size && "Sizes don't match for some reason"); 519 | return res; 520 | } 521 | } 522 | 523 | void list_setCopyFunc(List_t *l, void *(*copy)(void *)) { 524 | l->copy = copy; 525 | } 526 | 527 | void list_setCompFunc(List_t *l, int (*comp)(const void *, const void *)) { 528 | l->compare = comp; 529 | } 530 | -------------------------------------------------------------------------------- /src/sra.c: -------------------------------------------------------------------------------- 1 | #include "../include/sra.h" 2 | 3 | static SRA_t *SRABinary(SRA_t *sra1, SRA_t *sra2, enum SRAType t); 4 | 5 | SRA_t *SRATable(TableReference_t *ref) { 6 | SRA_t *sra = (SRA_t *)calloc(1, sizeof(SRA_t)); 7 | sra->t = SRA_TABLE; 8 | sra->table.ref = ref; 9 | return sra; 10 | } 11 | 12 | SRA_t *SRAProject(SRA_t *sra, Expression_t *expr) { 13 | SRA_t *new_sra = (SRA_t *)calloc(1, sizeof(SRA_t)); 14 | new_sra->t = SRA_PROJECT; 15 | new_sra->project.sra = sra; 16 | new_sra->project.expr_list = expr; 17 | return new_sra; 18 | } 19 | 20 | SRA_t *SRASelect(SRA_t *sra, Condition_t *cond) { 21 | if (!cond) { 22 | return sra; 23 | } else { 24 | SRA_t *new_sra = (SRA_t *)calloc(1, sizeof(SRA_t)); 25 | new_sra->t = SRA_SELECT; 26 | new_sra->select.sra = sra; 27 | new_sra->select.cond = cond; 28 | return new_sra; 29 | } 30 | } 31 | 32 | SRA_t *SRAJoin(SRA_t *sra1, SRA_t *sra2, JoinCondition_t *cond) { 33 | SRA_t *new_sra = (SRA_t *)calloc(1, sizeof(SRA_t)); 34 | new_sra->t = SRA_JOIN; 35 | new_sra->join.sra1 = sra1; 36 | new_sra->join.sra2 = sra2; 37 | new_sra->join.opt_cond = cond; 38 | return new_sra; 39 | } 40 | 41 | SRA_t *SRALeftOuterJoin(SRA_t *sra1, SRA_t *sra2, JoinCondition_t *cond) { 42 | SRA_t *res = SRAJoin(sra1, sra2, cond); 43 | res->t = SRA_LEFT_OUTER_JOIN; 44 | return res; 45 | } 46 | 47 | SRA_t *SRARightOuterJoin(SRA_t *sra1, SRA_t *sra2, JoinCondition_t *cond) { 48 | SRA_t *res = SRAJoin(sra1, sra2, cond); 49 | res->t = SRA_RIGHT_OUTER_JOIN; 50 | return res; 51 | } 52 | SRA_t *SRAFullOuterJoin(SRA_t *sra1, SRA_t *sra2, JoinCondition_t *cond) { 53 | SRA_t *res = SRAJoin(sra1, sra2, cond); 54 | res->t = SRA_FULL_OUTER_JOIN; 55 | return res; 56 | } 57 | 58 | static SRA_t *SRABinary(SRA_t *sra1, SRA_t *sra2, enum SRAType t) { 59 | SRA_t *sra = (SRA_t *)calloc(1, sizeof(SRA_t)); 60 | sra->t = t; 61 | sra->binary.sra1 = sra1; 62 | sra->binary.sra2 = sra2; 63 | return sra; 64 | } 65 | 66 | SRA_t *SRAUnion(SRA_t *sra1, SRA_t *sra2) { 67 | return SRABinary(sra1, sra2, SRA_UNION); 68 | } 69 | 70 | SRA_t *SRAExcept(SRA_t *sra1, SRA_t *sra2) { 71 | return SRABinary(sra1, sra2, SRA_EXCEPT); 72 | } 73 | 74 | SRA_t *SRAIntersect(SRA_t *sra1, SRA_t *sra2) { 75 | return SRABinary(sra1, sra2, SRA_INTERSECT); 76 | } 77 | 78 | SRA_t *SRANaturalJoin(SRA_t *sra1, SRA_t *sra2) { 79 | return SRABinary(sra1, sra2, SRA_NATURAL_JOIN); 80 | } 81 | 82 | void SRA_print(SRA_t *sra) { 83 | if (!sra) return; 84 | switch(sra->t) { 85 | case SRA_TABLE: 86 | indent_print("Table(%s", sra->table.ref->table_name); 87 | if (sra->table.ref->alias) printf(" as %s", sra->table.ref->alias); 88 | printf(")"); 89 | break; 90 | case SRA_SELECT: 91 | indent_print("Select("); 92 | Condition_print(sra->select.cond); 93 | printf(", "); 94 | upInd(); 95 | SRA_print(sra->select.sra); 96 | downInd(); 97 | indent_print(")"); 98 | break; 99 | case SRA_PROJECT: 100 | indent_print("Project("); 101 | Expression_printList(sra->project.expr_list); 102 | printf(", "); 103 | upInd(); 104 | SRA_print(sra->project.sra); 105 | if (sra->project.distinct || 106 | sra->project.group_by || 107 | sra->project.order_by) { 108 | printf(",\n"); 109 | indent_print("Options: "); 110 | if (sra->project.distinct) 111 | printf("Distinct "); 112 | if (sra->project.group_by) { 113 | printf("Group by "); 114 | Expression_print(sra->project.group_by); 115 | printf(" "); 116 | } 117 | if (sra->project.order_by) { 118 | printf("Order by "); 119 | Expression_print(sra->project.order_by); 120 | printf(sra->project.asc_desc == ORDER_BY_ASC ? " a" : " de"); 121 | printf("scending"); 122 | } 123 | } 124 | downInd(); 125 | indent_print(")"); 126 | break; 127 | case SRA_UNION: 128 | indent_print("Union("); 129 | upInd(); 130 | SRA_print(sra->binary.sra1); 131 | indent_print(", "); 132 | SRA_print(sra->binary.sra2); 133 | downInd(); 134 | indent_print(")"); 135 | break; 136 | case SRA_EXCEPT: 137 | indent_print("Except("); 138 | upInd(); 139 | SRA_print(sra->binary.sra1); 140 | indent_print(", "); 141 | SRA_print(sra->binary.sra2); 142 | downInd(); 143 | indent_print(")"); 144 | break; 145 | case SRA_INTERSECT: 146 | indent_print("Intersect("); 147 | upInd(); 148 | SRA_print(sra->binary.sra1); 149 | indent_print(", "); 150 | SRA_print(sra->binary.sra2); 151 | downInd(); 152 | indent_print(")"); 153 | break; 154 | case SRA_JOIN: 155 | indent_print("Join("); 156 | upInd(); 157 | SRA_print(sra->binary.sra1); 158 | printf(", \n"); 159 | SRA_print(sra->binary.sra2); 160 | if (sra->join.opt_cond) { 161 | printf(",\n"); 162 | indent_print(""); 163 | JoinCondition_print(sra->join.opt_cond); 164 | } 165 | downInd(); 166 | indent_print(")"); 167 | break; 168 | case SRA_NATURAL_JOIN: 169 | indent_print("NaturalJoin("); 170 | upInd(); 171 | SRA_print(sra->binary.sra1); 172 | printf(", \n"); 173 | SRA_print(sra->binary.sra2); 174 | downInd(); 175 | indent_print(")"); 176 | break; 177 | case SRA_LEFT_OUTER_JOIN: 178 | case SRA_RIGHT_OUTER_JOIN: 179 | case SRA_FULL_OUTER_JOIN: 180 | if (sra->t == SRA_LEFT_OUTER_JOIN) indent_print("Left"); 181 | else if (sra->t == SRA_RIGHT_OUTER_JOIN) indent_print("Right"); 182 | else indent_print("Full"); 183 | printf("OuterJoin("); 184 | upInd(); 185 | SRA_print(sra->join.sra1); 186 | printf(",\n"); 187 | SRA_print(sra->join.sra2); 188 | if (sra->join.opt_cond) { 189 | printf(",\n"); 190 | indent_print(""); 191 | JoinCondition_print(sra->join.opt_cond); 192 | } 193 | downInd(); 194 | indent_print(")"); 195 | break; 196 | default: 197 | puts("Unknown SRA type"); 198 | } 199 | } 200 | 201 | void JoinCondition_print(JoinCondition_t *cond) { 202 | if (cond->t == JOIN_COND_ON) { 203 | printf("On: "); 204 | Condition_print(cond->on); 205 | } 206 | else if (cond->t == JOIN_COND_USING) { 207 | printf("Using: "); 208 | StrList_print(cond->col_list); 209 | } 210 | else { 211 | printf("(Unknown JoinCondition type)"); 212 | } 213 | } 214 | 215 | SRA_t *SRA_applyOption(SRA_t *sra, ProjectOption_t *option) { 216 | if (sra->t != SRA_PROJECT) { 217 | fprintf(stderr, "Error: can't apply order by to anything except project.\n"); 218 | exit(1); 219 | } else if (option != NULL) { 220 | if (option->order_by) { 221 | sra->project.order_by = option->order_by; 222 | sra->project.asc_desc = option->asc_desc; 223 | } 224 | if (option->group_by) { 225 | sra->project.group_by = option->group_by; 226 | } 227 | } 228 | return sra; 229 | } 230 | 231 | void ProjectOption_free(ProjectOption_t *opt) { 232 | if (opt->group_by) 233 | Expression_free(opt->group_by); 234 | if (opt->order_by) 235 | Expression_free(opt->order_by); 236 | free(opt); 237 | } 238 | 239 | ProjectOption_t *OrderBy_make(Expression_t *expr, enum OrderBy asc_desc) { 240 | ProjectOption_t *ob = (ProjectOption_t *)calloc(1, sizeof(ProjectOption_t)); 241 | ob->asc_desc = asc_desc; 242 | ob->order_by = expr; 243 | return ob; 244 | } 245 | 246 | ProjectOption_t *GroupBy_make(Expression_t *expr) { 247 | ProjectOption_t *gb = (ProjectOption_t *)calloc(1, sizeof(ProjectOption_t)); 248 | gb->group_by = expr; 249 | return gb; 250 | } 251 | 252 | ProjectOption_t *ProjectOption_combine(ProjectOption_t *op1, 253 | ProjectOption_t *op2) { 254 | if (op1->group_by && op2->group_by) { 255 | fprintf(stderr, "Error: can't combine two group_bys.\n"); 256 | exit(1); 257 | } 258 | if (op1->order_by && op2->order_by) { 259 | fprintf(stderr, "Error: can't combine two order_bys.\n"); 260 | exit(1); 261 | } 262 | if (op2->group_by) { 263 | op1->group_by = op2->group_by; 264 | op2->group_by = NULL; 265 | ProjectOption_free(op2); 266 | return op1; 267 | } else { 268 | op2->group_by = op1->group_by; 269 | op1->group_by = NULL; 270 | ProjectOption_free(op1); 271 | return op2; 272 | } 273 | } 274 | 275 | SRA_t *SRA_makeDistinct(SRA_t *sra) { 276 | if (sra->t != SRA_PROJECT) { 277 | fprintf(stderr, "Error: distinct property only applies to Project\n"); 278 | } else { 279 | sra->project.distinct = 1; 280 | } 281 | return sra; 282 | } 283 | 284 | JoinCondition_t *On(Condition_t *cond) { 285 | JoinCondition_t *jc = (JoinCondition_t *)calloc(1, sizeof(JoinCondition_t)); 286 | jc->t = JOIN_COND_ON; 287 | jc->on = cond; 288 | return jc; 289 | } 290 | 291 | JoinCondition_t *Using(StrList_t *col_list) { 292 | JoinCondition_t *jc = (JoinCondition_t *)calloc(1, sizeof(JoinCondition_t)); 293 | jc->t = JOIN_COND_USING; 294 | jc->col_list = col_list; 295 | return jc; 296 | } 297 | 298 | void ProjectOption_print(ProjectOption_t *op) { 299 | if (op->order_by) { 300 | printf("Order by: (%p) ", op->order_by); 301 | Expression_print(op->order_by); 302 | printf(op->asc_desc == ORDER_BY_ASC ? " ascending" : " descending"); 303 | } 304 | if (op->group_by) { 305 | printf("Group by: (%p) ", op->group_by); 306 | Expression_print(op->group_by); 307 | } 308 | if (!op->order_by && !op->group_by) { 309 | printf("Empty ProjectOption\n"); 310 | } 311 | } 312 | 313 | void JoinCondition_free(JoinCondition_t *cond) { 314 | switch (cond->t) { 315 | case JOIN_COND_ON: 316 | Condition_free(cond->on); 317 | break; 318 | case JOIN_COND_USING: 319 | StrList_free(cond->col_list); 320 | break; 321 | } 322 | } 323 | 324 | void SRA_free(SRA_t *sra) { 325 | switch (sra->t) { 326 | case SRA_TABLE: 327 | TableReference_free(sra->table.ref); 328 | break; 329 | case SRA_PROJECT: 330 | SRA_free(sra->project.sra); 331 | Expression_freeList(sra->project.expr_list); 332 | Expression_free(sra->project.order_by); 333 | Expression_free(sra->project.group_by); 334 | break; 335 | case SRA_SELECT: 336 | SRA_free(sra->select.sra); 337 | Condition_free(sra->select.cond); 338 | break; 339 | case SRA_FULL_OUTER_JOIN: 340 | case SRA_LEFT_OUTER_JOIN: 341 | case SRA_RIGHT_OUTER_JOIN: 342 | case SRA_JOIN: 343 | SRA_free(sra->join.sra1); 344 | SRA_free(sra->join.sra2); 345 | if (sra->join.opt_cond) 346 | JoinCondition_free(sra->join.opt_cond); 347 | break; 348 | case SRA_NATURAL_JOIN: 349 | case SRA_UNION: 350 | case SRA_EXCEPT: 351 | case SRA_INTERSECT: 352 | SRA_free(sra->binary.sra1); 353 | SRA_free(sra->binary.sra2); 354 | break; 355 | } 356 | free(sra); 357 | } 358 | 359 | static RA_t *desugar_table(SRA_t *sra) { 360 | /* 361 | an SRA table contains a TableReference_t, which can optionally 362 | have an alias. If there's no alias, we can simply generate 363 | an RA table with that name; otherwise, we'll apply a Rho operator 364 | to change its name. 365 | */ 366 | if (!sra->table.ref->alias) { 367 | return RA_Table(sra->table.ref->table_name); 368 | } else { 369 | return 370 | RA_RhoTable( 371 | RA_Table(sra->table.ref->table_name), 372 | sra->table.ref->alias 373 | ); 374 | } 375 | } 376 | 377 | static RA_t *desugar_project(SRA_t *sra) { 378 | /* 379 | SRA project will have a list of expressions and a table to 380 | take them from, along with some things like order by, 381 | group by etc. We'll be ignoring all of these for now, as our 382 | RA machinery as-is doesn't support it. But it can be modified later. 383 | */ 384 | return RA_Pi(SRA_desugar(sra->project.sra), sra->project.expr_list); 385 | } 386 | 387 | static RA_t *desugar_select(SRA_t *sra) { 388 | /* 389 | This conversion is straightforward, since the structure is identical. 390 | */ 391 | return RA_Sigma(SRA_desugar(sra->select.sra), sra->select.cond); 392 | } 393 | 394 | static RA_t *desugar_natural_join(SRA_t *sra) { 395 | /* 396 | The natural join means joining on the condition that for all columns which 397 | have the same name, their values must also be equal. 398 | Consider tables t1, t2, with column sets cs1, cs2. 399 | Then let cs = cs1 intersect cs2, so cs is the set of columns they have in 400 | common. 401 | Then for every column c in cs, we want to make a condition: t1.c = t2.c. 402 | We apply this condition to the cross product of t1 and t2. 403 | Further, any pair of shared columns will be collapsed into one. 404 | Pseudocode: 405 | // start with cross product 406 | res = Cross(t1, t2) 407 | 408 | // find distinct and shared columns 409 | distinct1 = list of columns from t1 that are not in t2 410 | distinct2 = list of columns from t2 that are not in t1 411 | shared = list of columns that are shared 412 | 413 | // initial condition 414 | cond = Eq("t1." + shared[0], "t2." + shared[0]) 415 | // all remaining columns ANDed on 416 | for i == 1 .. shared.length: 417 | cond = And(cond, Eq(t1.shared[i], t2.shared[i])) 418 | // update result with conditions 419 | res = Sigma(res, cond) 420 | 421 | // build columns to project 422 | to_project = [] 423 | for col in distinct1: 424 | to_project += Term(col) 425 | for col in shared: 426 | to_project += Term(col) 427 | for col in distinct2: 428 | to_project += Term(col) 429 | */ 430 | 431 | // /* start with cross product */ 432 | // RA_t *res = RA_Cross(SRA_desugar(sra->binary.sra1), 433 | // SRA_desugar(sra->binary.sra2)); 434 | // /* get t1\t2, t2\t1, t1 intersect t2 */ 435 | // List_t distinct1 = columns_except(sra->binary.sra1->cols, 436 | // sra->binary.sra2->cols); 437 | // List_t distinct2 = columns_except(sra->binary.sra2->cols, 438 | // sra->binary.sra1->cols); 439 | // List_t shared = columns_intersect(sra->binary.sra1->cols, 440 | // sra->binary.sra2->cols); 441 | // /* for later use in choosing which columns to project */ 442 | // List_t to_project; 443 | // /* create a condition that shared columns have equal values */ 444 | // Condition_t *cond = NULL; 445 | // int first = 1; /* see below for purpose */ 446 | // while (shared.size > 0) { 447 | // Column_t *shared_col = (Column_t *)list_removeFront(&shared); 448 | // if (first) { 449 | // cond = Eq(Term(shared_col->name) 450 | // } 451 | // } 452 | return NULL; 453 | } 454 | 455 | RA_t *desugar_join(SRA_t *sra) { 456 | return NULL; 457 | } 458 | 459 | RA_t *SRA_desugar(SRA_t *sra) { 460 | RA_t *t1, *t2, *res; 461 | switch (sra->t) { 462 | case SRA_TABLE: 463 | res = desugar_table(sra); break; 464 | case SRA_PROJECT: 465 | res = desugar_project(sra); break; 466 | case SRA_SELECT: 467 | res = desugar_select(sra); break; 468 | case SRA_NATURAL_JOIN: 469 | res = desugar_natural_join(sra); break; 470 | case SRA_JOIN: 471 | res = desugar_join(sra); break; 472 | case SRA_FULL_OUTER_JOIN: 473 | res = desugar_join(sra); break; 474 | case SRA_LEFT_OUTER_JOIN: 475 | res = desugar_join(sra); break; 476 | case SRA_RIGHT_OUTER_JOIN: 477 | res = desugar_join(sra); break; 478 | case SRA_UNION: 479 | return RA_Union(SRA_desugar(sra->binary.sra1), 480 | SRA_desugar(sra->binary.sra2)); 481 | case SRA_EXCEPT: 482 | return RA_Union(SRA_desugar(sra->binary.sra1), 483 | SRA_desugar(sra->binary.sra2)); 484 | case SRA_INTERSECT: 485 | t1 = SRA_desugar(sra->binary.sra1); 486 | t2 = SRA_desugar(sra->binary.sra2); 487 | return RA_Difference( 488 | RA_Union(t1, t2), 489 | RA_Difference( 490 | RA_Difference(t1, t2), 491 | RA_Difference(t2, t1))); 492 | default: 493 | fprintf(stderr, "Error: unhandled SRA type\n"); 494 | exit(1); 495 | } 496 | SRA_free(sra); 497 | return res; 498 | } 499 | /* 500 | RA_t *SRA_desugar(SRA_t *sra) { 501 | List_t temp_tables; 502 | RA_t *res; 503 | list_init(&temp_tables, Table_free); 504 | res = SRA_desugar_r(sra, temp_tables); 505 | list_destroy(&temp_tables); 506 | return res; 507 | }*/ --------------------------------------------------------------------------------