"""relational-databases-from-scratch.

Relational algebra on plain Python sets of immutable, hashable records.

This module consolidates a flattened repository dump. The original layout was:

    src/databases/__init__.py     (empty)
    src/databases/tables.py       -> "tables" section below
    src/databases/operations.py   -> "operations" section below
    src/test.py                   -> "demo" section below
    .gitignore                    (ignores ``.idea`` and ``__pycache__``)
    README.md                     (title only: relational-databases-from-scratch)
"""

# ChainMap and reduce were imported by the original operations.py but are not
# used by any code in this module; they are kept so nothing that relied on
# them being importable from here breaks.
from collections import ChainMap
from functools import reduce
from itertools import product
from typing import Callable, Dict, Iterable, List, Set


# ---------------------------------------------------------------------------
# src/databases/tables.py
# ---------------------------------------------------------------------------


class Record(dict):
    """A table row: a hashable dict mapping column name to value.

    Records are stored in sets, so they must be hashable. All values must be
    hashable themselves, otherwise hashing the record raises ``TypeError``.

    NOTE: only item assignment (``record[key] = value``) is blocked. Other
    ``dict`` mutators (``update``, ``pop``, ``del`` ...) are inherited
    unchanged and must not be used on a record that is stored in a set.
    """

    def __hash__(self):
        # A frozenset of the items is independent of insertion order, so two
        # records that compare equal (dict equality ignores order) always
        # hash equal, as the hash/eq contract requires.
        return hash(frozenset(self.items()))

    def __setitem__(self, key, value):
        # TypeError is what built-in immutable containers raise on item
        # assignment.
        raise TypeError("Modifying values is not supported.")


def make_employee(id: int, name: str, position: str, salary: int) -> Record:
    """Build an employee record with columns id, name, position and salary."""
    return Record({"id": id, "name": name, "position": position, "salary": salary})


def make_task(id: int, employee_id: int, completed: bool) -> Record:
    """Build a task record with columns id, employee_id and completed."""
    return Record({"id": id, "employee_id": employee_id, "completed": completed})


def make_client(id: int, name: str, contact_id: int) -> Record:
    """Build a client record with columns id, name and contact_id."""
    return Record({"id": id, "name": name, "contact_id": contact_id})


def _columns_in_table(table: Set[Record]) -> set:
    """Return every column name used by at least one record of the table.

    An empty table yields an empty set.
    """
    # Starting from an empty set instance (rather than the unbound
    # ``set.union``) makes the zero-record case well defined.
    return set().union(*(record.keys() for record in table))


def _prefix_record(row: dict, prefix: str) -> Record:
    """Return a copy of the row with every column renamed to 'prefix.column'."""
    return Record({f"{prefix}.{key}": value for key, value in row.items()})


def _prefix_columns(table: Set[Record], prefix: str) -> Set[Record]:
    """Return the table with every column of every record prefixed."""
    return {_prefix_record(row, prefix) for row in table}


def _pad_table(table: Set[Record], with_cols: Iterable[str]) -> Set[Record]:
    """Return the table with the given columns added to every record as None.

    Args:
        table: Set[Record] to pad.
        with_cols: column names to add.

    Returns:
        padded_table: Set[Record]; a column a record already has keeps its
            value, only missing columns are filled with None.
    """
    padding_row = dict.fromkeys(with_cols)
    # The row is unpacked last so padding can never clobber real data.
    padded_table = {Record({**padding_row, **row}) for row in table}
    return padded_table


# ---------------------------------------------------------------------------
# src/databases/operations.py
# ---------------------------------------------------------------------------


def select(table: Set[Record], conditions: List[Callable]) -> Set[Record]:
    """
    Selects the records in the table which satisfy all the conditions.

    Args:
        table: Set[Record]
        conditions: List[Callable], a list of functions. Each function takes a
            record from the table as input and returns a boolean.

    Returns:
        table_out: Set[Record] with the records satisfying every condition.
    """
    table_out = {record for record in table if all(cond(record) for cond in conditions)}
    return table_out


def project(table: Set[Record], columns: List[str]) -> Set[Record]:
    """
    Selects the given columns in the table.

    Args:
        table: Set[Record]
        columns: List[str], column names to select

    Returns:
        table_out: Set[Record] with only the selected columns. Records that
            become identical after projection collapse into one.

    Raises:
        KeyError: if a record does not have one of the requested columns.
    """
    table_out = {Record({column: record[column] for column in columns}) for record in table}
    return table_out


def rename(table: Set[Record], columns: Dict[str, str]) -> Set[Record]:
    """
    Renames columns in a Set[Record].
    WARNING: rename is destructive. If the new name of a column is an existing
    column, the existing column's contents are overwritten by the renamed one.

    Args:
        table: Set[Record], with columns to be renamed.
        columns: dict, with old_name - new_name pairs.

    Returns:
        table_out: Set[Record] with renamed columns.
    """

    def rename_record(record: Record) -> Record:
        # Untouched columns go in first, renamed ones second, so that on a
        # name collision the renamed column deterministically wins.
        renamed = {name: value for name, value in record.items() if name not in columns}
        renamed.update(
            (columns[name], value) for name, value in record.items() if name in columns
        )
        return Record(renamed)

    # Each record is renamed from its own items, so empty tables and tables
    # whose records have differing columns are both handled.
    table_out = {rename_record(record) for record in table}
    return table_out


def cross_product(left: Set[Record], right: Set[Record]) -> Set[Record]:
    """
    Constructs the cross product of two tables. Each column name is prefixed
    with the side it came from ("left." or "right.").

    Args:
        left: Set[Record].
        right: Set[Record].

    Returns:
        table_out: Set[Record], cross product of the tables.
    """
    # Prefixing keeps columns that share a name on both sides apart.
    left = _prefix_columns(left, "left")
    right = _prefix_columns(right, "right")

    table_out = {Record({**row_l, **row_r}) for row_l, row_r in product(left, right)}

    return table_out


def theta_join(left: Set[Record], right: Set[Record], conditions: List[Callable]) -> Set[Record]:
    """
    Joins the tables according to conditions.

    Args:
        left: Set[Record].
        right: Set[Record].
        conditions: List[Callable], list of conditions to join on. Each
            condition is a function taking a row from left and a row from
            right (with their original, unprefixed column names) and
            returning a boolean.
            Example: lambda x, y: x["id"] == y["employee_id"]

    Returns:
        joined_table: Set[Record], the pairs of rows satisfying every
            condition, with columns prefixed by "left." and "right.".
    """
    joined_table = {
        Record({**_prefix_record(row_l, "left"), **_prefix_record(row_r, "right")})
        for row_l, row_r in product(left, right)
        # A generator lets all() stop at the first failing condition.
        if all(cond(row_l, row_r) for cond in conditions)
    }

    return joined_table


def natural_join(left: Set[Record], right: Set[Record]) -> Set[Record]:
    """
    Natural join of the left and right tables. It is the same as a theta join
    with the condition that all columns present in both tables are equal.

    Args:
        left: Set[Record].
        right: Set[Record].

    Returns:
        joined_table: Set[Record], natural join of left and right. As with
            theta_join, output columns are prefixed with "left." / "right.".
    """
    common_cols = _columns_in_table(left).intersection(_columns_in_table(right))

    def common_columns_match(row_l: Record, row_r: Record) -> bool:
        # One predicate over all common columns. Building one lambda per
        # column in a comprehension would make every lambda see only the
        # last column (closures bind the loop variable late).
        return all(row_l[col] == row_r[col] for col in common_cols)

    joined_table = theta_join(left, right, [common_columns_match])
    return joined_table


def union(left: Set[Record], right: Set[Record]) -> Set[Record]:
    """
    Returns the union of the tables.
    Note: unlike the textbook operator, the tables need not have the same
    columns. Columns missing on one side are filled with None before the
    union is taken. Records equal after padding appear once.

    Args:
        left: Set[Record].
        right: Set[Record].

    Returns:
        table_out: Set[Record], union of the padded input tables.
    """
    left_cols = _columns_in_table(left)
    right_cols = _columns_in_table(right)

    # Pad each side with the columns only the other side has.
    left = _pad_table(left, right_cols.difference(left_cols))
    right = _pad_table(right, left_cols.difference(right_cols))

    table_out = left.union(right)

    return table_out


def difference(left: Set[Record], right: Set[Record]) -> Set[Record]:
    """
    Returns the difference of the tables.

    Args:
        left: Set[Record], the table to remove records from.
        right: Set[Record], the records to remove.

    Returns:
        table_out: Set[Record], the records of left that are not in right.
    """
    return left.difference(right)


def intersection(left: Set[Record], right: Set[Record]) -> Set[Record]:
    """
    Returns the intersection of the tables.
    Note: this does not add more expressive power to our already existing
    operations. Intersection can be written as the repeated application of
    the difference operator.

    Args:
        left: Set[Record].
        right: Set[Record].

    Returns:
        table_out: Set[Record], the records present in both tables.
    """
    table_out = difference(left, difference(left, right))
    return table_out


# ---------------------------------------------------------------------------
# src/test.py (sample tables and a smoke run of every operation)
# ---------------------------------------------------------------------------


employees = {
    make_employee(0, "Michael Scott", "Regional Manager", 100000),
    make_employee(1, "Dwight K. Schrute", "Assistant to the Regional Manager", 65000),
    make_employee(2, "Pamela Beesly", "Sales", 40000),
    make_employee(3, "James Halpert", "Sales", 55000),
    make_employee(4, "Stanley Hudson", "Sales", 55000),
}


tasks = {
    make_task(0, 0, False),
    make_task(1, 0, False),
    make_task(2, 1, True),
    make_task(3, 1, True),
    make_task(4, 1, True),
    make_task(5, 2, True),
    make_task(6, 3, False),
    make_task(7, 3, False),
    make_task(8, 3, True),
    make_task(9, 3, False),
}


clients = {
    make_client(0, "Dunmore High School", 3),
    make_client(1, "Lackawanna County", 0),
    make_client(2, "Mr. Deckert", 1),
    make_client(3, "Phil Maguire", 3),
    make_client(4, "Harper Collins", 1),
    # contact_id belongs inside the call; it previously sat outside the
    # closing parenthesis.
    make_client(5, "Apex Technology", 1),
}


def main() -> None:
    """Run every operation once on the sample tables; results are discarded."""
    project(employees, ["salary"])
    select(employees, [lambda x: x["salary"] > 60000])
    rename(employees, {"name": "full name"})
    cross_product(left=employees, right=tasks)
    natural_join(left=employees, right=tasks)
    theta_join(left=employees, right=tasks, conditions=[lambda x, y: x["id"] == y["employee_id"]])
    union(employees, tasks)
    difference(employees, tasks)
    intersection(employees, tasks)


if __name__ == "__main__":
    main()