├── src
    ├── databases
    │   ├── __init__.py
    │   ├── tables.py
    │   └── operations.py
    └── test.py
├── .gitignore
└── README.md


/src/databases/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | __pycache__


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # relational-databases-from-scratch


--------------------------------------------------------------------------------
/src/databases/tables.py:
--------------------------------------------------------------------------------
 1 | from typing import Set, List
 2 | 
 3 | 
 4 | class Record(dict):
 5 |     def __hash__(self):
 6 |         proxy = tuple(self.items())
 7 |         return hash(proxy)
 8 | 
 9 |     def __setitem__(self, key, value):
10 |         raise NotImplemented("Modifying values is not supported.")
11 | 
12 | 
def make_employee(id: int, name: str, position: str, salary: int):
    """
    Builds an employee record.

    Args:
        id: int, identifier of the employee.
        name: str, name of the employee.
        position: str, position of the employee.
        salary: int, salary of the employee.

    Returns:
        Record with the keys "id", "name", "position" and "salary".
    """
    fields = {
        "id": id,
        "name": name,
        "position": position,
        "salary": salary,
    }
    return Record(fields)
15 | 
16 | 
def make_task(id: int, employee_id: int, completed: bool):
    """
    Builds a task record.

    Args:
        id: int, identifier of the task.
        employee_id: int, stored under the "employee_id" key.
        completed: bool, stored under the "completed" key.

    Returns:
        Record with the keys "id", "employee_id" and "completed".
    """
    fields = {
        "id": id,
        "employee_id": employee_id,
        "completed": completed,
    }
    return Record(fields)
19 | 
20 | 
def make_client(id: int, name: str, contact_id: int):
    """
    Builds a client record.

    Args:
        id: int, identifier of the client.
        name: str, name of the client.
        contact_id: int, stored under the "contact_id" key.

    Returns:
        Record with the keys "id", "name" and "contact_id".
    """
    fields = {
        "id": id,
        "name": name,
        "contact_id": contact_id,
    }
    return Record(fields)
23 | 
24 | 
25 | def _columns_in_table(table: Set[Record]) -> set:
26 |     return set.union(*[set(record.keys()) for record in table])
27 | 
28 | 
def _prefix_record(row: dict, prefix: str) -> Record:
    """
    Builds a copy of the row in which every key is prefixed.

    Args:
        row: dict, the row whose keys are to be prefixed.
        prefix: str, joined to each key with a dot, i.e. "<prefix>.<key>".

    Returns:
        Record with the prefixed keys and the original values.
    """
    prefixed = {}
    for key, value in row.items():
        prefixed[f"{prefix}.{key}"] = value
    return Record(prefixed)
31 | 
32 | 
def _prefix_columns(table: Set[Record], prefix: str) -> Set[Record]:
    """
    Prefixes the column names of every record in the table.

    Args:
        table: Set[Record].
        prefix: str, passed on to _prefix_record for each row.

    Returns:
        Set[Record], the table with prefixed column names.
    """
    prefixed_table = set()
    for row in table:
        prefixed_table.add(_prefix_record(row, prefix))
    return prefixed_table
35 | 
36 | 
def _pad_table(table: Set[Record], with_cols: List):
    """
    Adds the given columns to every record of the table, filled with None.

    Note: the padding is merged on top of each row, so a column in with_cols
    that already exists in a row has its value replaced by None.

    Args:
        table: Set[Record].
        with_cols: iterable of column names to add.

    Returns:
        Set[Record], the padded table.
    """
    # Build the all-None padding once and reuse it for every row.
    padding = dict.fromkeys(with_cols)
    return {Record({**row, **padding}) for row in table}
41 | 


--------------------------------------------------------------------------------
/src/test.py:
--------------------------------------------------------------------------------
 1 | from databases.tables import *
 2 | from databases.operations import *
 3 | 
 4 | 
 5 | employees = {make_employee(0, "Michael Scott", "Regional Manager", 100000),
 6 |              make_employee(1, "Dwight K. Schrute", "Assistant to the Regional Manager", 65000),
 7 |              make_employee(2, "Pamela Beesly", "Sales", 40000),
 8 |              make_employee(3, "James Halpert", "Sales", 55000),
 9 |              make_employee(4, "Stanley Hudson", "Sales", 55000)}
10 | 
11 | 
12 | tasks = {make_task(0, 0, False),
13 |          make_task(1, 0, False),
14 |          make_task(2, 1, True),
15 |          make_task(3, 1, True),
16 |          make_task(4, 1, True),
17 |          make_task(5, 2, True),
18 |          make_task(6, 3, False),
19 |          make_task(7, 3, False),
20 |          make_task(8, 3, True),
21 |          make_task(9, 3, False),}
22 | 
23 | 
24 | clients = {make_client(0, "Dunmore High School", 3),
25 |            make_client(1, "Lackawanna County", 0),
26 |            make_client(2, "Mr. Deckert", 1),
27 |            make_client(3, "Phil Maguire", 3),
28 |            make_client(4, "Harper Collins", 1),
29 |            make_client(5, "Apex Technology"), 1}
30 | 
31 | 
32 | project(employees, ["salary"])
33 | select(employees, [lambda x: x["salary"] > 60000])
34 | rename(employees, {"name": "full name"})
35 | cross_product(left=employees, right=tasks)
36 | natural_join(left=employees, right=tasks)
37 | theta_join(left=employees, right=tasks, conditions=[lambda x, y: x["id"] == y["employee_id"]])
38 | union(employees, tasks)
39 | difference(employees, tasks)
40 | intersection(employees, tasks)


--------------------------------------------------------------------------------
/src/databases/operations.py:
--------------------------------------------------------------------------------
  1 | from itertools import product
  2 | from collections import ChainMap
  3 | from functools import reduce
  4 | from typing import Set, List, Callable
  5 | 
  6 | from .tables import _columns_in_table, _prefix_columns, _prefix_record, _pad_table, Record
  7 | 
  8 | 
  9 | def select(table: Set[Record], conditions: List[Callable]) -> Set[Record]:
 10 |     """
 11 |     Selects the record in the table which satisfy the conditions.
 12 | 
 13 |     Args:
 14 |         table: Set[Row]
 15 |         conditions: List[Callable], a list of functions. Each function takes a record
 16 |             from the table as input and returns a boolean.
 17 | 
 18 |     Returns:
 19 |         table_out: Set[Row] with instances satisfying the conditions.
 20 |     """
 21 |     table_out = {record for record in table if all(cond(record) for cond in conditions)}
 22 |     return table_out
 23 | 
 24 | 
 25 | def project(table: Set[Record], columns: List[str]) -> Set[Record]:
 26 |     """
 27 |     Selects the given columns in the table.
 28 | 
 29 |     Args:
 30 |         table: Set[Row]
 31 |         columns: List[str], column names to select
 32 | 
 33 |     Returns:
 34 |         table_out: Set[Row] with only the selected columns.
 35 |     """
 36 |     table_out = {Record({column: record[column] for column in columns}) for record in table}
 37 |     return table_out
 38 | 
 39 | 
 40 | def rename(table: Set[Record], columns: dict) -> Set[Record]:
 41 |     """
 42 |     Renames columns in a Set[Row].
 43 |     WARNING: rename is destructive. If the new name of a column is an existing column,
 44 |     contents will be overwritten!
 45 | 
 46 |     Args:
 47 |         table: Set[Row], with columns to be renamed.
 48 |         columns: dict, with old_name - new_name pairs.
 49 | 
 50 |     Returns:
 51 |         table_out: Set[Row] with renamed columns.
 52 |     """
 53 |     table_columns = _columns_in_table(table)
 54 |     table_out = {
 55 |         Record({columns.get(old_name, old_name): record[old_name] for old_name in table_columns})
 56 |         for record in table
 57 |     }
 58 |     return table_out
 59 | 
 60 | 
 61 | def cross_product(left: Set[Record], right: Set[Record]) -> Set[Record]:
 62 |     """
 63 |     Constructs the cross product of tables. Each columnn name will be prefixed with
 64 |     the source table name.
 65 | 
 66 |     Args:
 67 |         **tables: Set[Row]s for which cross-product is to be taken.
 68 | 
 69 |     Returns:
 70 |         table_out: Set[Row], cross-product of the tables.
 71 |     """
 72 |     # prefixing columns with table name
 73 |     left = _prefix_columns(left, "left")
 74 |     right = _prefix_columns(right, "right")
 75 | 
 76 |     table_out = {Record({**row_l, **row_r}) for row_l, row_r in product(left, right)}
 77 | 
 78 |     return table_out
 79 | 
 80 | 
 81 | def theta_join(left: Set[Record], right: Set[Record], conditions: List[Callable]) -> Set[Record]:
 82 |     """
 83 |     Joins the table according to conditions.
 84 | 
 85 |     Args:
 86 |         left: Set[Row].
 87 |         right: Set[Row].
 88 |         conditions: List[Callable], list of conditions to join on. Each condition
 89 |             should be a function mapping a tuple of a row from left and right to a Boolean.
 90 |             Example: lambda (x, y): x['id'] == y['employee_id']
 91 | 
 92 |     Returns:
 93 |         joined_table: Set[Row], theta_join of left and right along the conditions.
 94 |     """
 95 |     # determining the pair of rows which satisfy the conditions
 96 |     joined_table = {
 97 |         Record({**_prefix_record(row_l, "left"), **_prefix_record(row_r, "right")})
 98 |         for row_l, row_r in product(left, right)
 99 |         if all([cond(row_l, row_r) for cond in conditions])
100 |     }
101 | 
102 |     return joined_table
103 | 
104 | 
def natural_join(left: Set[Record], right: Set[Record]) -> Set[Record]:
    """
    Natural join of the left and right tables. It is the same as a theta join with
    the condition that matching columns should be equal.

    If the tables share no column, no condition applies and every pair of
    rows is joined.

    Args:
         left: Set[Record].
         right: Set[Record].

    Returns:
        joined_table: Set[Record], natural join of left and right. Empty if
            either table is empty.
    """
    # Joining with an empty table yields nothing; returning early also avoids
    # asking for the columns of a table without records.
    if not left or not right:
        return set()

    common_cols = _columns_in_table(left).intersection(_columns_in_table(right))

    def rows_agree(row_l, row_r) -> bool:
        # A single predicate over all shared columns. Building one lambda per
        # column inside a comprehension would make every lambda see only the
        # last column, because closures look the loop variable up late.
        return all(row_l[col] == row_r[col] for col in common_cols)

    joined_table = theta_join(left, right, [rows_agree])
    return joined_table
121 | 
122 | 
def union(left: Set[Record], right: Set[Record]) -> Set[Record]:
    """
    Returns the union of the tables.
    Note: the tables do not need to have the same columns. Before the union
    is taken, each table is padded with the columns it lacks, filled with None.

    Args:
        left: Set[Record]. May be empty.
        right: Set[Record]. May be empty.

    Returns:
        table_out: Set[Record], union of the padded input tables.
    """
    # An empty table contributes no columns; guarding here keeps the union
    # with an empty table (e.g. an empty selection) from failing.
    left_cols = _columns_in_table(left) if left else set()
    right_cols = _columns_in_table(right) if right else set()

    # padding each side with the columns only the other side has
    left = _pad_table(left, right_cols.difference(left_cols))
    right = _pad_table(right, left_cols.difference(right_cols))

    table_out = left.union(right)

    return table_out
145 | 
146 | 
def difference(left: Set[Record], right: Set[Record]) -> Set[Record]:
    """
    Returns the records of left that do not occur in right.

    Args:
        left: Set[Record], the table to remove records from.
        right: Set[Record], the records to be removed.

    Returns:
        table_out: Set[Record], difference of the input tables.
    """
    table_out = left.difference(right)
    return table_out
159 | 
160 | 
def intersection(left: Set[Record], right: Set[Record]) -> Set[Record]:
    """
    Returns the records that occur in both tables.
    Note: intersection adds no expressive power to the existing operations;
        it is written here as two applications of the difference operator.

    Args:
        left: Set[Record].
        right: Set[Record].

    Returns:
        table_out: Set[Record], intersection of the input tables.
    """
    # Records of left missing from right ...
    only_in_left = difference(left, right)
    # ... removed from left leave exactly the shared records.
    return difference(left, only_in_left)
177 | 


--------------------------------------------------------------------------------