├── .gitignore
├── AsyncDB
│   ├── __init__.py
│   ├── AsyncDB.py
│   ├── AsyncFile.py
│   ├── Allocator.py
│   ├── Node.py
│   ├── TaskQue.py
│   └── Engine.py
├── ManualTest.py
├── README.md
└── Test.py

/.gitignore:
--------------------------------------------------------------------------------
.idea
*.db
--------------------------------------------------------------------------------
/AsyncDB/__init__.py:
--------------------------------------------------------------------------------
from .AsyncDB import AsyncDB
--------------------------------------------------------------------------------
/ManualTest.py:
--------------------------------------------------------------------------------
from asyncio import get_event_loop

from AsyncDB import AsyncDB

M = 10000
FILE = 'Test.db'


async def write():
    db = AsyncDB(FILE)
    for i in range(M):
        db[i] = i
        print('set', i)
    # flush queued writes and mark the file cleanly closed,
    # otherwise the next open triggers a slow self-repair
    await db.close()


async def read():
    db = AsyncDB(FILE)
    for i in range(M):
        value = await db[i]
        print('get', value)
    await db.close()


def main():
    loop = get_event_loop()
    loop.run_until_complete(write())
    # loop.run_until_complete(read())


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/AsyncDB/AsyncDB.py:
--------------------------------------------------------------------------------
from collections import UserDict

from .Engine import Engine


class Cache(UserDict):
    def __init__(self, max_len=128):
        super().__init__()
        self.max_len = max_len

    def __setitem__(self, key, value):
        self.data[key] = value
        if len(self.data) > self.max_len:
            # evict an arbitrary entry to bound memory usage
            self.data.popitem()


class AsyncDB:
    def __init__(self, filename: str):
        self.cache = Cache()
        self.engine = Engine(filename)

    def __getitem__(self, key):
        async def coro():
            return self.cache[key] if key in self.cache else await self.engine.get(key)

        return coro()

    def __setitem__(self, key, value):
        if key in self.cache and self.cache[key] == value:
            return
        self.cache[key] = value
        self.engine.set(key, value)

    def pop(self, key):
        if key in self.cache:
            del self.cache[key]
        return self.engine.pop(key)

    async def items(self, item_from=None, item_to=None, max_len=0, reverse=False):
        return await self.engine.items(item_from, item_to, max_len, reverse)

    async def close(self):
        if self.engine.task_que.que:
            # the first acquire takes the lock; the second blocks until
            # a_command_done releases it, i.e. until all writes are flushed
            await self.engine.lock.acquire()
            await self.engine.lock.acquire()
        self.engine.close()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# AsyncDB
When traditional databases face heavy concurrency, the usual and reasonable solution is multiple threads with
synchronous IO. This DB instead attacks the problem with Python coroutines and AsyncIO.

By adapting the B-Tree algorithm to that model, the DB can apply optimizations that traditional databases cannot.

## Attention
* Requires Python 3.5.1 or later; there are no other dependencies.
* Can only be used inside a coroutine environment.
* All keys must be "bisectable", i.e. sortable with bisect.insort.
* There is a cache inside, so a result may be a reference to a previous result.
* The DB guarantees ACID against software failure at any moment, but not against hardware failure such as power loss.
* If the DB is closed unexpectedly, it repairs itself on the next startup, which takes considerable time.
* The source code is released under the MIT license.

## Usage
Copy the "AsyncDB" folder to the root of your program.

```Python
from AsyncDB import AsyncDB

# open/create
db = AsyncDB('Test.db')

# get
val = await db['key']

# set
db['any_key_bisectable_picklable'] = 'any_value_picklable'

# pop/del
val = db.pop('key')

# iter
items = await db.items(item_from='begin', item_to='end', max_len=1024, reverse=False)

# safely close before exit
await db.close()
```
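
A runnable end-to-end sketch (assumptions: `demo.db` is a placeholder filename, and the event-loop wrapper
shown is only one of several valid ways to enter a coroutine environment):

```Python
from asyncio import get_event_loop
from bisect import insort

from AsyncDB import AsyncDB


async def demo():
    db = AsyncDB('demo.db')
    for i in range(10):
        db[i] = i * i                                 # set is synchronous
    print(await db[3])                                # get must be awaited -> 9
    print(await db.items(item_from=2, item_to=5))     # inclusive range scan over sorted keys
    await db.close()                                  # flush queued writes before exit


# "bisectable" sanity check: if bisect.insort accepts your keys, AsyncDB can index them
keys = []
for key in (3, 1, 2):
    insort(keys, key)
assert keys == [1, 2, 3]

get_event_loop().run_until_complete(demo())
```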
--------------------------------------------------------------------------------
/AsyncDB/AsyncFile.py:
--------------------------------------------------------------------------------
from asyncio import get_event_loop
from collections import deque
from concurrent.futures import ThreadPoolExecutor
from os.path import getsize

loop = get_event_loop()


class FastIO:
    # an unbuffered file handle that tracks its cursor to skip redundant seeks
    def __init__(self, filename: str):
        self.cursor = 0
        self.file = open(filename, 'rb+', buffering=0)

    def seek(self, offset: int):
        if offset != self.cursor:
            self.file.seek(offset)

    def read(self, offset: int, length: int):
        self.seek(offset)
        self.cursor = offset + length
        return self.file.read(length)

    def write(self, offset: int, data: bytes):
        self.seek(offset)
        self.cursor = offset + len(data)
        self.file.write(data)

    def exec(self, offset: int, action):
        self.seek(offset)
        result = action(self.file)
        self.cursor = self.file.tell()
        return result


class AsyncFile:
    # multiplexes blocking file IO over a pool of FastIO handles and a thread pool
    def __init__(self, filename: str, io_num=4):
        self.size = getsize(filename)
        self.executor = ThreadPoolExecutor(io_num)
        self.io_que = deque((FastIO(filename) for _ in range(io_num)), io_num)

    async def read(self, offset: int, length: int):
        def async_call():
            io = self.io_que.popleft()
            result = io.read(offset, length)
            self.io_que.append(io)
            return result

        return await loop.run_in_executor(self.executor, async_call)

    async def write(self, offset: int, data: bytes):
        assert self.size >= offset + len(data)

        def async_call():
            io = self.io_que.popleft()
            io.write(offset, data)
            self.io_que.append(io)

        await loop.run_in_executor(self.executor, async_call)

    async def exec(self, offset: int, action):
        def async_call():
            io = self.io_que.popleft()
            result = io.exec(offset, action)
            self.io_que.append(io)
            return result

        return await loop.run_in_executor(self.executor, async_call)

    def close(self):
        for io in self.io_que:
            io.file.close()
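

# A hedged demo, not part of the engine: four concurrent reads served by the
# pooled FastIO handles. Assumes 'Test.db' exists and is at least 16 bytes long.
if __name__ == '__main__':
    from asyncio import gather

    async def demo():
        af = AsyncFile('Test.db')
        parts = await gather(*(af.read(i * 4, 4) for i in range(4)))
        af.close()
        print(parts)

    loop.run_until_complete(demo())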
--------------------------------------------------------------------------------
/Test.py:
--------------------------------------------------------------------------------
from asyncio import get_event_loop, sleep, ensure_future
from os import remove
from os.path import isfile
from random import randint

from AsyncDB import AsyncDB

T = 200000
M = 200000
FILE = 'Test.db'


async def acid_t():
    if isfile(FILE):
        remove(FILE)

    std = {}
    db = AsyncDB(FILE)

    async def compare(key, expect_value):
        db_value = await db[key]
        assert db_value == expect_value

    for _ in range(T):
        # insert/update
        if randint(0, 1):
            rand_key = randint(0, M)
            rand_value = randint(0, M)
            print('set', rand_key)
            std[rand_key] = rand_value
            db[rand_key] = rand_value

        # delete
        if randint(0, 1):
            rand_key = randint(0, M)
            print('pop', rand_key)
            if rand_key in std:
                del std[rand_key]
                db.pop(rand_key)

        # read
        if randint(0, 1):
            rand_key = randint(0, M)
            expect_value = std.get(rand_key)
            ensure_future(compare(rand_key, expect_value))
        await sleep(0)

    # iterate
    std_items = list(std.items())
    for key, value in std_items:
        db_value = await db[key]
        assert db_value == value

    items = await db.items()
    for key, value in items:
        assert value == std[key]
    assert len(items) == len(std_items)
    print('iter OK')

    # parameters
    std_items.sort()
    max_i = len(std_items)
    i_from = randint(0, max_i - 1)
    i_to = randint(i_from + 1, max_i)
    sub_items = std_items[i_from:i_to]

    items = await db.items(item_from=sub_items[0][0], item_to=sub_items[-1][0])
    assert items == sub_items
    items = await db.items(item_from=sub_items[0][0], item_to=sub_items[-1][0], reverse=True)
    assert items == sorted(sub_items, reverse=True)
    max_len = randint(1, M)
    items = await db.items(item_from=sub_items[0][0], item_to=sub_items[-1][0], max_len=max_len)
    assert len(items) == min(max_len, len(sub_items))
    print('params OK')
    await db.close()

    db = AsyncDB(FILE)
    for key, value in std_items:
        db_value = await db[key]
        assert db_value == value
    await db.close()
    print('ACID OK')


def main():
    loop = get_event_loop()
    for _ in range(50):
        loop.run_until_complete(acid_t())
    remove(FILE)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/AsyncDB/Allocator.py:
--------------------------------------------------------------------------------
from bisect import insort, bisect_left
from collections import UserList, UserDict


class SizeQue(UserList):
    def __init__(self, max_len=1024):
        super().__init__()
        self.max_len = max_len

    def append(self, size: int) -> int:
        insort(self.data, size)
        if len(self.data) > self.max_len:
            return self.data.pop(0)

    def find(self, size: int) -> int:
        # the caller guarantees the queue is non-empty
        index = bisect_left(self.data, size)
        if index < len(self.data):
            return index


class SizeMap(UserDict):
    def discard(self, size: int) -> int:
        ptrs = self.data.get(size)
        if ptrs:
            ptr = ptrs.pop()
            if not ptrs:
                del self.data[size]
            return ptr

    def add(self, size: int, ptr: int):
        ptrs = self.data.setdefault(size, [])
        insort(ptrs, ptr)


class Allocator:
    def __init__(self):
        # [..., size]
        self.size_que = SizeQue()
        # {..., size: [..., ptr]}
        self.size_map = SizeMap()
        # {..., ptr: size}
        self.ptr_map = {}

    def malloc(self, size: int) -> int:
        if self.size_que:
            index = self.size_que.find(size)
            if index is not None:
                size_exist = self.size_que[index]
                ptr = self.size_map.discard(size_exist)
                if size_exist not in self.size_map:
                    del self.size_que[index]
                del self.ptr_map[ptr]
                # return the leftover space to the free list
                self.free(ptr + size, size_exist - size)
                return ptr

    def free(self, ptr: int, size: int):
        assert size >= 0
        if size == 0:
            return

        # try to merge with the free block that starts right behind this one
        tail_ptr = ptr + size
        while tail_ptr in self.ptr_map:
            tail_size = self.ptr_map.pop(tail_ptr)
            ptrs = self.size_map[tail_size]
            del ptrs[bisect_left(ptrs, tail_ptr)]
            if not self.size_map[tail_size]:
                del self.size_map[tail_size]
                del self.size_que[self.size_que.find(tail_size)]
            tail_ptr += tail_size
        size = tail_ptr - ptr

        if size in self.size_map:
            if len(self.size_map[size]) < self.size_que.max_len:
                self.ptr_map[ptr] = size
                self.size_map.add(size, ptr)
        else:
            size_remove = self.size_que.append(size)
            if size_remove == size:
                return
            else:
                # this size was not the one evicted
                self.ptr_map[ptr] = size
                self.size_map.add(size, ptr)
                # on overflow, drop every block of the evicted size
                if size_remove:
                    for evict_ptr in self.size_map[size_remove]:
                        del self.ptr_map[evict_ptr]
                    del self.size_map[size_remove]
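

# A hedged demo, not used by the engine: freeing a block directly before an
# existing free block coalesces the two, and malloc can carve the merged space.
if __name__ == '__main__':
    a = Allocator()
    a.free(108, 8)               # free block [108, 116)
    a.free(100, 8)               # merges with it -> one free block [100, 116)
    assert a.malloc(12) == 100   # carved from the merged block
    assert a.malloc(4) == 112    # the 4-byte remainder went back to the free list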
--------------------------------------------------------------------------------
/AsyncDB/Node.py:
--------------------------------------------------------------------------------
from io import FileIO
from pickle import dumps, load
from struct import pack, unpack

# indicator bytes: OP marks a record as stale/invalid, ED marks it live
OP = b'\x00'
ED = b'\x01'


class IndexNode:
    def __init__(self, is_leaf=True, file: FileIO = None):
        self.ptr = 0
        self.size = 0

        if file is None:
            self.is_leaf = is_leaf
            self.keys = []
            self.ptrs_value = []
            if not is_leaf:
                self.ptrs_child = []
        else:
            self.load(file)

    def __bytes__(self):
        result = dumps((self.is_leaf, self.keys)) + b''.join(pack('Q', ptr) for ptr in self.ptrs_value)
        if not self.is_leaf:
            result += b''.join(pack('Q', ptr) for ptr in self.ptrs_child)
        self.size = len(result)
        return result

    def load(self, file: FileIO):
        self.ptr = file.tell()
        self.is_leaf, self.keys = load(file)

        # an internal node stores len(keys) value pointers plus len(keys) + 1 child pointers
        ptr_num = len(self.keys)
        if not self.is_leaf:
            ptr_num += (ptr_num + 1)
        ptrs = unpack('Q' * ptr_num, file.read(8 * ptr_num))

        if self.is_leaf:
            self.ptrs_value = list(ptrs)
        else:
            ptr_num //= 2
            self.ptrs_value = list(ptrs[:ptr_num])
            self.ptrs_child = list(ptrs[ptr_num:])
        self.size = file.tell() - self.ptr

    def dump(self, file: FileIO):
        self.ptr = file.tell()
        file.write(bytes(self))

    def clone(self):
        result = IndexNode(is_leaf=self.is_leaf)
        result.ptr = self.ptr
        result.size = self.size

        result.keys = self.keys[:]
        result.ptrs_value = self.ptrs_value[:]
        if not result.is_leaf:
            result.ptrs_child = self.ptrs_child[:]
        return result

    def nth_child_ads(self, n: int) -> int:
        assert self.ptr > 0 and self.size > 0 and not self.is_leaf
        # children_num = val_num + 1
        return self.ptr + self.size - (len(self.keys) + 1 - n) * 8

    def nth_value_ads(self, n: int) -> int:
        assert self.ptr > 0 and self.size > 0
        tail = self.ptr + self.size if self.is_leaf else self.nth_child_ads(0)
        return tail - (len(self.keys) - n) * 8


class ValueNode:
    def __init__(self, key=None, value=None, file: FileIO = None):
        self.ptr = 0
        self.size = 0

        if file is None:
            self.key = key
            self.value = value
        else:
            self.load(file)

    def __bytes__(self):
        assert self.key is not None
        result = ED + dumps((self.key, self.value))
        self.size = len(result)
        return result

    def load(self, file: FileIO):
        self.ptr = file.tell()
        indicator = file.read(1)
        assert indicator in (OP, ED)
        self.key, self.value = load(file)
        self.size = file.tell() - self.ptr

    def dump(self, file: FileIO):
        self.ptr = file.tell()
        file.write(bytes(self))
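

# A hedged round-trip demo: a ValueNode serializes as a one-byte indicator (ED)
# followed by a pickled (key, value) pair. BytesIO stands in for the FileIO in
# the type hints; any seekable binary file-like object works.
if __name__ == '__main__':
    from io import BytesIO

    node = ValueNode('k', [1, 2, 3])
    buf = BytesIO()
    node.dump(buf)
    buf.seek(0)
    clone = ValueNode(file=buf)
    assert (clone.key, clone.value) == ('k', [1, 2, 3]) and clone.size == node.size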
--------------------------------------------------------------------------------
/AsyncDB/TaskQue.py:
--------------------------------------------------------------------------------
from bisect import bisect
from collections import deque, namedtuple

Memo = namedtuple('Memo', 'head tail')


class Task:
    # every query gets a Task, which maintains its view of the pointer mapping
    def __init__(self, task_id: int, is_active: bool, command_num=0):
        self.id = task_id
        self.is_active = is_active
        self.command_num = command_num

        if is_active:
            self.ptrs = []
            self.free_param = None

    def __lt__(self, other: 'Task'):
        return self.id < other.id


class TaskQue:
    # the queue preserves ACID semantics under async interleaving
    def __init__(self):
        self.next_id = 0
        self.que = deque()
        # {..., ptr: ([..., id], [..., memo])}
        self.virtual_map = {}

    def create(self, is_active: bool) -> Task:
        if is_active or not self.que or self.que[-1].is_active:
            token = Task(self.next_id, is_active)
            self.next_id += 1
            self.que.append(token)
        else:
            token = self.que[-1]
        return token

    def set(self, token: Task, ptr: int, head, tail):
        if ptr == 0:
            return

        memo = Memo(head, tail)
        if ptr in self.virtual_map:
            id_list, memo_list = self.virtual_map[ptr]
        else:
            id_list = []
            memo_list = []
            self.virtual_map[ptr] = (id_list, memo_list)

        # reuse the memo written by the same task
        if id_list and id_list[-1] == token.id:
            memo_list[-1] = memo
        else:
            id_list.append(token.id)
            memo_list.append(memo)
            token.ptrs.append(ptr)

    def get(self, token: Task, ptr: int, depend=0, is_active=True):
        def get_depend_id():
            if depend in self.virtual_map:
                id_list, _ = self.virtual_map[depend]
                index = bisect(id_list, token.id)
                if index - 1 >= 0:
                    return id_list[index - 1]
                elif index < len(id_list):
                    return id_list[index]
            else:
                return 0

        # look up the version visible to this task
        if ptr in self.virtual_map:
            depend_id = get_depend_id()
            id_list, memo_list = self.virtual_map[ptr]
            index = bisect(id_list, token.id)

            result = None
            if index - 1 >= 0 and depend_id <= id_list[index - 1]:
                result = memo_list[index - 1].tail
            elif index < len(id_list) and depend_id <= id_list[index]:
                result = memo_list[index].head

            if is_active and not (isinstance(result, int) or result is None):
                result = result.clone()
            return result

    def is_canceled(self, token: Task, ptr: int) -> bool:
        if ptr in self.virtual_map:
            id_list, memo_list = self.virtual_map[ptr]
            if id_list[-1] > token.id or not memo_list[-1].tail:
                return True

    def clean(self):
        while self.que:
            head = self.que.popleft()
            if head.command_num > 0:
                self.que.appendleft(head)
                break
            else:
                if head.is_active:
                    head.free_param()
                    for ptr in head.ptrs:
                        id_list, memo_list = self.virtual_map[ptr]
                        del id_list[0]
                        del memo_list[0]
                        if not id_list:
                            del self.virtual_map[ptr]
        else:
            # the queue fully drained, so ids can restart from zero
            self.next_id = 0
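

# A hedged demo of the version mapping, not used by the engine: an older reader
# still sees the pre-write value while the writer sees its own update. Here 16,
# 'old' and 'new' are arbitrary stand-ins for a pointer and two node versions.
if __name__ == '__main__':
    que = TaskQue()
    reader = que.create(is_active=False)    # id 0, created before the write
    writer = que.create(is_active=True)     # id 1
    que.set(writer, 16, 'old', 'new')       # ptr 16: head='old' -> tail='new'
    assert que.get(reader, 16, is_active=False) == 'old'
    assert que.get(writer, 16, is_active=False) == 'new'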
--------------------------------------------------------------------------------
/AsyncDB/Engine.py:
--------------------------------------------------------------------------------
from asyncio import ensure_future, Lock, sleep, get_event_loop
from bisect import insort, bisect, bisect_left
from collections import UserList
from contextlib import suppress
from multiprocessing import Process
from os import rename, remove
from os.path import getsize, isfile
from pickle import load, UnpicklingError
from struct import pack, unpack

from .Allocator import Allocator
from .AsyncFile import AsyncFile
from .Node import IndexNode, ValueNode
from .TaskQue import TaskQue, Task


class SortedList(UserList):
    def append(self, item):
        insort(self.data, item)


OP = b'\x00'
ED = b'\x01'
MIN_DEGREE = 128


class BasicEngine:
    # basic transaction machinery
    def __init__(self, filename: str):
        if not isfile(filename):
            with open(filename, 'wb') as file:
                # indicator
                file.write(OP)
                # root
                file.write(pack('Q', 9))
                self.root = IndexNode(is_leaf=True)
                self.root.dump(file)
        else:
            with open(filename, 'rb+') as file:
                if file.read(1) == OP:
                    # the file was not closed cleanly: repair it in a subprocess, then reopen
                    file.close()
                    p = Process(target=repair, args=(filename,))
                    p.start()
                    p.join()
                    return self.__init__(filename)
                else:
                    ptr = unpack('Q', file.read(8))[0]
                    file.seek(ptr)
                    self.root = IndexNode(file=file)
                    file.seek(0)
                    file.write(OP)

        self.allocator = Allocator()
        self.async_file = AsyncFile(filename)
        self.command_que = SortedList()
        self.file = open(filename, 'rb+', buffering=0)
        self.lock = Lock()
        self.on_interval = (0, 1)
        self.on_write = False
        self.task_que = TaskQue()

    def malloc(self, size: int) -> int:
        def is_inside(ptr: int) -> bool:
            if self.on_write:
                begin, end = self.on_interval
                return min(ptr + size, end) - max(ptr, begin) >= 0

        ptr = self.allocator.malloc(size)
        if ptr and is_inside(ptr):
            self.free(ptr, size)
            ptr = 0
        if not ptr:
            ptr = self.async_file.size
            if is_inside(ptr):
                ptr += 1
                self.async_file.size += 1
            self.async_file.size += size
        return ptr

    def free(self, ptr: int, size: int):
        self.allocator.free(ptr, size)

    def time_travel(self, token: Task, node: IndexNode):
        # replace stale pointers in the node with the versions visible to this task
        address = node.nth_value_ads(0)
        for i in range(len(node.ptrs_value)):
            ptr = self.task_que.get(token, address, node.ptr)
            if ptr:
                node.ptrs_value[i] = ptr
            address += 8
        if not node.is_leaf:
            for i in range(len(node.ptrs_child)):
                ptr = self.task_que.get(token, address, node.ptr)
                if ptr:
                    node.ptrs_child[i] = ptr
                address += 8

    def a_command_done(self, token: Task):
        token.command_num -= 1
        if token.command_num == 0:
            self.task_que.clean()
            if not self.task_que.que and self.lock.locked():
                self.lock.release()

    # cumulation
    def do_cum(self, token: Task, free_nodes, command_map):
        def func():
            for node in free_nodes:
                self.free(node.ptr, node.size)

        token.free_param = func
        for ptr, param in command_map.items():
            data, depend = param if isinstance(param, tuple) else (param, 0)
            self.ensure_write(token, ptr, data, depend)
        self.time_travel(token, self.root)
        self.root = self.root.clone()

    def ensure_write(self, token: Task, ptr: int, data: bytes, depend=0):
        async def coro():
            while self.command_que:
                ptr, token, data, depend = self.command_que.pop(0)
                cancel = depend and self.task_que.is_canceled(token, depend)
                if not cancel:
                    cancel = self.task_que.is_canceled(token, ptr)
                if not cancel:
                    # make sure the write boundaries never touch
                    self.on_interval = (ptr - 1, ptr + len(data) + 1)
                    await self.async_file.write(ptr, data)
                self.a_command_done(token)
            self.on_write = False

        if not self.on_write:
            self.on_write = True
            ensure_future(coro())
        # sorted by ptr, then by token.id
        self.command_que.append((ptr, token, data, depend))
        token.command_num += 1

    def close(self):
        self.file.seek(0)
        self.file.write(ED)
        self.file.close()
        self.async_file.close()


def repair(filename: str):
    # scan the damaged file for live (ED) value records and replay them into a fresh DB
    async def coro():
        temp = '__' + filename
        engine = Engine(temp)

        size = getsize(filename)
        with open(filename, 'rb') as file:
            file.seek(9)
            while file.tell() != size:
                indicator = file.read(1)
                if indicator != ED:
                    continue
                with suppress(EOFError, UnpicklingError):
                    item = load(file)
                    if isinstance(item, tuple) and len(item) == 2:
                        engine.set(*item)
                        await sleep(0)

        if engine.task_que.que:
            await engine.lock.acquire()
            await engine.lock.acquire()
        engine.close()
        remove(filename)
        rename(temp, filename)

    loop = get_event_loop()
    loop.run_until_complete(coro())
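

# Layout note (a hedged inspection sketch, not used by the engine): the first
# nine bytes of a database file are the indicator byte plus the root offset.
#
#     with open('Test.db', 'rb') as file:
#         indicator = file.read(1)                 # OP: dirty/open, ED: cleanly closed
#         root_ptr = unpack('Q', file.read(8))[0]  # offset of the root IndexNode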


class Engine(BasicEngine):
    # B-Tree core
    async def get(self, key):
        token = self.task_que.create(is_active=False)
        token.command_num += 1

        async def travel(ptr: int):
            init = self.task_que.get(token, ptr, is_active=False)
            if not init:
                init = await self.async_file.exec(ptr, lambda f: IndexNode(file=f))

            index = bisect(init.keys, key)
            if init.keys[index - 1] == key:
                ptr = self.task_que.get(token, init.nth_value_ads(index - 1), init.ptr) or init.ptrs_value[index - 1]
                val = await self.async_file.exec(ptr, lambda f: ValueNode(file=f))
                assert val.key == key
                self.a_command_done(token)
                return val.value

            elif not init.is_leaf:
                ptr = self.task_que.get(token, init.nth_child_ads(index), init.ptr) or init.ptrs_child[index]
                return await travel(ptr)
            else:
                return self.a_command_done(token)

        # the root's ptrs are kept up to date in memory
        index = bisect(self.root.keys, key)
        if index - 1 >= 0 and self.root.keys[index - 1] == key:
            ptr = self.root.ptrs_value[index - 1]
            val = await self.async_file.exec(ptr, lambda f: ValueNode(file=f))
            assert val.key == key
            self.a_command_done(token)
            return val.value

        elif not self.root.is_leaf:
            return await travel(self.root.ptrs_child[index])
        else:
            return self.a_command_done(token)

    def set(self, key, value):
        token = self.task_que.create(is_active=True)
        free_nodes = []
        # {..., ptr: data OR (data, depend)}
        command_map = {}

        def replace(address: int, ptr: int, depend: int):
            self.file.seek(ptr)
            org_val = ValueNode(file=self.file)
            if org_val.value != value:
                # write the new value node
                val = ValueNode(key, value)
                self.file.seek(self.async_file.size)
                val.dump(self.file)
                self.async_file.size += val.size
                # mark the old node as stale
                self.file.seek(org_val.ptr)
                self.file.write(OP)

                # free
                free_nodes.append(org_val)
                # sync
                self.task_que.set(token, address, org_val.ptr, val.ptr)
                # command
                self.ensure_write(token, address, pack('Q', val.ptr), depend)
            self.do_cum(token, free_nodes, command_map)

        def split(address: int, par: IndexNode, child_index: int, child: IndexNode, depend: int):
            org_par = par.clone()
            org_child = child.clone()

            # give half of the data to a new sibling
            mi = (len(child.keys) - 1) // 2 + 1
            sibling = IndexNode(is_leaf=child.is_leaf)
            sibling.keys = child.keys[mi:]
            sibling.ptrs_value = child.ptrs_value[mi:]
            del child.keys[mi:]
            del child.ptrs_value[mi:]
            if not sibling.is_leaf:
                sibling.ptrs_child = child.ptrs_child[mi:]
                del child.ptrs_child[mi:]

            # the parent takes one separator key
            par.keys.insert(child_index, child.keys.pop())
            par.ptrs_value.insert(child_index, child.ptrs_value.pop())

            # allocate space
            child_b = bytes(child)
            sibling_b = bytes(sibling)
            child.ptr = self.malloc(child.size)
            sibling.ptr = self.malloc(sibling.size)

            par.ptrs_child[child_index] = child.ptr
            par.ptrs_child.insert(child_index + 1, sibling.ptr)
            par_b = bytes(par)
            par.ptr = self.malloc(par.size)
            # updates done

            # free
            free_nodes.extend((org_par, org_child))
            # sync
            _ = None
            for ptr, head, tail in ((address, org_par.ptr, par.ptr),
                                    (org_par.ptr, org_par, _), (org_child.ptr, org_child, _),
                                    (par.ptr, _, par), (child.ptr, _, child), (sibling.ptr, _, sibling)):
                self.task_que.set(token, ptr, head, tail)
            # commands
            command_map.update({address: (pack('Q', par.ptr), depend),
                                par.ptr: par_b, child.ptr: child_b, sibling.ptr: sibling_b})

        cursor = self.root
        address = 1
        depend = 0
        # the root is nearly full
        if len(cursor.keys) == 2 * MIN_DEGREE - 1:
            # create a new root
            root = IndexNode(is_leaf=False)
            root.ptrs_child.append(self.root.ptr)
            split(address, root, 0, self.root, depend)
            self.root = cursor = root

        index = bisect(cursor.keys, key)
        # check whether the key already exists
        if cursor.keys and cursor.keys[index - 1] == key:
            return replace(cursor.nth_value_ads(index - 1), cursor.ptrs_value[index - 1], cursor.ptr)

        # walk down to a leaf
        while not cursor.is_leaf:
            index = bisect(cursor.keys, key)
            ptr = cursor.ptrs_child[index]
            child = self.task_que.get(token, ptr)
            if not child:
                self.file.seek(ptr)
                child = IndexNode(file=self.file)
                self.time_travel(token, child)

            i = bisect_left(child.keys, key)
            if i < len(child.keys) and child.keys[i] == key:
                return replace(child.nth_value_ads(i), child.ptrs_value[i], child.ptr)

            if len(child.keys) == 2 * MIN_DEGREE - 1:
                split(address, cursor, index, child, depend)
                if cursor.keys[index] < key:
                    # the path moves to the sibling, which is guaranteed to be in task_que
                    index += 1
                    ptr = cursor.ptrs_child[index]
                    child = self.task_que.get(token, ptr)
            address = cursor.nth_child_ads(index)
            depend = cursor.ptr
            cursor = child

        # reached the leaf
        val = ValueNode(key, value)
        val_b = bytes(val)
        val.ptr = self.malloc(val.size)
        self.file.seek(val.ptr)
        self.file.write(val_b)

        org_cursor = cursor.clone()
        index = bisect(cursor.keys, key)
        cursor.keys.insert(index, val.key)
        cursor.ptrs_value.insert(index, val.ptr)
        cursor_b = bytes(cursor)
        cursor.ptr = self.malloc(cursor.size)
        # updates done

        # free
        free_nodes.append(org_cursor)
        # sync
        _ = None
        for ptr, head, tail in ((address, org_cursor.ptr, cursor.ptr),
                                (org_cursor.ptr, org_cursor, _), (cursor.ptr, _, cursor)):
            self.task_que.set(token, ptr, head, tail)
        # commands
        command_map.update({address: (pack('Q', cursor.ptr), depend), cursor.ptr: cursor_b})
        self.do_cum(token, free_nodes, command_map)

    def pop(self, key):
        token = self.task_que.create(is_active=True)
        free_nodes = []
        command_map = {}

        def indicate(val: ValueNode):
            self.file.seek(val.ptr)
            self.file.write(OP)
            free_nodes.append(val)

        def fetch(ptr: int) -> IndexNode:
            result = self.task_que.get(token, ptr)
            if not result:
                self.file.seek(ptr)
                result = IndexNode(file=self.file)
                self.time_travel(token, result)
            return result

        def left_to_right(address: int, par: IndexNode, val_index: int,
                          left_child: IndexNode, right_child: IndexNode, depend: int):
            org_par = par.clone()
            org_left = left_child.clone()
            org_right = right_child.clone()

            # in-memory rearrangement
            last_val_key = left_child.keys.pop()
            last_val_ptr = left_child.ptrs_value.pop()
            val_key = par.keys[val_index]
            val_ptr = par.ptrs_value[val_index]

            par.keys[val_index] = last_val_key
            par.ptrs_value[val_index] = last_val_ptr
            right_child.keys.insert(0, val_key)
            right_child.ptrs_value.insert(0, val_ptr)

            if not left_child.is_leaf:
                last_ptr_child = left_child.ptrs_child.pop()
                right_child.ptrs_child.insert(0, last_ptr_child)

            # space allocation
            left_b = bytes(left_child)
            right_b = bytes(right_child)
            left_child.ptr = self.malloc(left_child.size)
            right_child.ptr = self.malloc(right_child.size)

            par.ptrs_child[val_index] = left_child.ptr
            par.ptrs_child[val_index + 1] = right_child.ptr
            par_b = bytes(par)
            par.ptr = self.malloc(par.size)
            # updates done

            # free
            free_nodes.extend((org_par, org_left, org_right))
            # sync
            _ = None
            for ptr, head, tail in ((address, org_par.ptr, par.ptr),
                                    (org_par.ptr, org_par, _), (par.ptr, _, par),
                                    (org_left.ptr, org_left, _), (left_child.ptr, _, left_child),
                                    (org_right.ptr, org_right, _), (right_child.ptr, _, right_child)):
                self.task_que.set(token, ptr, head, tail)
            # commands
            command_map.update({address: (pack('Q', par.ptr), depend),
                                par.ptr: par_b, left_child.ptr: left_b, right_child.ptr: right_b})

        def right_to_left(address: int, par: IndexNode, val_index: int,
                          left_child: IndexNode, right_child: IndexNode, depend: int):
            org_par = par.clone()
            org_left = left_child.clone()
            org_right = right_child.clone()

            # in-memory rearrangement
            first_val_key = right_child.keys.pop(0)
            first_val_ptr = right_child.ptrs_value.pop(0)
            val_key = par.keys[val_index]
            val_ptr = par.ptrs_value[val_index]

            par.keys[val_index] = first_val_key
            par.ptrs_value[val_index] = first_val_ptr
            left_child.keys.append(val_key)
            left_child.ptrs_value.append(val_ptr)

            if not right_child.is_leaf:
                first_ptr_child = right_child.ptrs_child.pop(0)
                left_child.ptrs_child.append(first_ptr_child)

            # space allocation
            left_b = bytes(left_child)
            right_b = bytes(right_child)
            left_child.ptr = self.malloc(left_child.size)
            right_child.ptr = self.malloc(right_child.size)

            par.ptrs_child[val_index] = left_child.ptr
            par.ptrs_child[val_index + 1] = right_child.ptr
            par_b = bytes(par)
            par.ptr = self.malloc(par.size)
            # updates done

            # free
            free_nodes.extend((org_par, org_left, org_right))
            # sync
            _ = None
            for ptr, head, tail in ((address, org_par.ptr, par.ptr),
                                    (org_par.ptr, org_par, _), (par.ptr, _, par),
                                    (org_left.ptr, org_left, _), (left_child.ptr, _, left_child),
                                    (org_right.ptr, org_right, _), (right_child.ptr, _, right_child)):
                self.task_que.set(token, ptr, head, tail)
            # commands
            command_map.update({address: (pack('Q', par.ptr), depend),
                                par.ptr: par_b, left_child.ptr: left_b, right_child.ptr: right_b})

        def merge_left(address: int, par: IndexNode, val_index: int,
                       left_child: IndexNode, cursor: IndexNode, depend: int):
            org_par = par.clone()
            org_cursor = cursor.clone()

            # in-memory rearrangement
            val_key = par.keys.pop(val_index)
            val_ptr = par.ptrs_value.pop(val_index)
            del par.ptrs_child[val_index]

            cursor.keys = [*left_child.keys, val_key, *cursor.keys]
            cursor.ptrs_value = [*left_child.ptrs_value, val_ptr, *cursor.ptrs_value]
            if not left_child.is_leaf:
                cursor.ptrs_child = [*left_child.ptrs_child, *cursor.ptrs_child]

            # space allocation
            cursor_b = bytes(cursor)
            cursor.ptr = self.malloc(cursor.size)

            par.ptrs_child[val_index] = cursor.ptr
            par_b = bytes(par)
            par.ptr = self.malloc(par.size)
            # updates done

            # free
            free_nodes.extend((org_par, org_cursor, left_child))
            # sync
            _ = None
            for ptr, head, tail in ((address, org_par.ptr, par.ptr),
                                    (org_par.ptr, org_par, _), (par.ptr, _, par),
                                    (org_cursor.ptr, org_cursor, _), (cursor.ptr, _, cursor),
                                    (left_child.ptr, left_child, _)):
                self.task_que.set(token, ptr, head, tail)
            # commands
            command_map.update({address: (pack('Q', par.ptr), depend), par.ptr: par_b, cursor.ptr: cursor_b})

        def merge_right(address: int, par: IndexNode, val_index: int,
                        cursor: IndexNode, right_child: IndexNode, depend: int):
            org_par = par.clone()
            org_cursor = cursor.clone()

            # in-memory rearrangement
            val_key = par.keys.pop(val_index)
            val_ptr = par.ptrs_value.pop(val_index)
            del par.ptrs_child[val_index + 1]

            cursor.keys.extend((val_key, *right_child.keys))
            cursor.ptrs_value.extend((val_ptr, *right_child.ptrs_value))
            if not cursor.is_leaf:
                cursor.ptrs_child.extend(right_child.ptrs_child)

            # space allocation
            cursor_b = bytes(cursor)
            cursor.ptr = self.malloc(cursor.size)

            par.ptrs_child[val_index] = cursor.ptr
            par_b = bytes(par)
            par.ptr = self.malloc(par.size)
            # updates done

            # free
            free_nodes.extend((org_par, org_cursor, right_child))
            # sync
            _ = None
            for ptr, head, tail in ((address, org_par.ptr, par.ptr),
                                    (org_par.ptr, org_par, _), (par.ptr, _, par),
                                    (org_cursor.ptr, org_cursor, _), (cursor.ptr, _, cursor),
                                    (right_child.ptr, right_child, _)):
                self.task_que.set(token, ptr, head, tail)
            # commands
            command_map.update({address: (pack('Q', par.ptr), depend), par.ptr: par_b, cursor.ptr: cursor_b})

        def travel(address: int, init: IndexNode, key, depend: int):
            index = bisect(init.keys, key) - 1

            def key_in_leaf():
                org_init = init.clone()
                self.file.seek(init.ptrs_value[index])
                val = ValueNode(file=self.file)
                # in-memory removal
                del init.keys[index]
                del init.ptrs_value[index]
                # space allocation
                init_b = bytes(init)
                init.ptr = self.malloc(init.size)
                # free
                indicate(val)
                free_nodes.append(org_init)
                # sync
                _ = None
                for ptr, head, tail in ((address, org_init.ptr, init.ptr),
                                        (org_init.ptr, org_init, _), (init.ptr, _, init)):
                    self.task_que.set(token, ptr, head, tail)
                # commands
                command_map.update({address: (pack('Q', init.ptr), depend), init.ptr: init_b})
                return val.value

            def root_empty(successor: IndexNode):
                free_nodes.append(self.root)
                _ = None
                for ptr, head, tail in ((address, self.root.ptr, successor.ptr),
                                        (self.root.ptr, self.root, _), (successor.ptr, _, successor)):
                    self.task_que.set(token, ptr, head, tail)
                command_map[address] = pack('Q', successor.ptr)
                self.root = successor

            # the key is located in this node
            if index >= 0 and init.keys[index] == key:
                # in a leaf
                if init.is_leaf:
                    return key_in_leaf()
                # in an internal node
                else:
                    left_ptr = init.ptrs_child[index]
                    left_child = fetch(left_ptr)
                    right_ptr = init.ptrs_child[index + 1]
                    right_child = fetch(right_ptr)

                    # the left child has >= t keys
                    if len(left_child.keys) >= MIN_DEGREE:
                        left_to_right(address, init, index, left_child, right_child, depend)
                        return travel(init.nth_child_ads(index + 1), right_child, key, init.ptr)
                    # the right child has >= t keys
                    elif len(right_child.keys) >= MIN_DEGREE:
                        right_to_left(address, init, index, left_child, right_child, depend)
                        return travel(init.nth_child_ads(index), left_child, key, init.ptr)
                    # both children have < t keys
                    else:
                        merge_left(address, init, index, left_child, right_child, depend)
                        if len(self.root.keys) == 0:
                            root_empty(right_child)
                        return travel(init.nth_child_ads(index), right_child, key, init.ptr)
            # descend
            elif not init.is_leaf:
                index += 1
                ptr = init.ptrs_child[index]
                cursor = fetch(ptr)

                # the target child has < t keys
                if len(cursor.keys) < MIN_DEGREE:
                    left_sibling = right_sibling = None

                    if index - 1 >= 0:
                        left_ptr = init.ptrs_child[index - 1]
                        left_sibling = fetch(left_ptr)
                        # the left sibling has >= t keys
                        if len(left_sibling.keys) >= MIN_DEGREE:
                            left_to_right(address, init, index - 1, left_sibling, cursor, depend)
                            return travel(init.nth_child_ads(index), cursor, key, init.ptr)

                    if index + 1 < len(init.ptrs_child):
                        right_ptr = init.ptrs_child[index + 1]
                        right_sibling = fetch(right_ptr)
                        # the right sibling has >= t keys
                        if len(right_sibling.keys) >= MIN_DEGREE:
                            right_to_left(address, init, index, cursor, right_sibling, depend)
                            return travel(init.nth_child_ads(index), cursor, key, init.ptr)

                    # neither sibling has >= t keys
                    if left_sibling:
                        index -= 1
                        merge_left(address, init, index, left_sibling, cursor, depend)
                    else:
                        merge_right(address, init, index, cursor, right_sibling, depend)
                    if len(self.root.keys) == 0:
                        root_empty(cursor)
                return travel(init.nth_child_ads(index), cursor, key, init.ptr)

        travel(1, self.root, key, 0)
        self.do_cum(token, free_nodes, command_map)

    async def items(self, item_from=None, item_to=None, max_len=0, reverse=False):
        if item_from is not None and item_to is not None:
            assert item_from <= item_to
        token = self.task_que.create(is_active=False)
        token.command_num += 1
        result = []

        async def travel(init: IndexNode):
            async def get_item(index: int):
                ptr = init.ptrs_value[index]
                val = await self.async_file.exec(ptr, lambda f: ValueNode(file=f))
                return val.key, val.value

            async def get_child(index: int) -> IndexNode:
                ptr = init.ptrs_child[index]
                child = self.task_que.get(token, ptr, is_active=False)
                if not child:
                    child = await self.async_file.exec(ptr, lambda f: IndexNode(file=f))
                    self.time_travel(token, child)
                return child

            # lo_key >= item_from
            # hi_key > item_to
            lo = 0 if item_from is None else bisect_left(init.keys, item_from)
            hi = len(init.keys) if item_to is None else bisect(init.keys, item_to)

            extend = not init.is_leaf and (item_from is None or lo == len(init.keys) or init.keys[lo] > item_from)
            if not reverse and extend:
                await travel(await get_child(lo))
            for i in range(lo, hi) if not reverse else reversed(range(lo, hi)):
                if reverse and not init.is_leaf:
                    await travel(await get_child(i + 1))

                if max_len and len(result) >= max_len:
                    return
                item = await get_item(i)
                result.append(item)

                if not reverse and not init.is_leaf:
                    await travel(await get_child(i + 1))
            if reverse and extend:
                await travel(await get_child(lo))

        await travel(self.root)
        self.a_command_done(token)
        return result
--------------------------------------------------------------------------------