├── utils ├── filter.py ├── parse.py ├── constants.py ├── system_operations │ ├── fio_runner.py │ └── get_sys_info.py ├── graph.py └── utils.py ├── requirements.txt ├── docker ├── Dockerfile └── docker_runner.py ├── gpt ├── gpt_request.py └── prompts_generator.py ├── README.md ├── options_files ├── default_options_files │ ├── initial_options_file.ini │ ├── dbbench_default_options-7.10.2.ini │ ├── bad_options.ini │ ├── good_options.ini │ ├── rocksdb_default_options.ini │ └── dbbench_default_options-8.8.1.ini └── ops_options_file.py ├── main.py └── rocksdb ├── parse_db_bench_output.py └── subprocess_manager.py /utils/filter.py: -------------------------------------------------------------------------------- 1 | def key_filter(key): 2 | if (key == 'wal_size_limit_mb'): 3 | key = 'WAL_size_limit_MB' 4 | if (key == 'wal_ttl_seconds'): 5 | key = 'WAL_ttl_seconds' 6 | return key 7 | 8 | # Options that should not be changed 9 | BLACKLIST = ['use_direct_io_for_flush_and_compaction', 10 | 'use_direct_reads', 'compression_type'] 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.6.0 2 | anyio==4.1.0 3 | black==23.11.0 4 | certifi==2023.11.17 5 | click==8.1.7 6 | distro==1.8.0 7 | exceptiongroup==1.2.0 8 | h11==0.14.0 9 | httpcore==1.0.2 10 | httpx==0.25.2 11 | idna==3.6 12 | mypy-extensions==1.0.0 13 | openai==1.3.8 14 | packaging==23.2 15 | pathspec==0.12.0 16 | platformdirs==4.1.0 17 | psutil==5.9.6 18 | py-cpuinfo==9.0.0 19 | pydantic==2.5.2 20 | pydantic_core==2.14.5 21 | python-dotenv==1.0.0 22 | sniffio==1.3.0 23 | tomli==2.0.1 24 | tqdm==4.66.1 25 | typing_extensions==4.8.0 26 | matplotlib==3.7.4 27 | deepdiff==6.7.1 28 | cgroup-monitor==0.1.2 29 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ARG TARGETPLATFORM 4 | ARG DEBIAN_FRONTEND=noninteractive 5 | 6 | # Install dependencies 7 | RUN apt-get update && apt-get install -y \ 8 | build-essential \ 9 | libgflags-dev \ 10 | libsnappy-dev \ 11 | zlib1g-dev \ 12 | libbz2-dev \ 13 | liblz4-dev \ 14 | libzstd-dev \ 15 | cmake \ 16 | git \ 17 | python3 \ 18 | python3-pip \ 19 | wget \ 20 | fio \ 21 | libjemalloc2 22 | 23 | # Setup RocksDB 24 | RUN wget https://github.com/facebook/rocksdb/archive/refs/tags/v8.8.1.tar.gz && \ 25 | tar -xzf v8.8.1.tar.gz && \ 26 | cd rocksdb-8.8.1 && \ 27 | make -j48 static_lib db_bench 28 | 29 | # Setup GPT Repo's requirements 30 | ADD gpt-assisted-rocksdb-config/requirements.txt /requirements.txt 31 | 32 | # Setup Python requirments 33 | RUN pip3 install -r requirements.txt 34 | 35 | # Setup GPT Repo 36 | ADD gpt-assisted-rocksdb-config /gpt-assisted-rocksdb-config 37 | 38 | WORKDIR /gpt-assisted-rocksdb-config 39 | CMD ["python3", "main.py"] 40 | -------------------------------------------------------------------------------- /utils/parse.py: -------------------------------------------------------------------------------- 1 | 2 | import configparser 3 | from utils.filter import key_filter 4 | 5 | def dict_to_configparser(dictionary): 6 | ''' 7 | Function to convert a dictionary to a configparser object 8 | 9 | Parameters: 10 | - dictionary (dict): The dictionary to be converted 11 | 12 | Returns: 13 | - config (configparser.ConfigParser): The configparser object 14 | ''' 15 | config = 
configparser.ConfigParser() 16 | 17 | for section, options in dictionary.items(): 18 | config[section] = {} 19 | for key, value in options.items(): 20 | config[section][key] = value 21 | 22 | return config 23 | 24 | def configparser_to_string(config_parser): 25 | ''' 26 | Function to convert a configparser object to a string 27 | 28 | Parameters: 29 | - config_parser (configparser.ConfigParser): The configparser object 30 | 31 | Returns: 32 | - string_representation (str): The string representation of the configparser object 33 | ''' 34 | string_representation = '' 35 | for section in config_parser.sections(): 36 | string_representation += f"[{section}]\n" 37 | for key, value in config_parser[section].items(): 38 | key = key_filter(key) 39 | string_representation += f" {key}={value}\n" 40 | string_representation += '\n' 41 | return string_representation -------------------------------------------------------------------------------- /gpt/gpt_request.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from openai import OpenAI 4 | 5 | # Environment variables 6 | client = OpenAI() 7 | client.api_key = os.getenv("OPENAI_API_KEY") 8 | 9 | def request_gpt(system_content, user_contents, temperature): 10 | ''' 11 | Function to make an API call to GPT-4 12 | 13 | Parameters: 14 | - system_content: string containing the system information 15 | - chunk_string: string containing the chunk of the options file 16 | - previous_option_files: list of tuples containing the previous option files and their benchmark results 17 | - temperature: Float (0-1) controlling GPT-4's output randomness. 18 | - average_cpu_used: Float indicating average CPU usage (default -1.0). 19 | - average_mem_used: Float indicating average memory usage (default -1.0). 20 | - test_name: String stating the benchmark test. 21 | 22 | Returns: 23 | - matches: string containing the options file generated by GPT-4 24 | ''' 25 | messages = [{"role": "system", "content": system_content}] 26 | for content in user_contents: 27 | messages.append({"role": "user", "content": content}) 28 | 29 | 30 | # Assuming 'client' is already defined and authenticated for GPT-4 API access 31 | completion = client.chat.completions.create( 32 | model="gpt-4-0125-preview", 33 | messages=messages, 34 | temperature=temperature, 35 | max_tokens=4096, 36 | frequency_penalty=0, 37 | presence_penalty=0, 38 | ) 39 | 40 | # Extract the assistant's reply 41 | assistant_reply = completion.choices[0].message.content 42 | matches = re.match("[\s\S]*```([\s\S]*)```([\s\S]*)", assistant_reply) 43 | 44 | # Check if result is good 45 | if matches is not None: 46 | return matches 47 | 48 | # Invalid response 49 | with open("invalid_assistant_reply.txt", "a") as file: 50 | file.write(assistant_reply + "\n\n" + "-" * 150 + "\n\n") 51 | return None 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ELMo-Tune ([HotStorage'24 Best Paper] Can Modern LLMs Tune and Configure LSM-based Key-Value Stores?) 2 | 3 | 🏆HotStorage'24 Best Paper - Can Modern LLMs Tune and Configure LSM-based Key-Value Stores?
4 | Paper URL: [https://doi.org/10.1145/3655038.3665954](https://doi.org/10.1145/3655038.3665954) 5 | 6 | ## Features 7 | This project will run a series of tests using the db_bench tool. The tests will be run using the default configuration and a series of configurations that will be determined by the research. The results of the tests will be compared to determine the best configuration for RocksDB when using ELMo-Tune. 8 | 9 | ## Prerequisites 10 | This project requires Python 3.6 or higher. The following dependencies are required: 11 | ```bash 12 | # Instructions for Ubuntu 20.04 13 | # Install dependencies 14 | apt-get update && apt-get install -y build-essential libgflags-dev libsnappy-dev zlib1g-dev libbz2-dev liblz4-dev libzstd-dev git python3 python3-pip wget fio 15 | 16 | # Install and Build RocksDB 8.8.1 17 | wget https://github.com/facebook/rocksdb/archive/refs/tags/v8.8.1.tar.gz 18 | tar -xzf v8.8.1.tar.gz 19 | cd rocksdb-8.8.1 20 | make -j static_lib db_bench 21 | 22 | git clone https://github.com/asu-idi/ELMo-Tune 23 | cd ELMo-Tune 24 | 25 | # Install requirements 26 | pip install -r requirements.txt 27 | ``` 28 | 29 | ## Setup 30 | To run the tests sucessfully, some variables need to be defined. 31 | ```bash 32 | # You need OpenAI's API to run the code sucessfully. 33 | export OPENAI_API_KEY= 34 | ``` 35 | Additionally, set the DB_BENCH_PATH in utils/constants.py along with any other paths required for your system setup. 36 | 37 | ## How to use 38 | To run the tests, run the following command: 39 | ```bash 40 | # e.g. Run a random write (fillrandom) test with the db stored in the '/data/gpt_project/db' folder and with output in the './output' directory 41 | python3 main.py --workload=fillrandom --device=data --output=./output --num_entries=10000 42 | 43 | # You can explore the options using the --help command (or using the constants.py file) 44 | # -c --case CASE Specify the case number 45 | # -d --device DEVICE Specify the device 46 | # -t --workload WORKLOAD Specify the test name 47 | # -v --version VERSION Specify the version of RocksDB 48 | # -o --output OUTPUT Specify the output path 49 | # -n --num_entries NUM_ENTRIES Specify the number of entries 50 | # -s --side_checker SIDE_CHECKER Specify if side checker is enabled 51 | ``` 52 | 53 | > You can alternatively also use the Docker environment that can be built using the Dockerfile in the docker folder. 
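For reference, here is a minimal sketch of that Docker flow. The image tag `gptproject:latest` is the one `docker/docker_runner.py` launches; the clone directory name and the mount/environment values below are illustrative assumptions. Because the Dockerfile `ADD`s paths under `gpt-assisted-rocksdb-config/`, the build context must be the directory *containing* the clone:

```bash
# Assumes the repo is cloned as ./gpt-assisted-rocksdb-config (adjust to your clone name)
docker build -f gpt-assisted-rocksdb-config/docker/Dockerfile -t gptproject:latest .

# Launch one container by hand; docker_runner.py automates this across CPU/memory/workload combinations
docker run --rm \
  -e OPENAI_API_KEY=$OPENAI_API_KEY \
  -e DEVICE=data \
  -e TEST_NAME=fillrandom \
  -v /data/gpt_project:/data/gpt_project \
  gptproject:latest
```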
54 | -------------------------------------------------------------------------------- /utils/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | import argparse 4 | from datetime import datetime 5 | 6 | load_dotenv() 7 | 8 | def path_of_output_folder(): 9 | ''' 10 | Set the output folder directory 11 | 12 | Parameters: 13 | - None 14 | 15 | Returns: 16 | - output_folder_dir (str): The output folder directory 17 | ''' 18 | current_datetime = datetime.now() 19 | date_time_string = current_datetime.strftime("%Y-%m-%d_%H-%M-%S") 20 | output_folder_dir = f"output/output_{DEVICE}/output_{date_time_string}" 21 | 22 | os.makedirs(output_folder_dir, exist_ok=True) 23 | print(f"[UTL] Using output folder: {output_folder_dir}") 24 | 25 | return output_folder_dir 26 | 27 | # Check the environement variables, set to default if not found 28 | env_DEVICE = os.getenv("DEVICE", None) 29 | env_TEST_NAME = os.getenv("TEST_NAME", None) 30 | env_CASE_NUMBER = os.getenv("CASE_NUMBER", 1) 31 | env_VERSION = os.getenv("VERSION", "8.8.1") 32 | env_OUTPUT_PATH = os.getenv("OUTPUT_PATH", None) 33 | env_NUM_ENTRIES = os.getenv("NUM_ENTRIES", 3000000000) 34 | env_SIDE_CHECKER = os.getenv("SIDE_CHECKER", True) 35 | 36 | # Parse the arguments. They replace the environment variables if they are set 37 | parser = argparse.ArgumentParser(description='Description of your script') 38 | parser.add_argument('-c', '--case', type=int, default=env_CASE_NUMBER, help='Specify the case number') 39 | parser.add_argument('-d', '--device', type=str, default=env_DEVICE, help='Specify the device') 40 | parser.add_argument('-t', '--workload', type=str, default=env_TEST_NAME, help='Specify the test name') 41 | parser.add_argument('-v', '--version', type=str, default=env_VERSION, help='Specify the version of RocksDB') 42 | parser.add_argument('-o', '--output', type=str, default=env_OUTPUT_PATH, help='Specify the output path') 43 | parser.add_argument('-n', '--num_entries', type=int, default=env_NUM_ENTRIES, help='Specify the number of entries') 44 | parser.add_argument('-s', '--side_checker', type=bool, default=env_SIDE_CHECKER, help='Specify if side checker is enabled') 45 | 46 | args = parser.parse_args() 47 | CASE_NUMBER = args.case 48 | DEVICE = args.device 49 | TEST_NAME = args.workload 50 | VERSION = args.version 51 | OUTPUT_PATH = args.output if args.output else path_of_output_folder() 52 | NUM_ENTRIES = args.num_entries 53 | SIDE_CHECKER = args.side_checker 54 | 55 | # Constants 56 | # DB_BENCH_PATH = f"/data/gpt_project/rocksdb-{VERSION}/db_bench" 57 | DB_BENCH_PATH = f"/rocksdb-{VERSION}/db_bench" 58 | DB_PATH = f"/{DEVICE}/gpt_project/db" 59 | FIO_RESULT_PATH = f"/data/gpt_project/gpt-assisted-rocksdb-config/data/fio/fio_output_{DEVICE}.txt" 60 | DEFAULT_OPTION_FILE_DIR = "options_files/default_options_files" 61 | INITIAL_OPTIONS_FILE_NAME = f"dbbench_default_options-{VERSION}.ini" 62 | OPTIONS_FILE_DIR = f"{OUTPUT_PATH}/options_file.ini" 63 | -------------------------------------------------------------------------------- /docker/docker_runner.py: -------------------------------------------------------------------------------- 1 | import docker 2 | import os 3 | 4 | client = docker.from_env() 5 | 6 | def main(): 7 | ''' 8 | Main function to run multiple docker containers one after the other. All containers mount a volume to the host machine. 
9 | Additionally, before mounting, the environment variables are updated to reflect the current iteration number and the status 10 | of the for loop which is controlling the memory and cpus. 11 | ''' 12 | 13 | cpu_list = [2, 4] 14 | memory_list = [4, 8] 15 | devices = ["nvme", "data"] 16 | tests = ["fillrandom", "readrandom", "readrandomwriterandom"] 17 | base_output_path = f"/data/gpt_project/gpt-assisted-rocksdb-config/output/output" 18 | base_db_path = f"gpt_project/dbr" 19 | 20 | for memory_cap in memory_list: 21 | for cpu_cap in cpu_list: 22 | for test in tests: 23 | print("-" * 50) 24 | print(f"Running Iteration for CPU: {cpu_cap} Memory: {memory_cap} on /{devices[0]} and /{devices[1]} for {test}") 25 | 26 | # Run docker container with mount and environment variables as in cpu and memory 27 | container = client.containers.run( 28 | "gptproject:latest", 29 | detach=True, 30 | name=f"gpt_project_c{cpu_cap}_m{memory_cap}_{devices[0]}_{test}", 31 | environment=[f"ITERATION=c{cpu_cap}m{memory_cap}", f"CPU_COUNT={cpu_cap}", f"MEMORY_MAX={memory_cap}", 32 | f"OUTPUT_PATH={base_output_path}_{devices[0]}/c{cpu_cap}_m{memory_cap}_{test}", 33 | f"DEVICE={devices[0]}", f"TEST_NAME={test}", f"DB_PATH=/{devices[0]}/{base_db_path}/{cpu_cap}_{test}"], 34 | cpu_count=cpu_cap, 35 | mem_limit=f"{memory_cap}g", 36 | volumes={"/nvme/gpt_project": {'bind': '/nvme/gpt_project', 'mode': 'rw'}, 37 | "/data/gpt_project": {'bind': '/data/gpt_project', 'mode': 'rw'}} 38 | ) 39 | 40 | # Run docker container with mount and environment variables as in cpu and memory 41 | container2 = client.containers.run( 42 | "gptproject:latest", 43 | detach=True, 44 | name=f"gpt_project_c{cpu_cap}_m{memory_cap}_{devices[1]}_{test}", 45 | environment=[f"ITERATION=c{cpu_cap}m{memory_cap}", f"CPU_COUNT={cpu_cap}", f"MEMORY_MAX={memory_cap}", 46 | f"OUTPUT_PATH={base_output_path}_{devices[1]}/c{cpu_cap}_m{memory_cap}_{test}", 47 | f"DEVICE={devices[1]}", f"TEST_NAME={test}", f"DB_PATH=/{devices[1]}/{base_db_path}/{cpu_cap}_{test}"], 48 | cpu_count=cpu_cap, 49 | mem_limit=f"{memory_cap}g", 50 | volumes={"/nvme/gpt_project": {'bind': '/nvme/gpt_project', 'mode': 'rw'}, 51 | "/data/gpt_project": {'bind': '/data/gpt_project', 'mode': 'rw'}} 52 | ) 53 | 54 | # Wait for the container to finish 55 | container.wait() 56 | container2.wait() 57 | 58 | if __name__ == "__main__": 59 | main() -------------------------------------------------------------------------------- /utils/system_operations/fio_runner.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import re 3 | import os 4 | 5 | 6 | def fio_run(test_type, file_path): 7 | ''' 8 | Function to run fio benchmark 9 | 10 | Parameters: 11 | - test_type: string containing the type of test to run 12 | 13 | Returns: 14 | - parsed_res: string containing the parsed result of the fio test 15 | ''' 16 | command = [ 17 | "fio", 18 | "--name=test", 19 | "--ioengine=posixaio", 20 | f"--rw={test_type}", 21 | "--bs=4k", 22 | "--numjobs=1", 23 | "--size=10G", 24 | "--runtime=60", 25 | "--time_based" 26 | ] 27 | 28 | print("[FIO] running fio test now", test_type + "\n") 29 | proc = subprocess.run( 30 | command, 31 | stdout=subprocess.PIPE, 32 | stderr=subprocess.STDOUT, 33 | ) 34 | 35 | output = proc.stdout.decode() 36 | print("[FIO] output :", output) 37 | 38 | parsed_res = parse_fio_output(output, test_type) 39 | 40 | with open(file_path, "a") as file: 41 | file.write(parsed_res + '\n') 42 | 43 | return parsed_res 44 | 45 | 46 | def 
get_fio_result(file_path): 47 | ''' 48 | Function to get the fio result 49 | 50 | Parameters: 51 | - file_path: string containing the path to the fio result file 52 | 53 | Returns: 54 | - content: string containing the content of the fio result 55 | ''' 56 | if (os.path.exists(file_path) and os.path.getsize(file_path) != 0): 57 | print("[FIO] File exists and is not empty. Reading file.") 58 | with open(file_path, 'r') as file: 59 | content = file.read() 60 | return content 61 | 62 | # List of test types 63 | test_types = ["randwrite", "randread", "read", "write"] 64 | for test_type in test_types: 65 | fio_result = fio_run(test_type, file_path) 66 | combined_result = '\n'.join(fio_result) 67 | 68 | print(f"[FIO] result : \n {combined_result}") 69 | delete_test_file() 70 | return combined_result 71 | 72 | 73 | def parse_fio_output(fio_result, test_type): 74 | ''' 75 | Function to parse the fio output 76 | 77 | Parameters: 78 | - fio_result: string containing the fio result 79 | - test_type: string containing the type of test to run 80 | 81 | Returns: 82 | - result_string: string containing the parsed result of the fio test 83 | ''' 84 | if test_type in ["randwrite", "write"]: 85 | pattern = re.compile(r'WRITE: bw=(.*?)\s\(.*?\),\s(.*?)\s\(.*?\)') 86 | elif test_type in ["randread", "read"]: 87 | pattern = re.compile(r'READ: bw=(.*?)\s\(.*?\),\s(.*?)\s\(.*?\)') 88 | else: 89 | print(f"[FIO] Unsupported test type: {test_type}") 90 | 91 | match = pattern.search(fio_result) 92 | if match: 93 | values_list = [match.group(1), match.group(2)] 94 | result_string = f"{test_type} bandwidth is {values_list[0]} ({values_list[1]})" 95 | print(f"[FIO] result string : {result_string}") 96 | else: 97 | print("[FIO] Pattern not found in the fio result.") 98 | 99 | return result_string 100 | 101 | 102 | def delete_test_file(): 103 | ''' 104 | Function to delete the test file 105 | ''' 106 | proc = subprocess.run( 107 | f'rm test.0.0', 108 | stdout=subprocess.PIPE, 109 | stderr=subprocess.STDOUT, 110 | shell=True 111 | ) 112 | -------------------------------------------------------------------------------- /options_files/default_options_files/initial_options_file.ini: -------------------------------------------------------------------------------- 1 | [Version] 2 | rocksdb_version=4.3.0 3 | options_file_version=1.1 4 | 5 | [DBOptions] 6 | stats_dump_period_sec=600 7 | max_manifest_file_size=18446744073709551615 8 | bytes_per_sync=8388608 9 | delayed_write_rate=2097152 10 | WAL_ttl_seconds=0 11 | WAL_size_limit_MB=0 12 | max_subcompactions=1 13 | wal_bytes_per_sync=0 14 | db_write_buffer_size=0 15 | keep_log_file_num=1000 16 | table_cache_numshardbits=4 17 | max_file_opening_threads=1 18 | writable_file_max_buffer_size=1048576 19 | random_access_max_buffer_size=1048576 20 | use_fsync=false 21 | max_total_wal_size=0 22 | max_open_files=-1 23 | skip_stats_update_on_db_open=false 24 | max_background_compactions=16 25 | manifest_preallocation_size=4194304 26 | max_background_flushes=7 27 | is_fd_close_on_exec=true 28 | max_log_file_size=0 29 | advise_random_on_open=true 30 | create_missing_column_families=false 31 | paranoid_checks=true 32 | delete_obsolete_files_period_micros=21600000000 33 | log_file_time_to_roll=0 34 | compaction_readahead_size=0 35 | create_if_missing=false 36 | use_adaptive_mutex=false 37 | enable_thread_tracking=false 38 | allow_fallocate=true 39 | error_if_exists=false 40 | recycle_log_file_num=0 41 | skip_log_error_on_recovery=false 42 | new_table_reader_for_compaction_inputs=true 43 | 
allow_mmap_reads=false 44 | allow_mmap_writes=false 45 | use_direct_reads=false 46 | use_direct_writes=false 47 | 48 | 49 | [CFOptions "default"] 50 | compaction_style=kCompactionStyleLevel 51 | compaction_filter=nullptr 52 | num_levels=6 53 | table_factory=BlockBasedTable 54 | comparator=leveldb.BytewiseComparator 55 | max_sequential_skip_in_iterations=8 56 | max_bytes_for_level_base=1073741824 57 | memtable_prefix_bloom_probes=6 58 | memtable_prefix_bloom_bits=0 59 | memtable_prefix_bloom_huge_page_tlb_size=0 60 | max_successive_merges=0 61 | arena_block_size=16777216 62 | min_write_buffer_number_to_merge=1 63 | target_file_size_multiplier=1 64 | source_compaction_factor=1 65 | max_bytes_for_level_multiplier=8 66 | max_bytes_for_level_multiplier_additional=2:3:5 67 | compaction_filter_factory=nullptr 68 | max_write_buffer_number=8 69 | level0_stop_writes_trigger=20 70 | compression=kSnappyCompression 71 | level0_file_num_compaction_trigger=4 72 | purge_redundant_kvs_while_flush=true 73 | max_write_buffer_size_to_maintain=0 74 | memtable_factory=SkipListFactory 75 | max_grandparent_overlap_factor=8 76 | expanded_compaction_factor=25 77 | hard_pending_compaction_bytes_limit=137438953472 78 | inplace_update_num_locks=10000 79 | level_compaction_dynamic_level_bytes=true 80 | level0_slowdown_writes_trigger=12 81 | filter_deletes=false 82 | verify_checksums_in_compaction=true 83 | min_partial_merge_operands=2 84 | paranoid_file_checks=false 85 | target_file_size_base=134217728 86 | optimize_filters_for_hits=false 87 | merge_operator=PutOperator 88 | compression_per_level=kNoCompression:kNoCompression:kNoCompression:kSnappyCompression:kSnappyCompression:kSnappyCompression 89 | compaction_measure_io_stats=false 90 | prefix_extractor=nullptr 91 | bloom_locality=0 92 | write_buffer_size=134217728 93 | disable_auto_compactions=false 94 | inplace_update_support=false 95 | 96 | [TableOptions/BlockBasedTable "default"] 97 | format_version=2 98 | whole_key_filtering=true 99 | no_block_cache=false 100 | checksum=kCRC32c 101 | filter_policy=rocksdb.BuiltinBloomFilter 102 | block_size_deviation=10 103 | block_size=8192 104 | block_restart_interval=16 105 | cache_index_and_filter_blocks=false 106 | pin_l0_filter_and_index_blocks_in_cache=false 107 | pin_top_level_index_and_filter=false 108 | index_type=kBinarySearch 109 | flush_block_policy_factory=FlushBlockBySizePolicyFactory -------------------------------------------------------------------------------- /options_files/ops_options_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import configparser 4 | from utils.constants import DEFAULT_OPTION_FILE_DIR, INITIAL_OPTIONS_FILE_NAME, OPTIONS_FILE_DIR 5 | from utils.filter import BLACKLIST 6 | from utils.parse import dict_to_configparser, configparser_to_string 7 | 8 | def parse_gpt_text_to_dict(gpt_output_text): 9 | ''' 10 | Function to parse the gpt output text with filters 11 | 12 | Parameters: 13 | - gpt_output_text (str): The output generated by gpt 14 | 15 | Returns: 16 | - options_dict (dict): A dictionary containing the parsed data 17 | ''' 18 | options_dict = {} 19 | 20 | for line in gpt_output_text.split("\n"): 21 | # Ignore lines starting with '#' as they are comments 22 | if not line.startswith('#'): 23 | # Split the line at the first '=' and strip whitespace 24 | parts = line.split(':', 1) 25 | if len(parts) == 1: 26 | parts = line.split('=', 1) 27 | if len(parts) == 2: 28 | # filters options that start with { - k 29 | if 
'{' not in parts[1].strip(): 30 | # filters options that are in the blacklist 31 | if parts[0].strip() not in BLACKLIST: 32 | key, value = parts[0].strip(), parts[1].strip() 33 | options_dict[key] = value 34 | 35 | return options_dict 36 | 37 | def cleanup_options_file(gpt_options_text): 38 | """ 39 | Function to clean up the options file generated by GPT 40 | - replace the values of the options in the original options file with the values generated by GPT-4 41 | eliminate 2 secnarios: 42 | 1. ```ini``` 43 | 2. ```...``` w/ multiple code blocks 44 | 45 | Parameters: 46 | - gpt_options_text: string containing the options file generated by GPT-4 47 | 48 | Returns: 49 | - config_string: string containing the options file in the original format 50 | """ 51 | clean_output_dict = parse_option_file_to_dict(open(f"{OPTIONS_FILE_DIR}").read()) 52 | 53 | # Parse the GPT-4 generated options 54 | gpt_output_dict = parse_gpt_text_to_dict(gpt_options_text) 55 | 56 | # Update the original options with GPT-4 generated value 57 | for key, value in gpt_output_dict.items(): 58 | for internal_dict in clean_output_dict: 59 | if key in clean_output_dict[internal_dict]: 60 | clean_output_dict[internal_dict][key] = gpt_output_dict[key] 61 | 62 | # Convert dictionary to configparser 63 | config_parser = dict_to_configparser(clean_output_dict) 64 | config_string = configparser_to_string(config_parser) 65 | 66 | # Save to a file 67 | with open(f"{OPTIONS_FILE_DIR}", "w") as file: 68 | file.write(config_string) 69 | return config_string 70 | 71 | def get_initial_options_file(): 72 | ''' 73 | Get the initial options file 74 | 75 | Parameters: 76 | - None 77 | 78 | Returns: 79 | - options (str): The initial options file 80 | - reasoning (str): The reasoning behind the options file 81 | ''' 82 | initial_options_file_path = os.path.join(DEFAULT_OPTION_FILE_DIR, 83 | INITIAL_OPTIONS_FILE_NAME) 84 | with open(initial_options_file_path, "r") as f: 85 | options = f.read() 86 | 87 | reasoning = f"Initial options file: {initial_options_file_path}" 88 | 89 | return options, reasoning 90 | 91 | 92 | def parse_option_file_to_dict(option_file): 93 | ''' 94 | Function to parse the given option file to a dictionary 95 | 96 | Parameters: 97 | - option_file (str): The path to the option file 98 | 99 | Returns: 100 | - parsed (dict): A dictionary containing the parsed data 101 | ''' 102 | pat = re.compile("(.*)\s*([#].*)?") 103 | config = configparser.ConfigParser() 104 | config.read_string(option_file) 105 | parsed = {section: dict(config.items(section)) 106 | for section in config.sections()} 107 | for section_name, section in parsed.items(): 108 | for k, v in section.items(): 109 | m = pat.match(v) 110 | section[k] = m[1] 111 | return parsed 112 | 113 | 114 | -------------------------------------------------------------------------------- /utils/graph.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | def plot(values, title, file): 4 | ''' 5 | Plots a single line graph based on a list of values. 6 | 7 | This function plots a simple line graph where the X-axis represents the index of each value in the list, and the Y-axis represents the value itself. 8 | 9 | Parameters: 10 | values (list): A list of numerical values to be plotted. 11 | title (str): The title of the plot. 12 | file (str): The file path where the plot image will be saved. 13 | 14 | Returns: 15 | - None. The plot is saved to the specified file path. 
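Example call with hypothetical values (main.py invokes it this way for the per-iteration ops/sec list):
    plot([118000.0, 132000.0, 127500.0], "OpsPerSec", "output/OpsPerSec.png")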
16 | 17 | ''' 18 | # Plotting 19 | plt.figure(figsize=(12, 6)) 20 | plt.plot(range(len(values)), values, label=title, linestyle='-') 21 | 22 | plt.title(title) 23 | plt.legend() 24 | plt.grid(True) 25 | 26 | plt.ylim(0, 400000) 27 | 28 | # Save the plot to a file 29 | plt.savefig(file) 30 | 31 | 32 | def plot_2axis(keys, values, title, file): 33 | ''' 34 | Plots a line graph with specified keys and values. 35 | 36 | This function is designed to plot a line graph where the X-axis is determined by the 'keys' parameter and the Y-axis by the 'values' parameter. 37 | 38 | Parameters: 39 | keys (list): A list of keys or indices for the X-axis. 40 | values (list): A list of numerical values for the Y-axis. 41 | title (str): The title of the plot. 42 | file (str): The file path where the plot image will be saved. 43 | 44 | Returns: 45 | - None. The plot is saved to the specified file path. 46 | ''' 47 | # Plotting 48 | plt.figure(figsize=(12, 6)) 49 | plt.plot(keys, values, label=title, linestyle='-') 50 | 51 | plt.title(title) 52 | plt.legend() 53 | plt.grid(True) 54 | 55 | plt.ylim(0, 400000) 56 | 57 | # Save the plot to a file 58 | plt.savefig(file) 59 | 60 | 61 | def plot_multiple(data, title, file): 62 | ''' 63 | Plots multiple line graphs from a list of data sets. 64 | 65 | This function is used to plot multiple line graphs on the same plot. Each item in the 'data' list represents a different line on the graph. 66 | 67 | Parameters: 68 | data (list of tuples): Each tuple contains two elements - a list of keys for the X-axis and a list of values for the Y-axis. 69 | title (str): The title of the plot. 70 | file (str): The file path where the plot image will be saved. 71 | 72 | Each line is labeled as 'Iteration-i' where i is the index of the data set in the 'data' list. 73 | 74 | Returns: 75 | - None. The plot is saved to the specified file path. 
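Example of the shape this function actually indexes (values are hypothetical): each item of 'data' is an entry of the options_files list built in main.py, i.e. a tuple whose second element is the benchmark-results dict, e.g.
    (options_str, {"ops_per_sec": 127500.0, "ops_per_second_graph": ([1, 2, 3], [118000.0, 132000.0, 127500.0])}, reasoning, summary_of_changes)
and the function reads element [1]["ops_per_second_graph"] of each tuple.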
76 | 77 | ''' 78 | 79 | # Plotting setup 80 | plt.figure(figsize=(12, 6)) 81 | for i, iteration in enumerate(data): 82 | keys, values = iteration[1]["ops_per_second_graph"] 83 | plt.plot(keys, values, label=f"Iteration-{i}", linestyle='-') 84 | 85 | plt.title(title) 86 | plt.legend() 87 | plt.grid(True) 88 | 89 | plt.ylim(0, 400000) 90 | 91 | # Save the plot to a file 92 | plt.savefig(file) 93 | 94 | def plot_multiple_manual(data, file): 95 | # Plotting 96 | plt.figure(figsize=(16.5, 8)) 97 | # labels = ["Default file", "Iteration 3", "Iteration 3", "Iteration 7"] 98 | labels = ["Default file", "Iteration 2", "Iteration 4", "Iteration 6"] 99 | colors = ['red', 'orange', 'royalblue', 'green'] 100 | for i, ops in enumerate(data): 101 | plt.plot(ops, label=f"{labels[i]}", linestyle='-',color=colors[i]) 102 | plt.xlabel("Time (seconds)") 103 | plt.ylabel("Throughput (kops/s)") 104 | plt.legend() 105 | 106 | 107 | plt.ylim(0, 400) 108 | plt.tight_layout() 109 | 110 | # Save the plot to a file 111 | plt.savefig(file) 112 | 113 | 114 | # pattern = r"\((\d+),(\d+)\) ops and \((\d+\.\d+),(\d+\.\d+)\) ops/second in \((\d+\.\d+),(\d+\.\d+)\) seconds" 115 | 116 | # folder_path = "/data/gpt_project/gpt-assisted-rocksdb-config/saved_output/fillrandom/output_nvme_v2/c4_m4" 117 | # file_names = ['0.ini', '2.ini', '4.ini', '6.ini'] 118 | # pattern = r'"ops_per_second_graph": \[\[([\d.,\s]+)\],\s+\[([\d.,\s]+)\]\]' 119 | 120 | # data = [] 121 | 122 | # for file_name in file_names: 123 | # file_path = os.path.join(folder_path, file_name) 124 | # with open(file_path, 'r') as f: 125 | # file_contents = f.read() 126 | # matches = re.findall(pattern, file_contents) 127 | # ops = [float(x)/1000 for x in matches[0][1].split(', ')] 128 | # data.append(ops) 129 | 130 | # plot_multiple_manual(data, "Ops_per_Second_combined.png") 131 | 132 | -------------------------------------------------------------------------------- /utils/system_operations/get_sys_info.py: -------------------------------------------------------------------------------- 1 | import os 2 | import psutil 3 | import subprocess 4 | import platform 5 | from cpuinfo import get_cpu_info 6 | from cgroup_monitor import CGroupMonitor 7 | 8 | def get_system_data(db_path): 9 | ''' 10 | Function to get the system data 11 | 12 | Parameters: 13 | - db_path (str): The path of database 14 | 15 | Returns: 16 | - brand_raw_value (str): The CPU model name 17 | - memory_total (int): The total memory 18 | - swap (int): The swap memory 19 | - total_disk_size (int): The total disk size 20 | - device (str): The device name 21 | ''' 22 | cgroup_monitor = CGroupMonitor() 23 | try: 24 | cpu_count = os.getenv("CPU_COUNT", str(cgroup_monitor.get_cpu_limit())) 25 | mem_max = os.getenv("MEMORY_MAX", str(cgroup_monitor.get_memory_limit())) 26 | 27 | # gets the CPU op-modes 28 | system_info = platform.uname() 29 | cpu_op_modes = system_info.processor 30 | 31 | # gets the CPU model name 32 | cpu_model = platform.processor() 33 | 34 | # get all the CPU cache sizes 35 | cpu_info = get_cpu_info() 36 | brand_raw_value = cpu_count + " cores of " + cpu_info['brand_raw'] 37 | 38 | l1_data_cache_size = cpu_info.get('l1_data_cache_size', 'N/A') 39 | l1_instruction_cache_size = cpu_info.get( 40 | 'l1_instruction_cache_size', 'N/A') 41 | l2_cache_size = cpu_info.get('l2_cache_size', 'N/A') 42 | l3_cache_size = cpu_info.get('l3_cache_size', 'N/A') 43 | 44 | # get the total memory 45 | # memory_total = psutil.virtual_memory().total 46 | memory_total = float(mem_max) 47 | 48 | # gets the 
percentage of RAM used 49 | memory_used = psutil.virtual_memory().percent 50 | 51 | # gets the percentage of RAM available 52 | memeory_remaining = psutil.virtual_memory().available * 100 / \ 53 | psutil.virtual_memory().total 54 | 55 | # gets the disk information 56 | # partitions = psutil.disk_partitions(all=True) 57 | 58 | swap = psutil.swap_memory() 59 | 60 | partitions = psutil.disk_partitions(all=False) 61 | path = os.path.dirname(db_path) 62 | total_disk_size = -1 63 | device = "" 64 | all_devices = check_drive_type() 65 | data_directory = path[:5] 66 | for partition in partitions: 67 | usage = psutil.disk_usage(partition.mountpoint) 68 | if (partition.mountpoint[:5] == data_directory): 69 | total_disk_size = usage.total 70 | if (partition.device.split('/')[-1] in all_devices): 71 | device = all_devices[partition.device.split('/')[-1]] 72 | elif (partition.device.split('/')[-1][:-1] in all_devices): 73 | device = all_devices[partition.device.split('/')[-1][:-1]] 74 | 75 | # returns all the system data required 76 | return brand_raw_value, memory_total, swap, total_disk_size, device 77 | 78 | except Exception as e: 79 | print(f"[SYS] Error in fetching system data: {e}") 80 | return None 81 | 82 | # Check drive type 83 | def check_drive_type(): 84 | ''' 85 | Function to check the drive type 86 | 87 | Returns: 88 | - drive_types (dict): A dictionary containing the drive types 89 | ''' 90 | # Path where the drive information is stored 91 | sys_block_path = "/sys/block" 92 | # Check if the path exists 93 | if os.path.exists(sys_block_path): 94 | # List of all devices 95 | devices = os.listdir(sys_block_path) 96 | drive_types = {} 97 | # Iterate through each device 98 | for device in devices: 99 | try: 100 | with open(f"{sys_block_path}/{device}/queue/rotational", "r") as file: 101 | rotational = file.read().strip() 102 | if rotational == "0": 103 | drive_types[device] = "SSD" 104 | else: 105 | drive_types[device] = "HDD" 106 | except IOError: 107 | # Unable to read the rotational file for this device 108 | pass 109 | return drive_types 110 | else: 111 | return "System block path does not exist." 112 | 113 | def system_info(db_path, fio_result): 114 | ''' 115 | Fetch system data for further runs 116 | 117 | Parameters: 118 | - db_path (str): The path of database 119 | - fio_result (str): The result of fio benchmark 120 | ''' 121 | system_data = get_system_data(db_path) 122 | data = (f"{system_data[0]} with {system_data[1]}GiB of Memory and {system_data[1]}GiB of Swap space." 123 | f"{system_data[4]} size : {system_data[3]/(1024 ** 4):.2f}T. A single instance of RocksDB is the always going to be the only process running. 
" 124 | f"{fio_result}") 125 | return data 126 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import getpass 4 | from datetime import datetime 5 | from collections import defaultdict 6 | from deepdiff import DeepDiff 7 | from utils.constants import OUTPUT_PATH, DEVICE, DB_PATH 8 | 9 | # LOG UTILS 10 | def log_update(update_string): 11 | ''' 12 | Update the log file with the given string 13 | 14 | Parameters: 15 | - update_string (str): The string to be updated in the log file 16 | 17 | Returns: 18 | - None 19 | ''' 20 | current_datetime = datetime.now() 21 | date_time_string = current_datetime.strftime("%Y-%m-%d %H:%M:%S") 22 | update_string = f"[{date_time_string}] {update_string}" 23 | 24 | if OUTPUT_PATH is None: 25 | with open(f"log.txt", "a+") as f: 26 | f.write(update_string + "\n") 27 | else: 28 | with open(f"{OUTPUT_PATH}/log.txt", "a+") as f: 29 | f.write(update_string + "\n") 30 | 31 | # STORE FILE UTILS 32 | def store_db_bench_output(output_folder_name, output_file_name, 33 | benchmark_results, options_file, reasoning): 34 | ''' 35 | Store the output of db_bench in a file 36 | 37 | Parameters: 38 | - output_folder_name (str): Name of the folder to store the output file 39 | - output_file_name (str): Name of the output file 40 | - benchmark_results (dict): Dictionary containing the benchmark results 41 | - options_file (str): The options file used to generate the benchmark results 42 | - reasoning (str): The reasoning behind the options file 43 | 44 | Returns: 45 | - None 46 | ''' 47 | with open(f"{output_folder_name}/{output_file_name}", "a+") as f: 48 | f.write("# " + json.dumps(benchmark_results) + "\n\n") 49 | f.write(options_file + "\n") 50 | for line in reasoning.splitlines(): 51 | f.write("# " + line + "\n") 52 | 53 | def store_best_option_file(options_files, output_folder_dir): 54 | ''' 55 | Save the best option file 56 | 57 | Parameters: 58 | - options_files (list): List of options files 59 | - output_folder_dir (str): The output directory 60 | ''' 61 | best_result = max(options_files, key=lambda x: x[1]["ops_per_sec"]) 62 | best_options = best_result[0] 63 | best_reasoning = best_result[2] 64 | with open(f"{output_folder_dir}/best_options.ini", "w") as f: 65 | f.write(best_options) 66 | for line in best_reasoning.splitlines(): 67 | f.write("# " + line + "\n") 68 | 69 | def store_diff_options_list(options_list, output_folder_dir): 70 | # Calculate differences between options_list 71 | differences = calculate_differences(options_list) 72 | changed_fields_frequency = defaultdict(lambda: 0) 73 | 74 | with open(f"{output_folder_dir}/diffOptions.txt", 'w') as f: 75 | for i, diff in enumerate(differences, start=1): 76 | f.write(f"[MFN] Differences between iteration {i} and iteration {i + 1}: \n") 77 | f.write(json.dumps(diff, indent=4)) 78 | f.write("\n") 79 | f.write("=" * 50) 80 | f.write("\n\n") 81 | 82 | for key in diff["values_changed"]: 83 | changed_fields_frequency[key] += 1 84 | 85 | f.write("\n\n[MFN] Changed Fields Frequency:\n") 86 | f.write(json.dumps(changed_fields_frequency, indent=4)) 87 | 88 | # PATH UTILS 89 | def path_of_db(): 90 | ''' 91 | Choose the database path 92 | 93 | Parameters: 94 | - None 95 | 96 | Returns: 97 | - db_path (str): The path of the database 98 | ''' 99 | user_name = getpass.getuser() 100 | db_path_name = DB_PATH + user_name[0].lower() 101 | db_path = os.getenv("DB_PATH", 
db_path_name) 102 | # log_update(f"[UTL] Using database path: {db_path}") 103 | print(f"[UTL] Using database path: {db_path}") 104 | 105 | return db_path 106 | 107 | def path_of_output_folder(): 108 | ''' 109 | Set the output folder directory 110 | 111 | Parameters: 112 | - None 113 | 114 | Returns: 115 | - output_folder_dir (str): The output folder directory 116 | ''' 117 | current_datetime = datetime.now() 118 | date_time_string = current_datetime.strftime("%Y-%m-%d_%H-%M-%S") 119 | if OUTPUT_PATH is None: 120 | output_folder_dir = f"output/output_{DEVICE}/output_{date_time_string}" 121 | else: 122 | output_folder_dir = OUTPUT_PATH 123 | 124 | os.makedirs(output_folder_dir, exist_ok=True) 125 | log_update(f"[UTL] Using output folder: {output_folder_dir}") 126 | print(f"[UTL] Using output folder: {output_folder_dir}") 127 | 128 | return output_folder_dir 129 | 130 | # OTHER UTILS 131 | def calculate_differences(iterations): 132 | ''' 133 | Function to calculate the differences between the iterations 134 | 135 | Parameters: 136 | - iterations (list): A list of the iterations 137 | 138 | Returns: 139 | - differences (list): A list of the differences between the iterations 140 | ''' 141 | differences = [] 142 | for i in range(1, len(iterations)): 143 | diff = DeepDiff(iterations[i-1], iterations[i]) 144 | differences.append(diff) 145 | return differences 146 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import utils.constants as constants 2 | from utils.graph import plot, plot_multiple 3 | from utils.system_operations.fio_runner import get_fio_result 4 | from options_files.ops_options_file import parse_option_file_to_dict, get_initial_options_file 5 | 6 | import rocksdb.subprocess_manager as spm 7 | from utils.utils import log_update, store_best_option_file, path_of_db, store_diff_options_list 8 | from utils.system_operations.get_sys_info import system_info 9 | from gpt.prompts_generator import generate_option_file_with_gpt 10 | import os 11 | 12 | def main(): 13 | ''' 14 | Main function to run the project. This function will run the db_bench with the initial options file and then 15 | generate new options files using GPT API and run db_bench with the new options file. This function will also 16 | store the output of db_bench in a file. The output file will contain the benchmark results, the options file 17 | used to generate the benchmark results and the reasoning behind the options file as provided by the GPT API. 18 | There will be a separate file for each iteration. 
19 | 20 | Parameters: 21 | - None 22 | 23 | Returns: 24 | - None 25 | ''' 26 | 27 | # initialize variables 28 | options_files = [] 29 | options_list = [] 30 | 31 | # Set up the path 32 | output_folder_dir = constants.OUTPUT_PATH 33 | os.makedirs(output_folder_dir, exist_ok=True) 34 | db_path = path_of_db() 35 | fio_result = get_fio_result(constants.FIO_RESULT_PATH) 36 | 37 | log_update(f"[MFN] Starting the program with the case number: {constants.CASE_NUMBER}") 38 | print(f"[MFN] Starting the program with the case number: {constants.CASE_NUMBER}") 39 | 40 | # First run, Initial options file and see how the results are 41 | options, reasoning = get_initial_options_file() 42 | 43 | is_error, benchmark_results, average_cpu_usage, average_memory_usage, options = spm.benchmark( 44 | db_path, options, output_folder_dir, reasoning, 0, None, options_files) 45 | 46 | if is_error: 47 | # If the initial options file fails, exit the program 48 | log_update("[MFN] Failed to benchmark with the initial options file. Exiting.") 49 | print("[MFN] Failed to benchmark with the initial options file. Exiting.") 50 | exit(1) 51 | else: 52 | # If the initial options file succeeds, store the options file and benchmark results, pass it to the GPT API to generate a new options file 53 | parsed_options = parse_option_file_to_dict(options) 54 | options_list.append(parsed_options) 55 | 56 | # Maintain a list of options files, benchmark results and why that option file was generated 57 | options_files.append((options, benchmark_results, reasoning, "")) 58 | 59 | iteration_count = 7 60 | 61 | for i in range(1, iteration_count + 1): 62 | 63 | log_update(f"[MFN] Starting iteration {i}") 64 | log_update(f"[MFN] Querying ChatGPT for next options file") 65 | 66 | print("-" * 50) 67 | print(f"[MFN] Starting iteration {i}") 68 | 69 | print("[MFN] Querying ChatGPT for next options file") 70 | temperature = 0.4 71 | retry_counter = 5 72 | generated = False 73 | 74 | for gpt_query_count in range(retry_counter, 0, -1): 75 | # Generate new options file with retry limit of 5 76 | 77 | new_options_file, reasoning, summary_of_changes = generate_option_file_with_gpt( 78 | constants.CASE_NUMBER, options_files, 79 | system_info(db_path, fio_result), temperature, 80 | average_cpu_usage, average_memory_usage, 81 | constants.TEST_NAME, constants.VERSION) 82 | if new_options_file is None: 83 | log_update(f"[MFN] Failed to generate options file. Retrying. Retries left: {gpt_query_count - 1}") 84 | print("[MFN] Failed to generate options file. Retrying. Retries left: ", gpt_query_count - 1) 85 | continue 86 | 87 | # Parse output 88 | is_error, benchmark_results, average_cpu_usage, average_memory_usage, new_options_file = spm.benchmark( 89 | db_path, new_options_file, output_folder_dir, reasoning, iteration_count, benchmark_results, options_files) 90 | if is_error: 91 | log_update(f"[MFN] Benchmark failed. Retrying with new options file. Retries left: {gpt_query_count - 1}") 92 | print("[MFN] Benchmark failed. Retrying with new options file. Retries left: ", gpt_query_count - 1) 93 | temperature += 0.1 94 | continue 95 | else: 96 | generated = True 97 | break 98 | 99 | if generated: 100 | options = new_options_file 101 | options_files.append((options, benchmark_results, reasoning, 102 | summary_of_changes)) 103 | parsed_options = parse_option_file_to_dict(options) 104 | options_list.append(parsed_options) 105 | else: 106 | log_update("[MFN] Failed to generate options file over 5 times. 
Exiting.") 107 | print("[MFN] Failed to generate options file over 5 times. Exiting.") 108 | exit(1) 109 | 110 | store_best_option_file(options_files, output_folder_dir) 111 | 112 | # Graph Ops/Sec 113 | plot([e[1]["ops_per_sec"] for e in options_files], "OpsPerSec", 114 | f"{output_folder_dir}/OpsPerSec.png") 115 | plot_multiple(options_files, "Ops Per Second", 116 | f"{output_folder_dir}/opsM_per_sec.png") 117 | 118 | store_diff_options_list(options_list, output_folder_dir) 119 | 120 | 121 | 122 | if __name__ == "__main__": 123 | main() 124 | -------------------------------------------------------------------------------- /options_files/default_options_files/dbbench_default_options-7.10.2.ini: -------------------------------------------------------------------------------- 1 | # This is a RocksDB option file. 2 | # 3 | # For detailed file format spec, please refer to the example file 4 | # in examples/rocksdb_option_file_example.ini 5 | # 6 | 7 | [Version] 8 | rocksdb_version=7.10.2 9 | options_file_version=1.1 10 | 11 | [DBOptions] 12 | max_open_files=-1 13 | stats_history_buffer_size=1048576 14 | stats_persist_period_sec=600 15 | max_background_flushes=-1 16 | stats_dump_period_sec=600 17 | compaction_readahead_size=0 18 | bytes_per_sync=0 19 | delete_obsolete_files_period_micros=21600000000 20 | max_total_wal_size=0 21 | delayed_write_rate=8388608 22 | wal_bytes_per_sync=0 23 | writable_file_max_buffer_size=1048576 24 | avoid_flush_during_shutdown=false 25 | max_subcompactions=1 26 | strict_bytes_per_sync=false 27 | max_background_compactions=-1 28 | max_background_jobs=2 29 | lowest_used_cache_tier=kNonVolatileBlockTier 30 | bgerror_resume_retry_interval=1000000 31 | max_bgerror_resume_count=2147483647 32 | best_efforts_recovery=false 33 | write_dbid_to_manifest=false 34 | avoid_unnecessary_blocking_io=false 35 | atomic_flush=false 36 | log_readahead_size=0 37 | dump_malloc_stats=true 38 | info_log_level=INFO_LEVEL 39 | write_thread_max_yield_usec=100 40 | max_write_batch_group_size_bytes=1048576 41 | wal_compression=kNoCompression 42 | write_thread_slow_yield_usec=3 43 | enable_pipelined_write=true 44 | persist_stats_to_disk=false 45 | max_manifest_file_size=1073741824 46 | WAL_size_limit_MB=0 47 | fail_if_options_file_error=false 48 | max_log_file_size=0 49 | manifest_preallocation_size=4194304 50 | listeners={ErrorHandlerListener:ErrorHandlerListener} 51 | log_file_time_to_roll=0 52 | allow_data_in_errors=false 53 | WAL_ttl_seconds=0 54 | recycle_log_file_num=0 55 | file_checksum_gen_factory=nullptr 56 | keep_log_file_num=1000 57 | db_write_buffer_size=0 58 | table_cache_numshardbits=4 59 | use_adaptive_mutex=false 60 | allow_ingest_behind=false 61 | skip_checking_sst_file_sizes_on_db_open=false 62 | skip_stats_update_on_db_open=false 63 | random_access_max_buffer_size=1048576 64 | access_hint_on_compaction_start=NORMAL 65 | allow_concurrent_memtable_write=true 66 | track_and_verify_wals_in_manifest=false 67 | paranoid_checks=true 68 | max_file_opening_threads=16 69 | verify_sst_unique_id_in_manifest=true 70 | avoid_flush_during_recovery=false 71 | flush_verify_memtable_count=true 72 | db_host_id=__hostname__ 73 | error_if_exists=false 74 | wal_recovery_mode=kPointInTimeRecovery 75 | enable_thread_tracking=false 76 | is_fd_close_on_exec=true 77 | enforce_single_del_contracts=true 78 | create_missing_column_families=true 79 | create_if_missing=true 80 | use_fsync=false 81 | wal_filter=nullptr 82 | allow_2pc=false 83 | use_direct_io_for_flush_and_compaction=false 84 | 
manual_wal_flush=false 85 | enable_write_thread_adaptive_yield=true 86 | use_direct_reads=false 87 | allow_mmap_writes=false 88 | allow_fallocate=true 89 | two_write_queues=false 90 | allow_mmap_reads=false 91 | unordered_write=false 92 | advise_random_on_open=true 93 | 94 | 95 | [CFOptions "default"] 96 | memtable_protection_bytes_per_key=0 97 | sample_for_compression=0 98 | blob_file_starting_level=0 99 | blob_compaction_readahead_size=0 100 | blob_garbage_collection_force_threshold=1.000000 101 | enable_blob_garbage_collection=false 102 | min_blob_size=0 103 | last_level_temperature=kUnknown 104 | enable_blob_files=false 105 | target_file_size_base=67108864 106 | max_sequential_skip_in_iterations=8 107 | prepopulate_blob_cache=kDisable 108 | compaction_options_fifo={allow_compaction=true;age_for_warm=0;max_table_files_size=0;} 109 | max_bytes_for_level_multiplier=10.000000 110 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1 111 | max_bytes_for_level_base=268435456 112 | experimental_mempurge_threshold=0.000000 113 | write_buffer_size=67108864 114 | bottommost_compression=kDisableCompressionOption 115 | prefix_extractor=nullptr 116 | blob_file_size=268435456 117 | memtable_huge_page_size=0 118 | max_successive_merges=0 119 | compression_opts={max_dict_buffer_bytes=0;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;level=32767;window_bits=-14;} 120 | arena_block_size=1048576 121 | memtable_whole_key_filtering=false 122 | target_file_size_multiplier=1 123 | max_write_buffer_number=2 124 | blob_compression_type=kNoCompression 125 | compression=kSnappyCompression 126 | level0_stop_writes_trigger=36 127 | level0_slowdown_writes_trigger=20 128 | level0_file_num_compaction_trigger=4 129 | ignore_max_compaction_bytes_for_input=true 130 | max_compaction_bytes=1677721600 131 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;} 132 | memtable_prefix_bloom_size_ratio=0.000000 133 | hard_pending_compaction_bytes_limit=137438953472 134 | bottommost_compression_opts={max_dict_buffer_bytes=0;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;level=32767;window_bits=-14;} 135 | blob_garbage_collection_age_cutoff=0.250000 136 | ttl=2592000 137 | soft_pending_compaction_bytes_limit=68719476736 138 | inplace_update_num_locks=10000 139 | paranoid_file_checks=false 140 | check_flush_compaction_key_order=true 141 | periodic_compaction_seconds=0 142 | disable_auto_compactions=false 143 | report_bg_io_stats=false 144 | compaction_style=kCompactionStyleLevel 145 | merge_operator=nullptr 146 | compaction_filter_factory=nullptr 147 | sst_partitioner_factory=nullptr 148 | table_factory=BlockBasedTable 149 | memtable_factory=SkipListFactory 150 | comparator=leveldb.BytewiseComparator 151 | compaction_pri=kMinOverlappingRatio 152 | bloom_locality=0 153 | num_levels=7 154 | min_write_buffer_number_to_merge=1 155 | compaction_filter=nullptr 156 | max_write_buffer_size_to_maintain=0 157 | max_write_buffer_number_to_maintain=0 158 | memtable_insert_with_hint_prefix_extractor=nullptr 159 | preclude_last_level_data_seconds=0 160 | force_consistency_checks=true 161 | optimize_filters_for_hits=false 162 | level_compaction_dynamic_file_size=true 163 | level_compaction_dynamic_level_bytes=false 164 | 
preserve_internal_time_seconds=0 165 | inplace_update_support=false 166 | 167 | [TableOptions/BlockBasedTable "default"] 168 | num_file_reads_for_auto_readahead=2 169 | initial_auto_readahead_size=8192 170 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;} 171 | enable_index_compression=true 172 | pin_top_level_index_and_filter=false 173 | read_amp_bytes_per_bit=0 174 | verify_compression=false 175 | prepopulate_block_cache=kDisable 176 | format_version=5 177 | partition_filters=false 178 | metadata_block_size=4096 179 | max_auto_readahead_size=262144 180 | index_block_restart_interval=1 181 | block_size_deviation=10 182 | block_size=4096 183 | detect_filter_construct_corruption=false 184 | no_block_cache=false 185 | checksum=kXXH3 186 | filter_policy=nullptr 187 | data_block_hash_table_util_ratio=0.750000 188 | block_restart_interval=16 189 | index_type=kBinarySearch 190 | pin_l0_filter_and_index_blocks_in_cache=false 191 | data_block_index_type=kDataBlockBinarySearch 192 | cache_index_and_filter_blocks_with_high_priority=true 193 | whole_key_filtering=true 194 | index_shortening=kShortenSeparatorsAndSuccessor 195 | cache_index_and_filter_blocks=false 196 | block_align=false 197 | optimize_filters_for_memory=false 198 | flush_block_policy_factory=FlushBlockBySizePolicyFactory 199 | 200 | -------------------------------------------------------------------------------- /options_files/default_options_files/bad_options.ini: -------------------------------------------------------------------------------- 1 | # This is a RocksDB option file. 2 | # 3 | # For detailed file format spec, please refer to the example file 4 | # in examples/rocksdb_option_file_example.ini 5 | # 6 | 7 | [Version] 8 | rocksdb_version=8.8.1 9 | options_file_version=1.1 10 | 11 | [DBOptions] 12 | max_background_flushes=1 13 | compaction_readahead_size=2097152 14 | wal_bytes_per_sync=0 15 | bytes_per_sync=0 16 | max_open_files=10 17 | stats_history_buffer_size=1048576 18 | stats_dump_period_sec=600 19 | stats_persist_period_sec=600 20 | delete_obsolete_files_period_micros=21600000000 21 | max_total_wal_size=0 22 | strict_bytes_per_sync=false 23 | delayed_write_rate=16777216 24 | avoid_flush_during_shutdown=false 25 | writable_file_max_buffer_size=1048576 26 | max_subcompactions=1 27 | max_background_compactions=1 28 | max_background_jobs=1 29 | lowest_used_cache_tier=kNonVolatileBlockTier 30 | bgerror_resume_retry_interval=1000000 31 | max_bgerror_resume_count=2147483647 32 | best_efforts_recovery=false 33 | write_dbid_to_manifest=false 34 | avoid_unnecessary_blocking_io=false 35 | atomic_flush=false 36 | log_readahead_size=0 37 | dump_malloc_stats=false 38 | info_log_level=INFO_LEVEL 39 | write_thread_max_yield_usec=100 40 | max_write_batch_group_size_bytes=1048576 41 | wal_compression=kNoCompression 42 | write_thread_slow_yield_usec=3 43 | enable_pipelined_write=false 44 | persist_stats_to_disk=false 45 | max_manifest_file_size=1073741824 46 | WAL_size_limit_MB=0 47 | fail_if_options_file_error=true 48 | max_log_file_size=0 49 | manifest_preallocation_size=4194304 50 | log_file_time_to_roll=0 51 | allow_data_in_errors=false 52 | WAL_ttl_seconds=0 53 | recycle_log_file_num=0 54 | file_checksum_gen_factory=nullptr 55 | keep_log_file_num=1000 56 | db_write_buffer_size=0 57 | table_cache_numshardbits=6 58 | use_adaptive_mutex=false 59 | allow_ingest_behind=false 60 | skip_checking_sst_file_sizes_on_db_open=false 61 | random_access_max_buffer_size=1048576 
62 | access_hint_on_compaction_start=NORMAL 63 | allow_concurrent_memtable_write=true 64 | track_and_verify_wals_in_manifest=false 65 | skip_stats_update_on_db_open=false 66 | compaction_verify_record_count=true 67 | paranoid_checks=true 68 | max_file_opening_threads=16 69 | verify_sst_unique_id_in_manifest=true 70 | avoid_flush_during_recovery=false 71 | flush_verify_memtable_count=true 72 | db_host_id=__hostname__ 73 | error_if_exists=false 74 | wal_recovery_mode=kPointInTimeRecovery 75 | enable_thread_tracking=false 76 | is_fd_close_on_exec=true 77 | enforce_single_del_contracts=true 78 | create_missing_column_families=false 79 | create_if_missing=true 80 | use_fsync=false 81 | wal_filter=nullptr 82 | allow_2pc=false 83 | use_direct_io_for_flush_and_compaction=false 84 | manual_wal_flush=false 85 | enable_write_thread_adaptive_yield=true 86 | use_direct_reads=false 87 | allow_mmap_writes=false 88 | allow_fallocate=true 89 | two_write_queues=false 90 | allow_mmap_reads=false 91 | unordered_write=false 92 | advise_random_on_open=true 93 | 94 | 95 | [CFOptions "default"] 96 | memtable_max_range_deletions=0 97 | block_protection_bytes_per_key=0 98 | memtable_protection_bytes_per_key=0 99 | sample_for_compression=0 100 | blob_file_starting_level=0 101 | blob_compaction_readahead_size=0 102 | blob_garbage_collection_force_threshold=1.000000 103 | enable_blob_garbage_collection=false 104 | min_blob_size=0 105 | last_level_temperature=kUnknown 106 | enable_blob_files=false 107 | target_file_size_base=16777216 108 | max_sequential_skip_in_iterations=8 109 | prepopulate_blob_cache=kDisable 110 | compaction_options_fifo={allow_compaction=false;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=1073741824;} 111 | max_bytes_for_level_multiplier=10.000000 112 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1 113 | max_bytes_for_level_base=268435456 114 | experimental_mempurge_threshold=0.000000 115 | write_buffer_size=16777216 116 | bottommost_compression=kDisableCompressionOption 117 | prefix_extractor=nullptr 118 | blob_file_size=268435456 119 | memtable_huge_page_size=0 120 | bottommost_file_compaction_delay=0 121 | max_successive_merges=0 122 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 123 | arena_block_size=1048576 124 | memtable_whole_key_filtering=false 125 | target_file_size_multiplier=1 126 | max_write_buffer_number=2 127 | blob_compression_type=kNoCompression 128 | compression=kSnappyCompression 129 | level0_stop_writes_trigger=20 130 | level0_slowdown_writes_trigger=10 131 | level0_file_num_compaction_trigger=6 132 | ignore_max_compaction_bytes_for_input=true 133 | max_compaction_bytes=1677721600 134 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;} 135 | memtable_prefix_bloom_size_ratio=0.000000 136 | hard_pending_compaction_bytes_limit=21474836480 137 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 138 | blob_garbage_collection_age_cutoff=0.250000 139 | ttl=2592000 140 | 
soft_pending_compaction_bytes_limit=68719476736 141 | inplace_update_num_locks=10000 142 | paranoid_file_checks=false 143 | check_flush_compaction_key_order=true 144 | periodic_compaction_seconds=0 145 | disable_auto_compactions=false 146 | report_bg_io_stats=false 147 | compaction_pri=kMinOverlappingRatio 148 | compaction_style=kCompactionStyleLevel 149 | merge_operator=nullptr 150 | table_factory=BlockBasedTable 151 | memtable_factory=SkipListFactory 152 | comparator=leveldb.BytewiseComparator 153 | compaction_filter_factory=nullptr 154 | num_levels=7 155 | min_write_buffer_number_to_merge=1 156 | bloom_locality=0 157 | max_write_buffer_size_to_maintain=0 158 | sst_partitioner_factory=nullptr 159 | preserve_internal_time_seconds=0 160 | preclude_last_level_data_seconds=0 161 | max_write_buffer_number_to_maintain=0 162 | default_temperature=kUnknown 163 | optimize_filters_for_hits=false 164 | level_compaction_dynamic_file_size=false 165 | memtable_insert_with_hint_prefix_extractor=nullptr 166 | level_compaction_dynamic_level_bytes=true 167 | inplace_update_support=false 168 | persist_user_defined_timestamps=true 169 | compaction_filter=nullptr 170 | force_consistency_checks=true 171 | 172 | [TableOptions/BlockBasedTable "default"] 173 | num_file_reads_for_auto_readahead=2 174 | initial_auto_readahead_size=8192 175 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;} 176 | enable_index_compression=true 177 | pin_top_level_index_and_filter=true 178 | read_amp_bytes_per_bit=0 179 | verify_compression=false 180 | prepopulate_block_cache=kDisable 181 | format_version=5 182 | partition_filters=false 183 | metadata_block_size=4096 184 | max_auto_readahead_size=262144 185 | index_block_restart_interval=1 186 | block_size_deviation=10 187 | block_size=4096 188 | detect_filter_construct_corruption=false 189 | no_block_cache=false 190 | checksum=kXXH3 191 | filter_policy=nullptr 192 | data_block_hash_table_util_ratio=0.750000 193 | block_restart_interval=16 194 | index_type=kBinarySearch 195 | pin_l0_filter_and_index_blocks_in_cache=false 196 | data_block_index_type=kDataBlockBinarySearch 197 | cache_index_and_filter_blocks_with_high_priority=true 198 | whole_key_filtering=true 199 | index_shortening=kShortenSeparators 200 | cache_index_and_filter_blocks=false 201 | block_align=false 202 | optimize_filters_for_memory=false 203 | flush_block_policy_factory=FlushBlockBySizePolicyFactory 204 | 205 | -------------------------------------------------------------------------------- /options_files/default_options_files/good_options.ini: -------------------------------------------------------------------------------- 1 | # This is a RocksDB option file. 
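# Relative to rocksdb_default_options.ini, this profile mainly raises background parallelism (max_background_jobs=12, max_subcompactions=8) and memtable capacity (write_buffer_size=134217728, max_write_buffer_number=4).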
2 | # 3 | # For detailed file format spec, please refer to the example file 4 | # in examples/rocksdb_option_file_example.ini 5 | # 6 | 7 | [Version] 8 | rocksdb_version=8.8.1 9 | options_file_version=1.1 10 | 11 | [DBOptions] 12 | max_background_flushes=-1 13 | compaction_readahead_size=2097152 14 | wal_bytes_per_sync=0 15 | bytes_per_sync=0 16 | max_open_files=-1 17 | stats_history_buffer_size=1048576 18 | stats_dump_period_sec=600 19 | stats_persist_period_sec=600 20 | delete_obsolete_files_period_micros=21600000000 21 | max_total_wal_size=0 22 | strict_bytes_per_sync=false 23 | delayed_write_rate=16777216 24 | avoid_flush_during_shutdown=false 25 | writable_file_max_buffer_size=1048576 26 | max_subcompactions=8 27 | max_background_compactions=-1 28 | max_background_jobs=12 29 | lowest_used_cache_tier=kNonVolatileBlockTier 30 | bgerror_resume_retry_interval=1000000 31 | max_bgerror_resume_count=2147483647 32 | best_efforts_recovery=false 33 | write_dbid_to_manifest=false 34 | avoid_unnecessary_blocking_io=false 35 | atomic_flush=false 36 | log_readahead_size=0 37 | dump_malloc_stats=false 38 | info_log_level=INFO_LEVEL 39 | write_thread_max_yield_usec=100 40 | max_write_batch_group_size_bytes=1048576 41 | wal_compression=kNoCompression 42 | write_thread_slow_yield_usec=3 43 | enable_pipelined_write=false 44 | persist_stats_to_disk=false 45 | max_manifest_file_size=1073741824 46 | WAL_size_limit_MB=0 47 | fail_if_options_file_error=true 48 | max_log_file_size=0 49 | manifest_preallocation_size=4194304 50 | log_file_time_to_roll=0 51 | allow_data_in_errors=false 52 | WAL_ttl_seconds=0 53 | recycle_log_file_num=0 54 | file_checksum_gen_factory=nullptr 55 | keep_log_file_num=1000 56 | db_write_buffer_size=0 57 | table_cache_numshardbits=6 58 | use_adaptive_mutex=false 59 | allow_ingest_behind=false 60 | skip_checking_sst_file_sizes_on_db_open=false 61 | random_access_max_buffer_size=1048576 62 | access_hint_on_compaction_start=NORMAL 63 | allow_concurrent_memtable_write=true 64 | track_and_verify_wals_in_manifest=false 65 | skip_stats_update_on_db_open=false 66 | compaction_verify_record_count=true 67 | paranoid_checks=true 68 | max_file_opening_threads=16 69 | verify_sst_unique_id_in_manifest=true 70 | avoid_flush_during_recovery=false 71 | flush_verify_memtable_count=true 72 | db_host_id=__hostname__ 73 | error_if_exists=false 74 | wal_recovery_mode=kPointInTimeRecovery 75 | enable_thread_tracking=false 76 | is_fd_close_on_exec=true 77 | enforce_single_del_contracts=true 78 | create_missing_column_families=false 79 | create_if_missing=true 80 | use_fsync=false 81 | wal_filter=nullptr 82 | allow_2pc=false 83 | use_direct_io_for_flush_and_compaction=false 84 | manual_wal_flush=false 85 | enable_write_thread_adaptive_yield=true 86 | use_direct_reads=false 87 | allow_mmap_writes=false 88 | allow_fallocate=true 89 | two_write_queues=false 90 | allow_mmap_reads=false 91 | unordered_write=false 92 | advise_random_on_open=true 93 | 94 | 95 | [CFOptions "default"] 96 | memtable_max_range_deletions=0 97 | block_protection_bytes_per_key=0 98 | memtable_protection_bytes_per_key=0 99 | sample_for_compression=0 100 | blob_file_starting_level=0 101 | blob_compaction_readahead_size=0 102 | blob_garbage_collection_force_threshold=1.000000 103 | enable_blob_garbage_collection=false 104 | min_blob_size=0 105 | last_level_temperature=kUnknown 106 | enable_blob_files=false 107 | target_file_size_base=67108864 108 | max_sequential_skip_in_iterations=8 109 | prepopulate_blob_cache=kDisable 110 | 
compaction_options_fifo={allow_compaction=false;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=1073741824;} 111 | max_bytes_for_level_multiplier=10.000000 112 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1 113 | max_bytes_for_level_base=268435456 114 | experimental_mempurge_threshold=0.000000 115 | write_buffer_size=134217728 116 | bottommost_compression=kDisableCompressionOption 117 | prefix_extractor=nullptr 118 | blob_file_size=268435456 119 | memtable_huge_page_size=0 120 | bottommost_file_compaction_delay=0 121 | max_successive_merges=0 122 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 123 | arena_block_size=1048576 124 | memtable_whole_key_filtering=false 125 | target_file_size_multiplier=1 126 | max_write_buffer_number=4 127 | blob_compression_type=kNoCompression 128 | compression=kSnappyCompression 129 | level0_stop_writes_trigger=36 130 | level0_slowdown_writes_trigger=24 131 | level0_file_num_compaction_trigger=4 132 | ignore_max_compaction_bytes_for_input=true 133 | max_compaction_bytes=1677721600 134 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;} 135 | memtable_prefix_bloom_size_ratio=0.000000 136 | hard_pending_compaction_bytes_limit=274877906944 137 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 138 | blob_garbage_collection_age_cutoff=0.250000 139 | ttl=2592000 140 | soft_pending_compaction_bytes_limit=68719476736 141 | inplace_update_num_locks=10000 142 | paranoid_file_checks=false 143 | check_flush_compaction_key_order=true 144 | periodic_compaction_seconds=0 145 | disable_auto_compactions=false 146 | report_bg_io_stats=false 147 | compaction_pri=kMinOverlappingRatio 148 | compaction_style=kCompactionStyleLevel 149 | merge_operator=nullptr 150 | table_factory=BlockBasedTable 151 | memtable_factory=SkipListFactory 152 | comparator=leveldb.BytewiseComparator 153 | compaction_filter_factory=nullptr 154 | num_levels=7 155 | min_write_buffer_number_to_merge=1 156 | bloom_locality=0 157 | max_write_buffer_size_to_maintain=0 158 | sst_partitioner_factory=nullptr 159 | preserve_internal_time_seconds=0 160 | preclude_last_level_data_seconds=0 161 | max_write_buffer_number_to_maintain=0 162 | default_temperature=kUnknown 163 | optimize_filters_for_hits=false 164 | level_compaction_dynamic_file_size=true 165 | memtable_insert_with_hint_prefix_extractor=nullptr 166 | level_compaction_dynamic_level_bytes=true 167 | inplace_update_support=false 168 | persist_user_defined_timestamps=true 169 | compaction_filter=nullptr 170 | force_consistency_checks=true 171 | 172 | [TableOptions/BlockBasedTable "default"] 173 | num_file_reads_for_auto_readahead=2 174 | initial_auto_readahead_size=8192 175 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;} 176 | enable_index_compression=true 177 | pin_top_level_index_and_filter=true 178 | read_amp_bytes_per_bit=0 179 | verify_compression=false 180 | prepopulate_block_cache=kDisable 181 | 
format_version=5 182 | partition_filters=false 183 | metadata_block_size=4096 184 | max_auto_readahead_size=262144 185 | index_block_restart_interval=1 186 | block_size_deviation=10 187 | block_size=4096 188 | detect_filter_construct_corruption=false 189 | no_block_cache=false 190 | checksum=kXXH3 191 | filter_policy=nullptr 192 | data_block_hash_table_util_ratio=0.750000 193 | block_restart_interval=16 194 | index_type=kBinarySearch 195 | pin_l0_filter_and_index_blocks_in_cache=false 196 | data_block_index_type=kDataBlockBinarySearch 197 | cache_index_and_filter_blocks_with_high_priority=true 198 | whole_key_filtering=true 199 | index_shortening=kShortenSeparators 200 | cache_index_and_filter_blocks=false 201 | block_align=false 202 | optimize_filters_for_memory=false 203 | flush_block_policy_factory=FlushBlockBySizePolicyFactory 204 | 205 | -------------------------------------------------------------------------------- /options_files/default_options_files/rocksdb_default_options.ini: -------------------------------------------------------------------------------- 1 | # This is a RocksDB option file. 2 | # 3 | # For detailed file format spec, please refer to the example file 4 | # in examples/rocksdb_option_file_example.ini 5 | # 6 | 7 | [Version] 8 | rocksdb_version=8.8.1 9 | options_file_version=1.1 10 | 11 | [DBOptions] 12 | max_background_flushes=-1 13 | compaction_readahead_size=2097152 14 | wal_bytes_per_sync=0 15 | bytes_per_sync=0 16 | max_open_files=-1 17 | stats_history_buffer_size=1048576 18 | stats_dump_period_sec=600 19 | stats_persist_period_sec=600 20 | delete_obsolete_files_period_micros=21600000000 21 | max_total_wal_size=0 22 | strict_bytes_per_sync=false 23 | delayed_write_rate=16777216 24 | avoid_flush_during_shutdown=false 25 | writable_file_max_buffer_size=1048576 26 | max_subcompactions=1 27 | max_background_compactions=-1 28 | max_background_jobs=2 29 | lowest_used_cache_tier=kNonVolatileBlockTier 30 | bgerror_resume_retry_interval=1000000 31 | max_bgerror_resume_count=2147483647 32 | best_efforts_recovery=false 33 | write_dbid_to_manifest=false 34 | avoid_unnecessary_blocking_io=false 35 | atomic_flush=false 36 | log_readahead_size=0 37 | dump_malloc_stats=false 38 | info_log_level=INFO_LEVEL 39 | write_thread_max_yield_usec=100 40 | max_write_batch_group_size_bytes=1048576 41 | wal_compression=kNoCompression 42 | write_thread_slow_yield_usec=3 43 | enable_pipelined_write=false 44 | persist_stats_to_disk=false 45 | max_manifest_file_size=1073741824 46 | WAL_size_limit_MB=0 47 | fail_if_options_file_error=true 48 | max_log_file_size=0 49 | manifest_preallocation_size=4194304 50 | log_file_time_to_roll=0 51 | allow_data_in_errors=false 52 | WAL_ttl_seconds=0 53 | recycle_log_file_num=0 54 | file_checksum_gen_factory=nullptr 55 | keep_log_file_num=1000 56 | db_write_buffer_size=0 57 | table_cache_numshardbits=6 58 | use_adaptive_mutex=false 59 | allow_ingest_behind=false 60 | skip_checking_sst_file_sizes_on_db_open=false 61 | random_access_max_buffer_size=1048576 62 | access_hint_on_compaction_start=NORMAL 63 | allow_concurrent_memtable_write=true 64 | track_and_verify_wals_in_manifest=false 65 | skip_stats_update_on_db_open=false 66 | compaction_verify_record_count=true 67 | paranoid_checks=true 68 | max_file_opening_threads=16 69 | verify_sst_unique_id_in_manifest=true 70 | avoid_flush_during_recovery=false 71 | flush_verify_memtable_count=true 72 | db_host_id=__hostname__ 73 | error_if_exists=false 74 | wal_recovery_mode=kPointInTimeRecovery 75 | 
enable_thread_tracking=false 76 | is_fd_close_on_exec=true 77 | enforce_single_del_contracts=true 78 | create_missing_column_families=false 79 | create_if_missing=false 80 | use_fsync=false 81 | wal_filter=nullptr 82 | allow_2pc=false 83 | use_direct_io_for_flush_and_compaction=false 84 | manual_wal_flush=false 85 | enable_write_thread_adaptive_yield=true 86 | use_direct_reads=false 87 | allow_mmap_writes=false 88 | allow_fallocate=true 89 | two_write_queues=false 90 | allow_mmap_reads=false 91 | unordered_write=false 92 | advise_random_on_open=true 93 | 94 | 95 | [CFOptions "default"] 96 | memtable_max_range_deletions=0 97 | block_protection_bytes_per_key=0 98 | memtable_protection_bytes_per_key=0 99 | sample_for_compression=0 100 | blob_file_starting_level=0 101 | blob_compaction_readahead_size=0 102 | blob_garbage_collection_force_threshold=1.000000 103 | enable_blob_garbage_collection=false 104 | min_blob_size=0 105 | last_level_temperature=kUnknown 106 | enable_blob_files=false 107 | target_file_size_base=67108864 108 | max_sequential_skip_in_iterations=8 109 | prepopulate_blob_cache=kDisable 110 | compaction_options_fifo={allow_compaction=false;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=1073741824;} 111 | max_bytes_for_level_multiplier=10.000000 112 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1 113 | max_bytes_for_level_base=268435456 114 | experimental_mempurge_threshold=0.000000 115 | write_buffer_size=67108864 116 | bottommost_compression=kDisableCompressionOption 117 | prefix_extractor=nullptr 118 | blob_file_size=268435456 119 | memtable_huge_page_size=0 120 | bottommost_file_compaction_delay=0 121 | max_successive_merges=0 122 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 123 | arena_block_size=1048576 124 | memtable_whole_key_filtering=false 125 | target_file_size_multiplier=1 126 | max_write_buffer_number=2 127 | blob_compression_type=kNoCompression 128 | compression=kSnappyCompression 129 | level0_stop_writes_trigger=36 130 | level0_slowdown_writes_trigger=20 131 | level0_file_num_compaction_trigger=4 132 | ignore_max_compaction_bytes_for_input=true 133 | max_compaction_bytes=1677721600 134 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;} 135 | memtable_prefix_bloom_size_ratio=0.000000 136 | hard_pending_compaction_bytes_limit=274877906944 137 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 138 | blob_garbage_collection_age_cutoff=0.250000 139 | ttl=2592000 140 | soft_pending_compaction_bytes_limit=68719476736 141 | inplace_update_num_locks=10000 142 | paranoid_file_checks=false 143 | check_flush_compaction_key_order=true 144 | periodic_compaction_seconds=0 145 | disable_auto_compactions=false 146 | report_bg_io_stats=false 147 | compaction_pri=kMinOverlappingRatio 148 | compaction_style=kCompactionStyleLevel 149 | merge_operator=nullptr 150 | table_factory=BlockBasedTable 151 | memtable_factory=SkipListFactory 152 | comparator=leveldb.BytewiseComparator 153 | 
compaction_filter_factory=nullptr 154 | num_levels=7 155 | min_write_buffer_number_to_merge=1 156 | bloom_locality=0 157 | max_write_buffer_size_to_maintain=0 158 | sst_partitioner_factory=nullptr 159 | preserve_internal_time_seconds=0 160 | preclude_last_level_data_seconds=0 161 | max_write_buffer_number_to_maintain=0 162 | default_temperature=kUnknown 163 | optimize_filters_for_hits=false 164 | level_compaction_dynamic_file_size=true 165 | memtable_insert_with_hint_prefix_extractor=nullptr 166 | level_compaction_dynamic_level_bytes=true 167 | inplace_update_support=false 168 | persist_user_defined_timestamps=true 169 | compaction_filter=nullptr 170 | force_consistency_checks=true 171 | 172 | [TableOptions/BlockBasedTable "default"] 173 | num_file_reads_for_auto_readahead=2 174 | initial_auto_readahead_size=8192 175 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;} 176 | enable_index_compression=true 177 | pin_top_level_index_and_filter=true 178 | read_amp_bytes_per_bit=0 179 | verify_compression=false 180 | prepopulate_block_cache=kDisable 181 | format_version=5 182 | partition_filters=false 183 | metadata_block_size=4096 184 | max_auto_readahead_size=262144 185 | index_block_restart_interval=1 186 | block_size_deviation=10 187 | block_size=4096 188 | detect_filter_construct_corruption=false 189 | no_block_cache=false 190 | checksum=kXXH3 191 | filter_policy=nullptr 192 | data_block_hash_table_util_ratio=0.750000 193 | block_restart_interval=16 194 | index_type=kBinarySearch 195 | pin_l0_filter_and_index_blocks_in_cache=false 196 | data_block_index_type=kDataBlockBinarySearch 197 | cache_index_and_filter_blocks_with_high_priority=true 198 | whole_key_filtering=true 199 | index_shortening=kShortenSeparators 200 | cache_index_and_filter_blocks=false 201 | block_align=false 202 | optimize_filters_for_memory=false 203 | flush_block_policy_factory=FlushBlockBySizePolicyFactory 204 | 205 | -------------------------------------------------------------------------------- /options_files/default_options_files/dbbench_default_options-8.8.1.ini: -------------------------------------------------------------------------------- 1 | # This is a RocksDB option file. 
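# As the file name suggests, these are the options db_bench itself produces by default on 8.8.1; note that it enables direct I/O (use_direct_io_for_flush_and_compaction=true, use_direct_reads=true) and disables compression (compression=kNoCompression).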
2 | # 3 | # For detailed file format spec, please refer to the example file 4 | # in examples/rocksdb_option_file_example.ini 5 | # 6 | 7 | [Version] 8 | rocksdb_version=8.8.1 9 | options_file_version=1.1 10 | 11 | [DBOptions] 12 | max_background_flushes=-1 13 | compaction_readahead_size=2097152 14 | wal_bytes_per_sync=0 15 | bytes_per_sync=0 16 | max_open_files=-1 17 | stats_history_buffer_size=1048576 18 | stats_dump_period_sec=600 19 | stats_persist_period_sec=600 20 | delete_obsolete_files_period_micros=21600000000 21 | max_total_wal_size=0 22 | strict_bytes_per_sync=false 23 | delayed_write_rate=8388608 24 | avoid_flush_during_shutdown=false 25 | writable_file_max_buffer_size=1048576 26 | max_subcompactions=1 27 | max_background_compactions=-1 28 | max_background_jobs=2 29 | lowest_used_cache_tier=kNonVolatileBlockTier 30 | bgerror_resume_retry_interval=1000000 31 | max_bgerror_resume_count=2147483647 32 | best_efforts_recovery=false 33 | write_dbid_to_manifest=false 34 | avoid_unnecessary_blocking_io=false 35 | atomic_flush=false 36 | log_readahead_size=0 37 | dump_malloc_stats=true 38 | info_log_level=INFO_LEVEL 39 | write_thread_max_yield_usec=100 40 | max_write_batch_group_size_bytes=1048576 41 | wal_compression=kNoCompression 42 | write_thread_slow_yield_usec=3 43 | enable_pipelined_write=true 44 | persist_stats_to_disk=false 45 | max_manifest_file_size=1073741824 46 | WAL_size_limit_MB=0 47 | fail_if_options_file_error=true 48 | max_log_file_size=0 49 | manifest_preallocation_size=4194304 50 | listeners={ErrorHandlerListener:ErrorHandlerListener} 51 | log_file_time_to_roll=0 52 | allow_data_in_errors=false 53 | WAL_ttl_seconds=0 54 | recycle_log_file_num=0 55 | file_checksum_gen_factory=nullptr 56 | keep_log_file_num=1000 57 | db_write_buffer_size=0 58 | table_cache_numshardbits=4 59 | use_adaptive_mutex=false 60 | allow_ingest_behind=false 61 | skip_checking_sst_file_sizes_on_db_open=false 62 | random_access_max_buffer_size=1048576 63 | access_hint_on_compaction_start=NORMAL 64 | allow_concurrent_memtable_write=true 65 | track_and_verify_wals_in_manifest=false 66 | skip_stats_update_on_db_open=false 67 | compaction_verify_record_count=true 68 | paranoid_checks=true 69 | max_file_opening_threads=16 70 | verify_sst_unique_id_in_manifest=true 71 | avoid_flush_during_recovery=false 72 | flush_verify_memtable_count=true 73 | db_host_id=__hostname__ 74 | error_if_exists=false 75 | wal_recovery_mode=kPointInTimeRecovery 76 | enable_thread_tracking=false 77 | is_fd_close_on_exec=true 78 | enforce_single_del_contracts=true 79 | create_missing_column_families=true 80 | create_if_missing=true 81 | use_fsync=false 82 | wal_filter=nullptr 83 | allow_2pc=false 84 | use_direct_io_for_flush_and_compaction=true 85 | manual_wal_flush=false 86 | enable_write_thread_adaptive_yield=true 87 | use_direct_reads=true 88 | allow_mmap_writes=false 89 | allow_fallocate=true 90 | two_write_queues=false 91 | allow_mmap_reads=false 92 | unordered_write=false 93 | advise_random_on_open=true 94 | 95 | 96 | [CFOptions "default"] 97 | memtable_max_range_deletions=0 98 | block_protection_bytes_per_key=0 99 | memtable_protection_bytes_per_key=0 100 | sample_for_compression=0 101 | blob_file_starting_level=0 102 | blob_compaction_readahead_size=0 103 | blob_garbage_collection_force_threshold=1.000000 104 | enable_blob_garbage_collection=false 105 | min_blob_size=0 106 | last_level_temperature=kUnknown 107 | enable_blob_files=false 108 | target_file_size_base=67108864 109 | max_sequential_skip_in_iterations=8 110 | 
prepopulate_blob_cache=kDisable 111 | compaction_options_fifo={allow_compaction=true;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=0;} 112 | max_bytes_for_level_multiplier=10.000000 113 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1 114 | max_bytes_for_level_base=268435456 115 | experimental_mempurge_threshold=0.000000 116 | write_buffer_size=67108864 117 | bottommost_compression=kDisableCompressionOption 118 | prefix_extractor=nullptr 119 | blob_file_size=268435456 120 | memtable_huge_page_size=0 121 | bottommost_file_compaction_delay=0 122 | max_successive_merges=0 123 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 124 | arena_block_size=1048576 125 | memtable_whole_key_filtering=false 126 | target_file_size_multiplier=1 127 | max_write_buffer_number=2 128 | blob_compression_type=kNoCompression 129 | compression=kNoCompression 130 | level0_stop_writes_trigger=36 131 | level0_slowdown_writes_trigger=20 132 | level0_file_num_compaction_trigger=4 133 | ignore_max_compaction_bytes_for_input=true 134 | max_compaction_bytes=1677721600 135 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;} 136 | memtable_prefix_bloom_size_ratio=0.000000 137 | hard_pending_compaction_bytes_limit=137438953472 138 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 139 | blob_garbage_collection_age_cutoff=0.250000 140 | ttl=2592000 141 | soft_pending_compaction_bytes_limit=68719476736 142 | inplace_update_num_locks=10000 143 | paranoid_file_checks=false 144 | check_flush_compaction_key_order=true 145 | periodic_compaction_seconds=0 146 | disable_auto_compactions=false 147 | report_bg_io_stats=false 148 | compaction_pri=kMinOverlappingRatio 149 | compaction_style=kCompactionStyleLevel 150 | merge_operator=nullptr 151 | table_factory=BlockBasedTable 152 | memtable_factory=SkipListFactory 153 | comparator=leveldb.BytewiseComparator 154 | compaction_filter_factory=nullptr 155 | num_levels=7 156 | min_write_buffer_number_to_merge=1 157 | bloom_locality=0 158 | max_write_buffer_size_to_maintain=0 159 | sst_partitioner_factory=nullptr 160 | preserve_internal_time_seconds=0 161 | preclude_last_level_data_seconds=0 162 | max_write_buffer_number_to_maintain=0 163 | default_temperature=kUnknown 164 | optimize_filters_for_hits=false 165 | level_compaction_dynamic_file_size=true 166 | memtable_insert_with_hint_prefix_extractor=nullptr 167 | level_compaction_dynamic_level_bytes=false 168 | inplace_update_support=false 169 | persist_user_defined_timestamps=true 170 | compaction_filter=nullptr 171 | force_consistency_checks=true 172 | 173 | [TableOptions/BlockBasedTable "default"] 174 | num_file_reads_for_auto_readahead=2 175 | initial_auto_readahead_size=8192 176 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;} 177 | enable_index_compression=true 178 | pin_top_level_index_and_filter=false 179 | read_amp_bytes_per_bit=0 180 | verify_compression=false 181 | 
prepopulate_block_cache=kDisable 182 | format_version=5 183 | partition_filters=false 184 | metadata_block_size=4096 185 | max_auto_readahead_size=262144 186 | index_block_restart_interval=1 187 | block_size_deviation=10 188 | block_size=4096 189 | detect_filter_construct_corruption=false 190 | no_block_cache=false 191 | checksum=kXXH3 192 | filter_policy=nullptr 193 | data_block_hash_table_util_ratio=0.750000 194 | block_restart_interval=16 195 | index_type=kBinarySearch 196 | pin_l0_filter_and_index_blocks_in_cache=false 197 | data_block_index_type=kDataBlockBinarySearch 198 | cache_index_and_filter_blocks_with_high_priority=true 199 | whole_key_filtering=true 200 | index_shortening=kShortenSeparatorsAndSuccessor 201 | cache_index_and_filter_blocks=false 202 | block_align=false 203 | optimize_filters_for_memory=false 204 | flush_block_policy_factory=FlushBlockBySizePolicyFactory 205 | 206 | -------------------------------------------------------------------------------- /rocksdb/parse_db_bench_output.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | from utils.utils import log_update 4 | 5 | def parse_db_bench_output(output): 6 | 7 | if re.match("Unable to load options file.*", output) is not None: 8 | return { 9 | "error": "Invalid options file" 10 | } 11 | 12 | # Regular expression to find and extract the number of Entries 13 | # Searches for the pattern "Entries:" followed by one or more digits 14 | entries_match = re.search(r"Entries:\s+(\d+)", output) 15 | # If a match is found, convert the captured digits to an integer 16 | entries = int(entries_match.group(1)) if entries_match else None 17 | 18 | # Regular expression to parse the output line 19 | # Captures various performance metrics and their units 20 | test_name = None 21 | 22 | if "readrandomwriterandom" in output: 23 | op_line = output.split("readrandomwriterandom")[1].split("\n")[0] 24 | test_name = "readrandomwriterandom" 25 | test_pattern = r"readrandomwriterandom\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;" 26 | elif "fillrandom" in output: 27 | op_line = output.split("fillrandom")[1].split("\n")[0] 28 | test_name = "fillrandom" 29 | test_pattern = r"fillrandom\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;\s+(\d+\.\d+)\s+(\w+/s)\nMicroseconds per write:\nCount:\s+(\d+)\s+Average:\s+(\d+\.\d+)\s+StdDev:\s+(\d+\.\d+)\nMin:\s+(\d+)\s+Median:\s+(\d+\.\d+)\s+Max:\s+(\d+)\nPercentiles:\s+P50:\s+(\d+\.\d+)\s+P75:\s+(\d+\.\d+)\s+P99:\s+(\d+\.\d+)\s+P99\.9:\s+(\d+\.\d+)\s+P99\.99:\s+(\d+\.\d+)\n-{50}" 30 | elif "readrandom" in output: 31 | op_line = output.split("readrandom")[1].split("\n")[0] 32 | test_name = "readrandom" 33 | test_pattern = r"readrandom\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;\s+(\d+\.\d+)\s+(\w+/s)\s+\((\d+)\s+of\s+(\d+)\s+found\)\n\nMicroseconds per read:\nCount:\s+(\d+)\s+Average:\s+(\d+\.\d+)\s+StdDev:\s+(\d+\.\d+)\nMin:\s+(\d+)\s+Median:\s+(\d+\.\d+)\s+Max:\s+(\d+)\nPercentiles:\s+P50:\s+(\d+\.\d+)\s+P75:\s+(\d+\.\d+)\s+P99:\s+(\d+\.\d+)\s+P99\.9:\s+(\d+\.\d+)\s+P99\.99:\s+(\d+\.\d+)\n-{50}" 34 | elif "mixgraph" in output: 35 | op_line = output.split("mixgraph :")[1].split("\n")[0] 36 | test_name = "mixgraph" 37 | test_pattern = r"mixgraph\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;" 38 | # test_pattern = 
r"mixgraph\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;\s+\(\s+Gets:+(\d+)\s+Puts:+(\d+)\s+Seek:(\d+),\s+reads\s+(\d+)\s+in\s+(\d+)\s+found,\s+avg\s+size:\s+\d+\s+value,\s+-nan\s+scan\)\n\nMicroseconds per read:\nCount:\s+(\d+)\s+Average:\s+(\d+\.\d+)\s+StdDev:\s+(\d+\.\d+)\nMin:\s+(\d+)\s+Median:\s+(\d+\.\d+)\s+Max:\s+(\d+)\nPercentiles:\s+P50:\s+(\d+\.\d+)\s+P75:\s+(\d+\.\d+)\s+P99:\s+(\d+\.\d+)\s+P99\.9:\s+(\d+\.\d+)\s+P99\.99:\s+(\d+\.\d+)\n-{50}" 39 | elif "readwhilewriting" in output: 40 | op_line = output.split("readwhilewriting")[1].split("\n")[0] 41 | test_name = "readwhilewriting" 42 | test_pattern = r"readwhilewriting\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;" 43 | else: 44 | log_update(f"[PDB] Test name not found in output: {output}") 45 | op_line = "unknown test" 46 | test_name = "unknown" 47 | test_pattern = r"(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;(\s+\(.*found:\d+\))?\nMicroseconds per (read|write):\nCount: (\d+) Average: (\d+\.\d+) StdDev: (\d+\.\d+)\nMin: (\d+) Median: (\d+\.\d+) Max: (\d+)\nPercentiles: P50: (\d+\.\d+) P75: (\d+\.\d+) P99: (\d+\.\d+) P99.9: (\d+\.\d+) P99.99: (\d+\.\d+)" 48 | 49 | pattern_matches = re.findall(test_pattern, output) 50 | log_update(f"[PDB] Test name: {test_name}") 51 | log_update(f"[PDB] Matches: {pattern_matches}") 52 | log_update(f"[PDB] Output line: {op_line}") 53 | # Set all values to None if the pattern is not found 54 | micros_per_op = ops_per_sec = total_seconds = total_operations = data_speed = data_speed_unit = None 55 | 56 | # Extract the performance metrics if the pattern is found 57 | for pattern_match in pattern_matches: 58 | # Convert each captured group to the appropriate type (float or int) 59 | micros_per_op = float(pattern_match[0]) 60 | ops_per_sec = int(pattern_match[1]) 61 | total_seconds = float(pattern_match[2]) 62 | total_operations = int(pattern_match[3]) 63 | # Check for specific workloads to handle additional data 64 | if "readrandomwriterandom" in output: 65 | data_speed = ops_per_sec 66 | data_speed_unit = "ops/sec" 67 | reads_found = None 68 | elif "fillrandom" in output: 69 | data_speed = float(pattern_match[4]) 70 | data_speed_unit = pattern_match[5] 71 | writes_data = { 72 | "count": int(pattern_match[6]), 73 | "average": float(pattern_match[7]), 74 | "std_dev": float(pattern_match[8]), 75 | "min": int(pattern_match[9]), 76 | "median": float(pattern_match[10]), 77 | "max": int(pattern_match[11]), 78 | "percentiles": { 79 | "P50": float(pattern_match[12]), 80 | "P75": float(pattern_match[13]), 81 | "P99": float(pattern_match[14]), 82 | "P99.9": float(pattern_match[15]), 83 | "P99.99": float(pattern_match[16]) 84 | } 85 | } 86 | elif "readrandom" in output: 87 | data_speed = float(pattern_match[4]) 88 | data_speed_unit = pattern_match[5] 89 | reads_found = { 90 | "count": int(pattern_match[6]), 91 | "total": int(pattern_match[7]) 92 | } 93 | reads_data = { 94 | "count": int(pattern_match[8]), 95 | "average": float(pattern_match[9]), 96 | "std_dev": float(pattern_match[10]), 97 | "min": int(pattern_match[11]), 98 | "median": float(pattern_match[12]), 99 | "max": int(pattern_match[13]), 100 | "percentiles": { 101 | "P50": float(pattern_match[14]), 102 | "P75": float(pattern_match[15]), 103 | "P99": float(pattern_match[16]), 104 | "P99.9": float(pattern_match[17]), 105 | "P99.99": float(pattern_match[18]) 106 | } 107 | } 108 | elif "readwhilewriting" in output: 109 | 
data_speed = ops_per_sec 110 | data_speed_unit = "ops/sec"  # this pattern only captures the four common fields, so report ops/sec 111 | # reads_found = { 112 | # "count": int(pattern_match[6]), 113 | # "total": int(pattern_match[7]) 114 | # } 115 | # reads_data = { 116 | # "count": int(pattern_match[8]), 117 | # "average": float(pattern_match[9]), 118 | # "std_dev": float(pattern_match[10]), 119 | # "min": int(pattern_match[11]), 120 | # "median": float(pattern_match[12]), 121 | # "max": int(pattern_match[13]), 122 | # "percentiles": { 123 | # "P50": float(pattern_match[14]), 124 | # "P75": float(pattern_match[15]), 125 | # "P99": float(pattern_match[16]), 126 | # "P99.9": float(pattern_match[17]), 127 | # "P99.99": float(pattern_match[18]) 128 | # } 129 | # } 130 | elif "mixgraph" in output: 131 | data_speed = ops_per_sec 132 | data_speed_unit = "ops/sec" 133 | else: 134 | log_update(f"[PDB] Test name not found in output: {output}") 135 | data_speed = ops_per_sec 136 | data_speed_unit = "ops/sec" 137 | 138 | log_update(f"[PDB] Ops per sec: {ops_per_sec} Total seconds: {total_seconds} Total operations: {total_operations} Data speed: {data_speed} {data_speed_unit}") 139 | 140 | ops_per_sec_points = re.findall("and \((.*),.*\) ops\/second in \(.*,(.*)\)", output) 141 | 142 | # Store all extracted values in a dictionary 143 | parsed_data = { 144 | "entries": entries, 145 | "micros_per_op": micros_per_op, 146 | "ops_per_sec": ops_per_sec, 147 | "total_seconds": total_seconds, 148 | "total_operations": total_operations, 149 | "data_speed": data_speed, 150 | "data_speed_unit": data_speed_unit, 151 | "ops_per_second_graph": [ 152 | [float(a[1]) for a in ops_per_sec_points], 153 | [float(a[0]) for a in ops_per_sec_points], 154 | ] 155 | } 156 | 157 | # Grab the latency and push into the output logs file 158 | latency = re.findall("Percentiles:.*", output) 159 | for i in latency: 160 | log_update("[PDB] " + i) 161 | 162 | # Return the dictionary with the parsed data 163 | return parsed_data 164 | -------------------------------------------------------------------------------- /rocksdb/subprocess_manager.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | import time 4 | 5 | from cgroup_monitor import CGroupMonitor 6 | 7 | from utils.utils import log_update, path_of_db 8 | from utils.constants import TEST_NAME, DB_BENCH_PATH, OPTIONS_FILE_DIR, NUM_ENTRIES, SIDE_CHECKER, FIO_RESULT_PATH 9 | from rocksdb.parse_db_bench_output import parse_db_bench_output 10 | from utils.utils import store_db_bench_output 11 | from utils.graph import plot_2axis 12 | from gpt.prompts_generator import midway_options_file_generation 13 | from utils.system_operations.fio_runner import get_fio_result 14 | from utils.system_operations.get_sys_info import system_info 15 | 16 | 17 | def pre_tasks(database_path, run_count): 18 | ''' 19 | Function to perform the pre-tasks before running the db_bench 20 | Parameters: 21 | - database_path (str): The path to the database 22 | - run_count (str): The current iteration of the benchmark 23 | 24 | Returns: 25 | - None 26 | ''' 27 | 28 | # Try to delete the database if path exists 29 | proc = subprocess.run( 30 | f'rm -rf {database_path}', 31 | stdout=subprocess.PIPE, 32 | stderr=subprocess.STDOUT, 33 | shell=True, 34 | check=False 35 | ) 36 | 37 | log_update("[SPM] Flushing the cache") 38 | print("[SPM] Flushing the cache") 39 | # Delay for all the current memory to be freed 40 | proc = subprocess.run( 41 | f'sync; echo 3 > /proc/sys/vm/drop_caches', 42 | 
stdout=subprocess.PIPE, 43 | stderr=subprocess.STDOUT, 44 | shell=True, 45 | check=False 46 | ) 47 | 48 | # update_log_file("[SPM] Waiting for 90 seconds to free up memory, IO and other resources") 49 | print("[SPM] Waiting for 30 seconds to free up memory, IO and other resources") 50 | # Give a 30 second delay for all the current memory/IO/etc to be freed 51 | time.sleep(30) 52 | 53 | 54 | def generate_db_bench_command(db_bench_path, database_path, options, run_count, test_name): 55 | ''' 56 | Generate the DB bench command 57 | 58 | Parameters: 59 | - db_bench_path (str): The path to the db_bench executable 60 | - database_path (str): The path to the database 61 | - options (str): The options file contents to be used 62 | - run_count (str): The current iteration of the benchmark 63 | - test_name (str): The name of the test 64 | 65 | Returns: 66 | - list: The db_bench command 67 | ''' 68 | 69 | db_bench_command = [ 70 | db_bench_path, 71 | f"--db={database_path}", 72 | f"--options_file={OPTIONS_FILE_DIR}", 73 | "--use_direct_io_for_flush_and_compaction", 74 | "--use_direct_reads", "--compression_type=none", 75 | "--stats_interval_seconds=1", "--histogram", 76 | f"--num={NUM_ENTRIES}", "--duration=100" 77 | ] 78 | 79 | 80 | if test_name == "fillrandom": 81 | db_bench_command.append("--benchmarks=fillrandom") 82 | elif test_name == "readrandomwriterandom": 83 | db_bench_command.append("--benchmarks=readrandomwriterandom") 84 | elif test_name == "readrandom": 85 | tmp_runner = db_bench_command[:-2] + ["--num=25000000", "--benchmarks=fillrandom"] 86 | tmp_proc = subprocess.run(tmp_runner, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False) 87 | new_db_bench = db_bench_command[:-2] + ["--benchmarks=readrandom", "--use_existing_db", "--num=25000000", "--duration=1000"] 88 | db_bench_command = new_db_bench 89 | elif test_name == "mixgraph": 90 | tmp_runner = db_bench_command[:-2] + ["--num=25000000", "--benchmarks=fillrandom"] 91 | tmp_proc = subprocess.run(tmp_runner, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False) 92 | new_db_bench = db_bench_command[:-1] + ["--benchmarks=mixgraph", "--use_existing_db", "--duration=1000", "--mix_get_ratio=0.5", "--mix_put_ratio=0.5", "--mix_seek_ratio=0.0"] 93 | db_bench_command = new_db_bench 94 | elif test_name == "readwhilewriting": 95 | db_bench_command.append("--benchmarks=readwhilewriting") 96 | else: 97 | print(f"[SPM] Test name {test_name} not recognized") 98 | exit(1) 99 | 100 | log_update(f"[SPM] Command: {db_bench_command}") 101 | return db_bench_command 102 | 103 | 104 | def db_bench(db_bench_path, database_path, options, run_count, test_name, previous_throughput, options_files, bm_iter=0): 105 | ''' 106 | Store the options in a file 107 | Do the benchmark 108 | 109 | Parameters: 110 | - db_bench_path (str): The path to the db_bench executable 111 | - database_path (str): The path to the database 112 | - options (str): The options file contents to be used 113 | - run_count (str): The current iteration of the benchmark 114 | 115 | Returns: 116 | - tuple: (output, avg_cpu_used, avg_mem_used, options) 117 | ''' 118 | global proc_out 119 | with open(f"{OPTIONS_FILE_DIR}", "w") as f: 120 | f.write(options) 121 | 122 | # Perform pre-tasks to reset the environment 123 | pre_tasks(database_path, run_count) 124 | command = generate_db_bench_command(db_bench_path, database_path, options, run_count, test_name) 125 | 126 | log_update(f"[SPM] Executing db_bench with command: {command}") 127 | print("[SPM] Executing db_bench") 128 | 129 | 130 | if SIDE_CHECKER and 
previous_throughput != None: 131 | cgroup_monitor = CGroupMonitor() 132 | cgroup_monitor.start_monitor() 133 | start_time = time.time() 134 | 135 | with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) as proc_out: 136 | output = "" 137 | check_interval = 30 138 | for line in proc_out.stdout: 139 | output += line 140 | if time.time() - start_time <= check_interval: 141 | continue 142 | 143 | start_time = time.time() 144 | if "ops/second" in line: 145 | current_avg_throughput = float(line.split("(")[2].split(",")[1].split(")")[0]) 146 | 147 | if (current_avg_throughput < 0.9 * float(previous_throughput)) and (bm_iter < 3): 148 | print("[SQU] Throughput decreased, resetting the benchmark") 149 | log_update(f"[SQU] Throughput decreased {previous_throughput}->{current_avg_throughput}, resetting the benchmark") 150 | avg_cpu_used, avg_mem_used = cgroup_monitor.stop_monitor() 151 | proc_out.kill() 152 | 153 | db_path = path_of_db() 154 | fio_result = get_fio_result(FIO_RESULT_PATH) 155 | device_info = system_info(db_path, fio_result) 156 | 157 | new_options, _, _ = midway_options_file_generation(options, avg_cpu_used, avg_mem_used, current_avg_throughput, device_info, options_files) 158 | output, avg_cpu_used, avg_mem_used, options = db_bench(db_bench_path, database_path, new_options, run_count, test_name, previous_throughput, options_files, bm_iter+1) 159 | 160 | log_update("[SPM] Finished running db_bench") 161 | return output, avg_cpu_used, avg_mem_used, options 162 | else: 163 | print("[SQU] No throughput found in the output") 164 | log_update("[SQU] No throughput found in the output") 165 | # exit(1) 166 | 167 | print("[SPM] Finished running db_bench") 168 | print("----------------------------------------------------------------------------") 169 | print("[SPM] Output: ", output) 170 | avg_cpu_used, avg_mem_used = cgroup_monitor.stop_monitor() 171 | return output, avg_cpu_used, avg_mem_used, options 172 | else: 173 | cgroup_monitor = CGroupMonitor() 174 | cgroup_monitor.start_monitor() 175 | proc_out = subprocess.run( 176 | command, 177 | stdout=subprocess.PIPE, 178 | stderr=subprocess.STDOUT, 179 | check=False 180 | ) 181 | avg_cpu_used, avg_mem_used = cgroup_monitor.stop_monitor() 182 | return proc_out.stdout.decode(), avg_cpu_used, avg_mem_used, options 183 | 184 | 185 | def benchmark(db_path, options, output_file_dir, reasoning, iteration_count, previous_results, options_files): 186 | ''' 187 | Function to run db_bench with the given options file and store the output in a file 188 | 189 | Parameters: 190 | - db_path (str): The path of database 191 | - options (dict): The options to be used 192 | - output_file_dir (str): the output directory 193 | - reasoning (str): The reasoning of the benchmark 194 | 195 | Returns: 196 | - is_error (bool): 197 | - benchmark_results (dict): 198 | ''' 199 | if previous_results is None: 200 | output, average_cpu_usage, average_memory_usage, options = db_bench( 201 | DB_BENCH_PATH, db_path, options, iteration_count, TEST_NAME, None, options_files) 202 | else: 203 | output, average_cpu_usage, average_memory_usage, options = db_bench( 204 | DB_BENCH_PATH, db_path, options, iteration_count, TEST_NAME, previous_results['ops_per_sec'], options_files) 205 | 206 | # log_update(f"[SPM] Output: {output}") 207 | benchmark_results = parse_db_bench_output(output) 208 | 209 | contents = os.listdir(output_file_dir) 210 | ini_file_count = len([f for f in contents if f.endswith(".ini")]) 211 | 212 | if 
benchmark_results.get("error") is not None: 213 | is_error = True 214 | log_update(f"[SPM] Benchmark failed, the error is: {benchmark_results.get('error')}") 215 | print("[SPM] Benchmark failed, the error is: ", 216 | benchmark_results.get("error")) 217 | # Save incorrect options in a file 218 | store_db_bench_output(output_file_dir, 219 | f"{ini_file_count}-incorrect_options.ini", 220 | benchmark_results, options, reasoning) 221 | elif benchmark_results['data_speed'] is None: 222 | is_error = True 223 | log_update(f"[SPM] Benchmark failed, the error is: Data speed is None. Check DB save path") 224 | print("[SPM] Benchmark failed, the error is: ", 225 | "Data speed is None. Check DB save path") 226 | # Save incorrect options in a file 227 | store_db_bench_output(output_file_dir, 228 | f"{ini_file_count}-incorrect_options.ini", 229 | benchmark_results, options, reasoning) 230 | else: 231 | is_error = False 232 | # Store the output of db_bench in a file 233 | store_db_bench_output(output_file_dir, f"{ini_file_count}.ini", 234 | benchmark_results, options, reasoning) 235 | plot_2axis(*benchmark_results["ops_per_second_graph"], 236 | "Ops Per Second", 237 | f"{output_file_dir}/ops_per_sec_{ini_file_count}.png") 238 | log_update(f"[SPM] Latest result: {benchmark_results['data_speed']}" 239 | f"{benchmark_results['data_speed_unit']} and {benchmark_results['ops_per_sec']} ops/sec.") 240 | log_update(f"[SPM] Avg CPU and Memory usage: {average_cpu_usage}% and {average_memory_usage}%") 241 | print( 242 | f"[SPM] Latest result: {benchmark_results['data_speed']}", 243 | f"{benchmark_results['data_speed_unit']} and {benchmark_results['ops_per_sec']} ops/sec.\n", 244 | f"[SPM] Avg CPU and Memory usage: {average_cpu_usage}% and {average_memory_usage}%" 245 | ) 246 | 247 | return is_error, benchmark_results, average_cpu_usage, average_memory_usage, options 248 | -------------------------------------------------------------------------------- /gpt/prompts_generator.py: -------------------------------------------------------------------------------- 1 | import re 2 | from difflib import Differ 3 | from options_files.ops_options_file import cleanup_options_file 4 | from gpt.gpt_request import request_gpt 5 | from utils.utils import log_update 6 | from dotenv import load_dotenv 7 | import utils.constants as constants 8 | 9 | load_dotenv() 10 | 11 | def generate_system_content(device_information, rocksdb_version): 12 | """ 13 | Function to generate the system content with device info and rocksDB version. 14 | 15 | Parameters: 16 | device_information (str): Information about the device. 17 | 18 | Returns: 19 | str: A prompt for configuring RocksDB for enhanced performance. 20 | """ 21 | 22 | content = ( 23 | "You are a RocksDB Expert. " 24 | "You are being consulted by a company to help improve their RocksDB configuration " 25 | "by optimizing their options file based on their System information and benchmark output." 26 | f"Only provide option files for rocksdb version {rocksdb_version}. Also, Direct IO will always be used for both flush and compaction." 27 | "Additionally, compression type is set to none always." 28 | "First Explain the reasoning, only change 10 options, then show the option file in original format." 
29 | f"The Device information is: {device_information}" 30 | ) 31 | return content 32 | 33 | def generate_default_user_content(chunk_string, previous_option_files, average_cpu_used=-1.0, average_mem_used=-1.0, test_name="fillrandom"): 34 | user_contents = [] 35 | for _, benchmark_result, reasoning, _ in previous_option_files[1: -1]: 36 | benchmark_line = generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used) 37 | user_content = f"The option file changes were:\n```\n{reasoning}\n```\nThe benchmark results are: {benchmark_line}" 38 | user_contents.append(user_content) 39 | 40 | _, benchmark_result, _, _ = previous_option_files[-1] 41 | benchmark_line = generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used) 42 | user_content = f"Part of the current option file is:\n```\n{chunk_string}\n```\nThe benchmark results are: {benchmark_line}" 43 | user_contents.append(user_content) 44 | user_contents.append("Based on these information generate a new file in same format as the options_file to improve my database performance. Enclose the new options file in ```.") 45 | return user_contents 46 | 47 | def generate_user_content_with_difference(previous_option_files, average_cpu_used=-1.0, average_mem_used=-1.0, test_name="fillrandom"): 48 | result =" " 49 | user_content = [] 50 | 51 | if len(previous_option_files) == 1: 52 | m1_file, m1_benchmark_result, _, _ = previous_option_files[-1] 53 | benchmark_line = generate_benchmark_info(test_name, m1_benchmark_result, average_cpu_used, average_mem_used) 54 | user_content = f"The original file is:\n```\n{m1_file}\n```\nThe benchmark results for the original file are: {benchmark_line}" 55 | 56 | elif len(previous_option_files) > 1: 57 | previous_option_file1, _, _, _ = previous_option_files[-1] 58 | previous_option_file2, _, _, _ = previous_option_files[-2] 59 | 60 | pattern = re.compile(r'\s*([^=\s]+)\s*=\s*([^=\s]+)\s*') 61 | 62 | file1_lines = pattern.findall(previous_option_file1) 63 | file2_lines = pattern.findall(previous_option_file2) 64 | 65 | file1_lines = ["{} = {}".format(k, v) for k, v in file1_lines] 66 | file2_lines = ["{} = {}".format(k, v) for k, v in file2_lines] 67 | differ = Differ() 68 | diff = list(differ.compare(file1_lines, file2_lines)) 69 | lst= [] 70 | for line in diff: 71 | if line.startswith('+'): 72 | lst.append(line) 73 | result = '\n'.join(line[2:] for line in lst) 74 | m2_file, m2_benchmark_result, _, _ = previous_option_files[-2] 75 | benchmark_line = generate_benchmark_info(test_name, m2_benchmark_result, average_cpu_used, average_mem_used) 76 | user_content = ( 77 | f"The original file is:\n```\n{m2_file}\n```\n" 78 | f"The benchmark results for the original file are: {benchmark_line}\n" 79 | f"The previous file modifications are:\n```\n{result}\n```\n" 80 | ) 81 | 82 | else: 83 | _, benchmark_result, _, _ = previous_option_files[-1] 84 | benchmark_line = generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used) 85 | 86 | user_content = ("The previous file modifications are: " 87 | f"\n```\n{result}\n```\n" 88 | f"The benchmark results for the previous file are: {benchmark_line}") 89 | 90 | 91 | user_contents = [user_content, "Based on these information generate a new file in the same format as the options_file to improve my database performance. 
Enclose the new options file in ```."] 92 | return user_contents 93 | 94 | def generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used): 95 | """ 96 | Function to create a formatted string with benchmark information. 97 | 98 | Parameters: 99 | - test_name: Name of the test. 100 | - benchmark_result: Dictionary with benchmark results. 101 | - average_cpu_used: Average CPU usage. 102 | - average_mem_used: Average Memory usage. 103 | 104 | Returns: 105 | - A formatted string with all benchmark information. 106 | """ 107 | benchmark_line = (f"The use case for the database is perfectly simulated by the {test_name} test. " 108 | f"The db_bench benchmark results for {test_name} are: Write/Read speed: {benchmark_result['data_speed']} " 109 | f"{benchmark_result['data_speed_unit']}, Operations per second: {benchmark_result['ops_per_sec']}.") 110 | 111 | if average_cpu_used != -1 and average_mem_used != -1: 112 | benchmark_line += f" CPU used: {average_cpu_used}%, Memory used: {average_mem_used}% during test." 113 | 114 | return benchmark_line 115 | 116 | def midway_options_file_generation(options, avg_cpu_used, avg_mem_used, last_throughput, device_information, options_file): 117 | """ 118 | Function to generate a prompt for the midway options file generation. 119 | 120 | Returns: 121 | - tuple: The cleaned-up options file, the reasoning text, and an empty placeholder string. 122 | """ 123 | 124 | sys_content = ( 125 | "You are a RocksDB Expert being consulted by a company to help improve their RocksDB performance " 126 | "by optimizing the options configured for a particular scenario they face. " 127 | f"Only provide option files for rocksdb version {constants.VERSION}. Direct IO will always be used. " 128 | "Additionally, compression type is set to none always. " 129 | "Respond with the reasoning first, then show the option file in original format. " 130 | f"The Device information is: {device_information}" 131 | ) 132 | 133 | user_content = [] 134 | content = "Can you generate a new options file for RocksDB based on the following information?\n" 135 | content += "The previous options file is:\n" 136 | 137 | content += "```\n" 138 | content += options_file[-1][0] 139 | content += "```\n" 140 | 141 | content += ( 142 | f"The throughput results for the above options file are: {options_file[-1][1]['ops_per_sec']}. " 143 | ) 144 | 145 | user_content.append(content) 146 | content = "" 147 | 148 | content += "We then made the following changes to the options file:\n" 149 | 150 | pattern = re.compile(r'\s*([^=\s]+)\s*=\s*([^=\s]+)\s*') 151 | 152 | file1_lines = pattern.findall(options) 153 | file2_lines = pattern.findall(options_file[-1][0]) 154 | 155 | file1_lines = ["{} = {}".format(k, v) for k, v in file1_lines] 156 | file2_lines = ["{} = {}".format(k, v) for k, v in file2_lines] 157 | differ = Differ() 158 | diff = list(differ.compare(file1_lines, file2_lines)) 159 | lst = [] 160 | for line in diff: 161 | if line.startswith('+'): 162 | lst.append(line) 163 | result = '\n'.join(line[2:] for line in lst) 164 | 165 | content += "```\n" 166 | content += result 167 | content += "```\n" 168 | 169 | content += f"\nThe updated file has a throughput of: {last_throughput}. \n\n" 170 | user_content.append(content) 171 | content = "" 172 | content += "Based on this information generate a new file. Enclose the new options in ```. Feel free to use up to 100% of the CPU and Memory." 
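# At this point user_content holds the previous options file (with its throughput) and the diff of the latest changes (with the resulting throughput); the closing instruction built above is appended next and everything is sent through request_gpt at temperature 0.4.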
173 | user_content.append(content) 174 | 175 | log_update("[OG] Generating options file with differences") 176 | log_update("[OG] Prompt for midway options file generation") 177 | log_update(content) 178 | 179 | matches = request_gpt(sys_content, user_content, 0.4) 180 | 181 | if matches is not None: 182 | clean_options_file = cleanup_options_file(matches[1]) 183 | reasoning = matches[0] + matches[2] 184 | 185 | return clean_options_file, reasoning, "" 186 | 187 | def generate_option_file_with_gpt(case, previous_option_files, device_information, temperature=0.4, average_cpu_used=-1.0, average_mem_used=-1.0, test_name="fillrandom", version="8.8.1"): 188 | """ 189 | Function that generates an options file for RocksDB based on specified parameters and case scenarios. 190 | - This function selects one of three different approaches to generate a RocksDB configuration options file. 191 | 192 | Parameters: 193 | - case (int): Determines the approach to use for generating the options file. Valid values are 1, 2, or 3. 194 | - previous_option_files (list): A list of tuples containing past options file configurations and other relevant data. 195 | - device_information (str): Information about the device/system on which RocksDB is running. 196 | - temperature (float, optional): Controls the randomness/creativity of the generated output. Default is 0.4. 197 | - average_cpu_used (float, optional): Average CPU usage, used for tuning the configuration. Default is -1.0, indicating not specified. 198 | - average_mem_used (float, optional): Average memory usage, used for tuning the configuration. Default is -1.0, indicating not specified. 199 | - test_name (str, optional): Identifier for the type of test or configuration scenario. Default is "fillrandom". 200 | 201 | Returns: 202 | - tuple: A tuple containing the generated options file, reasoning behind the options, and an empty string as a placeholder. 203 | 204 | Raises: 205 | - ValueError: If the `case` parameter is not 1, 2, or 3. 206 | """ 207 | def case_1(previous_option_files, device_information, temperature,average_cpu_used, average_mem_used, test_name, version): 208 | log_update("[OG] Generating options file with long option changes") 209 | print("[OG] Generating options file with long option changes") 210 | system_content = generate_system_content(device_information, version) 211 | previous_option_file, _, _, _ = previous_option_files[-1] 212 | user_contents = generate_default_user_content(previous_option_file, previous_option_files, average_cpu_used, average_mem_used, test_name) 213 | matches = request_gpt(system_content, user_contents, temperature) 214 | # Process the GPT-generated response 215 | if matches is not None: 216 | clean_options_file = cleanup_options_file(matches[1]) 217 | reasoning = matches[0] + matches[2] 218 | 219 | return clean_options_file, reasoning, "" 220 | 221 | def case_2(previous_option_files, device_information, temperature,average_cpu_used, average_mem_used, test_name, version): 222 | log_update("[OG] Generating options file with short option changes") 223 | print("[OG] Generating options file with short option changes") 224 | system_content = ( 225 | "You are a RocksDB Expert. " 226 | "You are being consulted by a company to help improve their RocksDB configuration " 227 | "by optimizing their options file based on their System information and benchmark output." 228 | f"Only provide option files for rocksdb version {version}. Also, Direct IO will always be used for both flush and compaction." 
229 | "Additionally, compression type is set to none always." 230 | "First Explain the reasoning, only change the options I provided, then show the option file in original format." 231 | f"The Device information is: {device_information}") 232 | previous_option_file, _, _, _ = previous_option_files[-1] 233 | 234 | # Define a regular expression pattern to match key-value pairs 235 | pattern = re.compile(r'\s*([^=\s]+)\s*=\s*([^=\s]+)\s*') 236 | 237 | # Extract key-value pairs from the string 238 | key_value_pairs = {match.group(1): match.group( 239 | 2) for match in pattern.finditer(previous_option_file)} 240 | 241 | # Remove key-value pairs where the key is "xxx" (case-insensitive) 242 | key_value_pairs = {key: value for key, value in key_value_pairs.items( 243 | ) if key.lower() not in {'rocksdb_version', 'options_file_version'}} 244 | 245 | # Split key-value pairs into chunks of 5 pairs each 246 | pairs_per_chunk = 20 247 | chunks = [list(key_value_pairs.items())[i:i + pairs_per_chunk] 248 | for i in range(0, len(key_value_pairs), pairs_per_chunk)] 249 | 250 | # Create strings for each chunk 251 | chunk_strings = [ 252 | '\n'.join([f"{key}: {value}" for key, value in chunk]) for chunk in chunks] 253 | 254 | clean_options_file = "" 255 | reasoning = "" 256 | 257 | # Loop through each part and make API calls 258 | for chunk_string in chunk_strings: 259 | user_contents = generate_default_user_content(chunk_string, previous_option_files, average_cpu_used, average_mem_used, test_name) 260 | matches = request_gpt(system_content, user_contents, temperature) 261 | if matches is not None: 262 | clean_options_file = cleanup_options_file(matches[1]) 263 | reasoning += matches[0] + matches[2] 264 | 265 | return clean_options_file, reasoning, "" 266 | 267 | 268 | def case_3(previous_option_files, device_information, temperature,average_cpu_used, average_mem_used, test_name, version): 269 | 270 | log_update("[OG] Generating options file with differences") 271 | print("[OG] Generating options file with differences") 272 | system_content = generate_system_content(device_information, version) 273 | # Request GPT to generate new option 274 | user_contents = generate_user_content_with_difference(previous_option_files, average_cpu_used, average_mem_used, test_name) 275 | matches = request_gpt(system_content, user_contents, temperature) 276 | # Process the GPT response 277 | if matches is not None: 278 | clean_options_file = cleanup_options_file(matches[1]) 279 | reasoning = matches[0] + matches[2] 280 | 281 | return clean_options_file, reasoning, "" 282 | 283 | switch = { 284 | 1: case_1, 285 | 2: case_2, 286 | 3: case_3, 287 | } 288 | func = switch.get(case) 289 | if func: 290 | return func(previous_option_files, device_information, temperature,average_cpu_used, average_mem_used, test_name, version) 291 | else: 292 | raise ValueError(f"No function defined for case {case}") --------------------------------------------------------------------------------