├── utils
│   ├── filter.py
│   ├── parse.py
│   ├── constants.py
│   ├── system_operations
│   │   ├── fio_runner.py
│   │   └── get_sys_info.py
│   ├── graph.py
│   └── utils.py
├── requirements.txt
├── docker
│   ├── Dockerfile
│   └── docker_runner.py
├── gpt
│   ├── gpt_request.py
│   └── prompts_generator.py
├── README.md
├── options_files
│   ├── default_options_files
│   │   ├── initial_options_file.ini
│   │   ├── dbbench_default_options-7.10.2.ini
│   │   ├── bad_options.ini
│   │   ├── good_options.ini
│   │   ├── rocksdb_default_options.ini
│   │   └── dbbench_default_options-8.8.1.ini
│   └── ops_options_file.py
├── main.py
└── rocksdb
    ├── parse_db_bench_output.py
    └── subprocess_manager.py
/utils/filter.py:
--------------------------------------------------------------------------------
1 | def key_filter(key):
2 | if (key == 'wal_size_limit_mb'):
3 | key = 'WAL_size_limit_MB'
4 | if (key == 'wal_ttl_seconds'):
5 | key = 'WAL_ttl_seconds'
6 | return key
7 |
8 | # Options that should not be changed
9 | BLACKLIST = ['use_direct_io_for_flush_and_compaction',
10 | 'use_direct_reads', 'compression_type']
11 |
--------------------------------------------------------------------------------
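
A minimal usage sketch (hypothetical, not part of the repository; assumes the repository root is on `PYTHONPATH`) showing how `key_filter` restores RocksDB's mixed-case WAL option names and how `BLACKLIST` drops options that must not be changed:

```python
from utils.filter import key_filter, BLACKLIST

tuned = {"wal_size_limit_mb": "0", "compression_type": "kNoCompression"}
kept = {key_filter(k): v for k, v in tuned.items() if k not in BLACKLIST}
print(kept)  # {'WAL_size_limit_MB': '0'} -- 'compression_type' is blacklisted and dropped
```
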
/requirements.txt:
--------------------------------------------------------------------------------
1 | annotated-types==0.6.0
2 | anyio==4.1.0
3 | black==23.11.0
4 | certifi==2023.11.17
5 | click==8.1.7
6 | distro==1.8.0
7 | exceptiongroup==1.2.0
8 | h11==0.14.0
9 | httpcore==1.0.2
10 | httpx==0.25.2
11 | idna==3.6
12 | mypy-extensions==1.0.0
13 | openai==1.3.8
14 | packaging==23.2
15 | pathspec==0.12.0
16 | platformdirs==4.1.0
17 | psutil==5.9.6
18 | py-cpuinfo==9.0.0
19 | pydantic==2.5.2
20 | pydantic_core==2.14.5
21 | python-dotenv==1.0.0
22 | sniffio==1.3.0
23 | tomli==2.0.1
24 | tqdm==4.66.1
25 | typing_extensions==4.8.0
26 | matplotlib==3.7.4
27 | deepdiff==6.7.1
28 | cgroup-monitor==0.1.2
29 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 |
3 | ARG TARGETPLATFORM
4 | ARG DEBIAN_FRONTEND=noninteractive
5 |
6 | # Install dependencies
7 | RUN apt-get update && apt-get install -y \
8 | build-essential \
9 | libgflags-dev \
10 | libsnappy-dev \
11 | zlib1g-dev \
12 | libbz2-dev \
13 | liblz4-dev \
14 | libzstd-dev \
15 | cmake \
16 | git \
17 | python3 \
18 | python3-pip \
19 | wget \
20 | fio \
21 | libjemalloc2
22 |
23 | # Setup RocksDB
24 | RUN wget https://github.com/facebook/rocksdb/archive/refs/tags/v8.8.1.tar.gz && \
25 | tar -xzf v8.8.1.tar.gz && \
26 | cd rocksdb-8.8.1 && \
27 | make -j48 static_lib db_bench
28 |
29 | # Setup GPT Repo's requirements
30 | ADD gpt-assisted-rocksdb-config/requirements.txt /requirements.txt
31 |
32 | # Setup Python requirements
33 | RUN pip3 install -r requirements.txt
34 |
35 | # Setup GPT Repo
36 | ADD gpt-assisted-rocksdb-config /gpt-assisted-rocksdb-config
37 |
38 | WORKDIR /gpt-assisted-rocksdb-config
39 | CMD ["python3", "main.py"]
40 |
--------------------------------------------------------------------------------
/utils/parse.py:
--------------------------------------------------------------------------------
1 |
2 | import configparser
3 | from utils.filter import key_filter
4 |
5 | def dict_to_configparser(dictionary):
6 | '''
7 | Function to convert a dictionary to a configparser object
8 |
9 | Parameters:
10 | - dictionary (dict): The dictionary to be converted
11 |
12 | Returns:
13 | - config (configparser.ConfigParser): The configparser object
14 | '''
15 | config = configparser.ConfigParser()
16 |
17 | for section, options in dictionary.items():
18 | config[section] = {}
19 | for key, value in options.items():
20 | config[section][key] = value
21 |
22 | return config
23 |
24 | def configparser_to_string(config_parser):
25 | '''
26 | Function to convert a configparser object to a string
27 |
28 | Parameters:
29 | - config_parser (configparser.ConfigParser): The configparser object
30 |
31 | Returns:
32 | - string_representation (str): The string representation of the configparser object
33 | '''
34 | string_representation = ''
35 | for section in config_parser.sections():
36 | string_representation += f"[{section}]\n"
37 | for key, value in config_parser[section].items():
38 | key = key_filter(key)
39 | string_representation += f" {key}={value}\n"
40 | string_representation += '\n'
41 | return string_representation
--------------------------------------------------------------------------------
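
As a quick round-trip illustration (hypothetical, assuming the repository root is on `PYTHONPATH`): a section dictionary is converted to a `ConfigParser` and then rendered as INI text, with `key_filter` restoring the mixed-case WAL keys that `configparser` lowercases:

```python
from utils.parse import dict_to_configparser, configparser_to_string

options = {"DBOptions": {"wal_ttl_seconds": "0", "max_open_files": "-1"}}
config = dict_to_configparser(options)
print(configparser_to_string(config))
# [DBOptions]
#   WAL_ttl_seconds=0
#   max_open_files=-1
```
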
/gpt/gpt_request.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | from openai import OpenAI
4 |
5 | # Environment variables
6 | client = OpenAI()
7 | client.api_key = os.getenv("OPENAI_API_KEY")
8 |
9 | def request_gpt(system_content, user_contents, temperature):
10 |     '''
11 |     Function to make an API call to GPT-4
12 | 
13 |     Parameters:
14 |     - system_content: string containing the system prompt for the model
15 |     - user_contents: list of strings, each appended to the request as a
16 |       separate user message (for example, the system information, the current
17 |       options file, and the previous benchmark results)
18 |     - temperature: Float (0-1) controlling GPT-4's output randomness.
19 | 
20 |     Returns:
21 |     - matches: re.Match object whose first group is the fenced options file
22 |       generated by GPT-4 and whose second group is any trailing commentary;
23 |       None if the reply does not contain a fenced code block
24 |     '''
25 | messages = [{"role": "system", "content": system_content}]
26 | for content in user_contents:
27 | messages.append({"role": "user", "content": content})
28 |
29 |
30 | # Assuming 'client' is already defined and authenticated for GPT-4 API access
31 | completion = client.chat.completions.create(
32 | model="gpt-4-0125-preview",
33 | messages=messages,
34 | temperature=temperature,
35 | max_tokens=4096,
36 | frequency_penalty=0,
37 | presence_penalty=0,
38 | )
39 |
40 | # Extract the assistant's reply
41 | assistant_reply = completion.choices[0].message.content
42 |     matches = re.match(r"[\s\S]*```([\s\S]*)```([\s\S]*)", assistant_reply)
43 |
44 | # Check if result is good
45 | if matches is not None:
46 | return matches
47 |
48 | # Invalid response
49 | with open("invalid_assistant_reply.txt", "a") as file:
50 | file.write(assistant_reply + "\n\n" + "-" * 150 + "\n\n")
51 | return None
52 |
--------------------------------------------------------------------------------
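
A hedged caller sketch (the prompts are placeholders; `OPENAI_API_KEY` must be set): on success, `request_gpt` returns an `re.Match` whose first group is the fenced options file and whose second group is any trailing commentary:

```python
from gpt.gpt_request import request_gpt

match = request_gpt(
    system_content="You are a RocksDB tuning assistant.",  # placeholder prompt
    user_contents=["Suggest an options file for a fillrandom workload."],
    temperature=0.4,
)
if match is not None:
    options_text = match.group(1)  # text inside the ``` ... ``` block
    commentary = match.group(2)    # anything after the closing fence
```
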
/README.md:
--------------------------------------------------------------------------------
1 | # ELMo-Tune ([HotStorage'24 Best Paper] Can Modern LLMs Tune and Configure LSM-based Key-Value Stores?)
2 |
3 | 🏆HotStorage'24 Best Paper - Can Modern LLMs Tune and Configure LSM-based Key-Value Stores?
4 | Paper URL: [https://doi.org/10.1145/3655038.3665954](https://doi.org/10.1145/3655038.3665954)
5 |
6 | ## Features
7 | This project runs a series of tests using RocksDB's db_bench tool: first with the default configuration, then with configurations generated iteratively by the LLM. The results of each run are compared to determine the best RocksDB configuration found by ELMo-Tune.
8 |
9 | ## Prerequisites
10 | This project requires Python 3.6 or higher. The following dependencies are required:
11 | ```bash
12 | # Instructions for Ubuntu 20.04
13 | # Install dependencies
14 | apt-get update && apt-get install -y build-essential libgflags-dev libsnappy-dev zlib1g-dev libbz2-dev liblz4-dev libzstd-dev git python3 python3-pip wget fio
15 |
16 | # Install and Build RocksDB 8.8.1
17 | wget https://github.com/facebook/rocksdb/archive/refs/tags/v8.8.1.tar.gz
18 | tar -xzf v8.8.1.tar.gz
19 | cd rocksdb-8.8.1
20 | make -j static_lib db_bench
21 |
22 | git clone https://github.com/asu-idi/ELMo-Tune
23 | cd ELMo-Tune
24 |
25 | # Install requirements
26 | pip install -r requirements.txt
27 | ```
28 |
29 | ## Setup
30 | To run the tests successfully, some variables need to be defined.
31 | ```bash
32 | # You need an OpenAI API key to run the code successfully.
33 | export OPENAI_API_KEY=
34 | ```
35 | Additionally, set `DB_BENCH_PATH` in `utils/constants.py`, along with any other paths required for your system setup.
36 |
37 | ## How to use
38 | To run the tests, run the following command:
39 | ```bash
40 | # e.g. Run a random write (fillrandom) test with the db stored in the '/data/gpt_project/db' folder and with output in the './output' directory
41 | python3 main.py --workload=fillrandom --device=data --output=./output --num_entries=10000
42 |
43 | # You can explore the options using the --help command (or using the constants.py file)
44 | # -c --case CASE Specify the case number
45 | # -d --device DEVICE Specify the device
46 | # -t --workload WORKLOAD Specify the test name
47 | # -v --version VERSION Specify the version of RocksDB
48 | # -o --output OUTPUT Specify the output path
49 | # -n --num_entries NUM_ENTRIES Specify the number of entries
50 | # -s --side_checker SIDE_CHECKER Specify if side checker is enabled
51 | ```
52 |
53 | > Alternatively, you can use the Docker environment built from the Dockerfile in the `docker` folder.
54 |
--------------------------------------------------------------------------------
/utils/constants.py:
--------------------------------------------------------------------------------
1 | import os
2 | from dotenv import load_dotenv
3 | import argparse
4 | from datetime import datetime
5 |
6 | load_dotenv()
7 |
8 | def path_of_output_folder():
9 | '''
10 | Set the output folder directory
11 |
12 | Parameters:
13 | - None
14 |
15 | Returns:
16 | - output_folder_dir (str): The output folder directory
17 | '''
18 | current_datetime = datetime.now()
19 | date_time_string = current_datetime.strftime("%Y-%m-%d_%H-%M-%S")
20 | output_folder_dir = f"output/output_{DEVICE}/output_{date_time_string}"
21 |
22 | os.makedirs(output_folder_dir, exist_ok=True)
23 | print(f"[UTL] Using output folder: {output_folder_dir}")
24 |
25 | return output_folder_dir
26 |
27 | # Check the environment variables, set to default if not found
28 | env_DEVICE = os.getenv("DEVICE", None)
29 | env_TEST_NAME = os.getenv("TEST_NAME", None)
30 | env_CASE_NUMBER = os.getenv("CASE_NUMBER", 1)
31 | env_VERSION = os.getenv("VERSION", "8.8.1")
32 | env_OUTPUT_PATH = os.getenv("OUTPUT_PATH", None)
33 | env_NUM_ENTRIES = os.getenv("NUM_ENTRIES", 3000000000)
34 | env_SIDE_CHECKER = os.getenv("SIDE_CHECKER", True)
35 |
36 | # Parse the arguments. They replace the environment variables if they are set
37 | parser = argparse.ArgumentParser(description='Description of your script')
38 | parser.add_argument('-c', '--case', type=int, default=env_CASE_NUMBER, help='Specify the case number')
39 | parser.add_argument('-d', '--device', type=str, default=env_DEVICE, help='Specify the device')
40 | parser.add_argument('-t', '--workload', type=str, default=env_TEST_NAME, help='Specify the test name')
41 | parser.add_argument('-v', '--version', type=str, default=env_VERSION, help='Specify the version of RocksDB')
42 | parser.add_argument('-o', '--output', type=str, default=env_OUTPUT_PATH, help='Specify the output path')
43 | parser.add_argument('-n', '--num_entries', type=int, default=env_NUM_ENTRIES, help='Specify the number of entries')
44 | parser.add_argument('-s', '--side_checker', type=lambda v: str(v).lower() in ('true', '1', 'yes'), default=env_SIDE_CHECKER, help='Specify if side checker is enabled (true/false)')
45 |
46 | args = parser.parse_args()
47 | CASE_NUMBER = args.case
48 | DEVICE = args.device
49 | TEST_NAME = args.workload
50 | VERSION = args.version
51 | OUTPUT_PATH = args.output if args.output else path_of_output_folder()
52 | NUM_ENTRIES = args.num_entries
53 | SIDE_CHECKER = args.side_checker
54 |
55 | # Constants
56 | # DB_BENCH_PATH = f"/data/gpt_project/rocksdb-{VERSION}/db_bench"
57 | DB_BENCH_PATH = f"/rocksdb-{VERSION}/db_bench"
58 | DB_PATH = f"/{DEVICE}/gpt_project/db"
59 | FIO_RESULT_PATH = f"/data/gpt_project/gpt-assisted-rocksdb-config/data/fio/fio_output_{DEVICE}.txt"
60 | DEFAULT_OPTION_FILE_DIR = "options_files/default_options_files"
61 | INITIAL_OPTIONS_FILE_NAME = f"dbbench_default_options-{VERSION}.ini"
62 | OPTIONS_FILE_DIR = f"{OUTPUT_PATH}/options_file.ini"
63 |
--------------------------------------------------------------------------------
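
The effective precedence is: command-line flags override environment variables, which override the hard-coded defaults. A standalone sketch of that pattern (hypothetical values, not part of the repository):

```python
import argparse
import os

# The environment variable supplies the default; a CLI flag overrides it.
env_device = os.getenv("DEVICE", None)

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--device", type=str, default=env_device)
args = parser.parse_args(["--device", "nvme"])  # e.g. `python3 main.py --device=nvme`
print(args.device)  # 'nvme', regardless of what DEVICE was set to
```
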
/docker/docker_runner.py:
--------------------------------------------------------------------------------
1 | import docker
2 | import os
3 |
4 | client = docker.from_env()
5 |
6 | def main():
7 | '''
8 | Main function to run multiple docker containers one after the other. All containers mount a volume to the host machine.
9 | Additionally, before mounting, the environment variables are updated to reflect the current iteration number and the status
10 | of the for loop which is controlling the memory and cpus.
11 | '''
12 |
13 | cpu_list = [2, 4]
14 | memory_list = [4, 8]
15 | devices = ["nvme", "data"]
16 | tests = ["fillrandom", "readrandom", "readrandomwriterandom"]
17 | base_output_path = f"/data/gpt_project/gpt-assisted-rocksdb-config/output/output"
18 | base_db_path = f"gpt_project/dbr"
19 |
20 | for memory_cap in memory_list:
21 | for cpu_cap in cpu_list:
22 | for test in tests:
23 | print("-" * 50)
24 | print(f"Running Iteration for CPU: {cpu_cap} Memory: {memory_cap} on /{devices[0]} and /{devices[1]} for {test}")
25 |
26 | # Run docker container with mount and environment variables as in cpu and memory
27 | container = client.containers.run(
28 | "gptproject:latest",
29 | detach=True,
30 | name=f"gpt_project_c{cpu_cap}_m{memory_cap}_{devices[0]}_{test}",
31 | environment=[f"ITERATION=c{cpu_cap}m{memory_cap}", f"CPU_COUNT={cpu_cap}", f"MEMORY_MAX={memory_cap}",
32 | f"OUTPUT_PATH={base_output_path}_{devices[0]}/c{cpu_cap}_m{memory_cap}_{test}",
33 | f"DEVICE={devices[0]}", f"TEST_NAME={test}", f"DB_PATH=/{devices[0]}/{base_db_path}/{cpu_cap}_{test}"],
34 | cpu_count=cpu_cap,
35 | mem_limit=f"{memory_cap}g",
36 | volumes={"/nvme/gpt_project": {'bind': '/nvme/gpt_project', 'mode': 'rw'},
37 | "/data/gpt_project": {'bind': '/data/gpt_project', 'mode': 'rw'}}
38 | )
39 |
40 | # Run docker container with mount and environment variables as in cpu and memory
41 | container2 = client.containers.run(
42 | "gptproject:latest",
43 | detach=True,
44 | name=f"gpt_project_c{cpu_cap}_m{memory_cap}_{devices[1]}_{test}",
45 | environment=[f"ITERATION=c{cpu_cap}m{memory_cap}", f"CPU_COUNT={cpu_cap}", f"MEMORY_MAX={memory_cap}",
46 | f"OUTPUT_PATH={base_output_path}_{devices[1]}/c{cpu_cap}_m{memory_cap}_{test}",
47 | f"DEVICE={devices[1]}", f"TEST_NAME={test}", f"DB_PATH=/{devices[1]}/{base_db_path}/{cpu_cap}_{test}"],
48 | cpu_count=cpu_cap,
49 | mem_limit=f"{memory_cap}g",
50 | volumes={"/nvme/gpt_project": {'bind': '/nvme/gpt_project', 'mode': 'rw'},
51 | "/data/gpt_project": {'bind': '/data/gpt_project', 'mode': 'rw'}}
52 | )
53 |
54 | # Wait for the container to finish
55 | container.wait()
56 | container2.wait()
57 |
58 | if __name__ == "__main__":
59 | main()
--------------------------------------------------------------------------------
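
`docker_runner.py` assumes an image tagged `gptproject:latest` already exists. A hedged sketch of building it with the same Docker SDK (the build-context path is an assumption; the context must contain the `gpt-assisted-rocksdb-config` checkout that the Dockerfile `ADD`s):

```python
import docker

client = docker.from_env()

# Hypothetical build step: `path` is the assumed build-context directory.
image, build_logs = client.images.build(
    path="/data/gpt_project",
    dockerfile="gpt-assisted-rocksdb-config/docker/Dockerfile",
    tag="gptproject:latest",
)
print(image.tags)  # ['gptproject:latest']
```
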
/utils/system_operations/fio_runner.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import re
3 | import os
4 |
5 |
6 | def fio_run(test_type, file_path):
7 | '''
8 | Function to run fio benchmark
9 |
10 | Parameters:
11 | - test_type: string containing the type of test to run
12 |     - file_path: string containing the file where the parsed result is appended
13 | Returns:
14 | - parsed_res: string containing the parsed result of the fio test
15 | '''
16 | command = [
17 | "fio",
18 | "--name=test",
19 | "--ioengine=posixaio",
20 | f"--rw={test_type}",
21 | "--bs=4k",
22 | "--numjobs=1",
23 | "--size=10G",
24 | "--runtime=60",
25 | "--time_based"
26 | ]
27 |
28 | print("[FIO] running fio test now", test_type + "\n")
29 | proc = subprocess.run(
30 | command,
31 | stdout=subprocess.PIPE,
32 | stderr=subprocess.STDOUT,
33 | )
34 |
35 | output = proc.stdout.decode()
36 | print("[FIO] output :", output)
37 |
38 | parsed_res = parse_fio_output(output, test_type)
39 |
40 | with open(file_path, "a") as file:
41 | file.write(parsed_res + '\n')
42 |
43 | return parsed_res
44 |
45 |
46 | def get_fio_result(file_path):
47 | '''
48 | Function to get the fio result
49 |
50 | Parameters:
51 | - file_path: string containing the path to the fio result file
52 |
53 | Returns:
54 | - content: string containing the content of the fio result
55 | '''
56 | if (os.path.exists(file_path) and os.path.getsize(file_path) != 0):
57 | print("[FIO] File exists and is not empty. Reading file.")
58 | with open(file_path, 'r') as file:
59 | content = file.read()
60 | return content
61 |
62 |     # Run each fio test type and collect the parsed results
63 |     test_types = ["randwrite", "randread", "read", "write"]
64 |     fio_results = [fio_run(test_type, file_path)
65 |                    for test_type in test_types]
66 |     combined_result = '\n'.join(fio_results)
67 |
68 | print(f"[FIO] result : \n {combined_result}")
69 | delete_test_file()
70 | return combined_result
71 |
72 |
73 | def parse_fio_output(fio_result, test_type):
74 | '''
75 | Function to parse the fio output
76 |
77 | Parameters:
78 | - fio_result: string containing the fio result
79 | - test_type: string containing the type of test to run
80 |
81 | Returns:
82 | - result_string: string containing the parsed result of the fio test
83 | '''
84 |     if test_type in ["randwrite", "write"]:
85 |         pattern = re.compile(r'WRITE: bw=(.*?)\s\(.*?\),\s(.*?)\s\(.*?\)')
86 |     elif test_type in ["randread", "read"]:
87 |         pattern = re.compile(r'READ: bw=(.*?)\s\(.*?\),\s(.*?)\s\(.*?\)')
88 |     else:
89 |         print(f"[FIO] Unsupported test type: {test_type}")
90 |         return f"{test_type}: unsupported test type"
91 |     result_string = f"{test_type}: no result found"
92 |     match = pattern.search(fio_result)
93 |     if match:
94 |         values_list = [match.group(1), match.group(2)]
95 |         result_string = f"{test_type} bandwidth is {values_list[0]} ({values_list[1]})"
96 |         print(f"[FIO] result string : {result_string}")
97 |     else:
98 |         print("[FIO] Pattern not found in the fio result.")
99 |     return result_string
100 |
101 |
102 | def delete_test_file():
103 | '''
104 | Function to delete the test file
105 | '''
106 | proc = subprocess.run(
107 | f'rm test.0.0',
108 | stdout=subprocess.PIPE,
109 | stderr=subprocess.STDOUT,
110 | shell=True
111 | )
112 |
--------------------------------------------------------------------------------
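
For reference, `parse_fio_output` matches fio's human-readable summary line; a small self-contained check against a representative line (the numbers are made up, and the function also logs a `[FIO] result string` line before returning):

```python
from utils.system_operations.fio_runner import parse_fio_output

sample = ("  WRITE: bw=393MiB/s (412MB/s), 393MiB/s-393MiB/s (412MB/s-412MB/s), "
          "io=23.0GiB (24.7GB), run=60001-60001msec")
print(parse_fio_output(sample, "randwrite"))
# randwrite bandwidth is 393MiB/s (393MiB/s-393MiB/s)
```
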
/options_files/default_options_files/initial_options_file.ini:
--------------------------------------------------------------------------------
1 | [Version]
2 | rocksdb_version=4.3.0
3 | options_file_version=1.1
4 |
5 | [DBOptions]
6 | stats_dump_period_sec=600
7 | max_manifest_file_size=18446744073709551615
8 | bytes_per_sync=8388608
9 | delayed_write_rate=2097152
10 | WAL_ttl_seconds=0
11 | WAL_size_limit_MB=0
12 | max_subcompactions=1
13 | wal_bytes_per_sync=0
14 | db_write_buffer_size=0
15 | keep_log_file_num=1000
16 | table_cache_numshardbits=4
17 | max_file_opening_threads=1
18 | writable_file_max_buffer_size=1048576
19 | random_access_max_buffer_size=1048576
20 | use_fsync=false
21 | max_total_wal_size=0
22 | max_open_files=-1
23 | skip_stats_update_on_db_open=false
24 | max_background_compactions=16
25 | manifest_preallocation_size=4194304
26 | max_background_flushes=7
27 | is_fd_close_on_exec=true
28 | max_log_file_size=0
29 | advise_random_on_open=true
30 | create_missing_column_families=false
31 | paranoid_checks=true
32 | delete_obsolete_files_period_micros=21600000000
33 | log_file_time_to_roll=0
34 | compaction_readahead_size=0
35 | create_if_missing=false
36 | use_adaptive_mutex=false
37 | enable_thread_tracking=false
38 | allow_fallocate=true
39 | error_if_exists=false
40 | recycle_log_file_num=0
41 | skip_log_error_on_recovery=false
42 | new_table_reader_for_compaction_inputs=true
43 | allow_mmap_reads=false
44 | allow_mmap_writes=false
45 | use_direct_reads=false
46 | use_direct_writes=false
47 |
48 |
49 | [CFOptions "default"]
50 | compaction_style=kCompactionStyleLevel
51 | compaction_filter=nullptr
52 | num_levels=6
53 | table_factory=BlockBasedTable
54 | comparator=leveldb.BytewiseComparator
55 | max_sequential_skip_in_iterations=8
56 | max_bytes_for_level_base=1073741824
57 | memtable_prefix_bloom_probes=6
58 | memtable_prefix_bloom_bits=0
59 | memtable_prefix_bloom_huge_page_tlb_size=0
60 | max_successive_merges=0
61 | arena_block_size=16777216
62 | min_write_buffer_number_to_merge=1
63 | target_file_size_multiplier=1
64 | source_compaction_factor=1
65 | max_bytes_for_level_multiplier=8
66 | max_bytes_for_level_multiplier_additional=2:3:5
67 | compaction_filter_factory=nullptr
68 | max_write_buffer_number=8
69 | level0_stop_writes_trigger=20
70 | compression=kSnappyCompression
71 | level0_file_num_compaction_trigger=4
72 | purge_redundant_kvs_while_flush=true
73 | max_write_buffer_size_to_maintain=0
74 | memtable_factory=SkipListFactory
75 | max_grandparent_overlap_factor=8
76 | expanded_compaction_factor=25
77 | hard_pending_compaction_bytes_limit=137438953472
78 | inplace_update_num_locks=10000
79 | level_compaction_dynamic_level_bytes=true
80 | level0_slowdown_writes_trigger=12
81 | filter_deletes=false
82 | verify_checksums_in_compaction=true
83 | min_partial_merge_operands=2
84 | paranoid_file_checks=false
85 | target_file_size_base=134217728
86 | optimize_filters_for_hits=false
87 | merge_operator=PutOperator
88 | compression_per_level=kNoCompression:kNoCompression:kNoCompression:kSnappyCompression:kSnappyCompression:kSnappyCompression
89 | compaction_measure_io_stats=false
90 | prefix_extractor=nullptr
91 | bloom_locality=0
92 | write_buffer_size=134217728
93 | disable_auto_compactions=false
94 | inplace_update_support=false
95 |
96 | [TableOptions/BlockBasedTable "default"]
97 | format_version=2
98 | whole_key_filtering=true
99 | no_block_cache=false
100 | checksum=kCRC32c
101 | filter_policy=rocksdb.BuiltinBloomFilter
102 | block_size_deviation=10
103 | block_size=8192
104 | block_restart_interval=16
105 | cache_index_and_filter_blocks=false
106 | pin_l0_filter_and_index_blocks_in_cache=false
107 | pin_top_level_index_and_filter=false
108 | index_type=kBinarySearch
109 | flush_block_policy_factory=FlushBlockBySizePolicyFactory
--------------------------------------------------------------------------------
/options_files/ops_options_file.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import configparser
4 | from utils.constants import DEFAULT_OPTION_FILE_DIR, INITIAL_OPTIONS_FILE_NAME, OPTIONS_FILE_DIR
5 | from utils.filter import BLACKLIST
6 | from utils.parse import dict_to_configparser, configparser_to_string
7 |
8 | def parse_gpt_text_to_dict(gpt_output_text):
9 | '''
10 | Function to parse the gpt output text with filters
11 |
12 | Parameters:
13 | - gpt_output_text (str): The output generated by gpt
14 |
15 | Returns:
16 | - options_dict (dict): A dictionary containing the parsed data
17 | '''
18 | options_dict = {}
19 |
20 | for line in gpt_output_text.split("\n"):
21 | # Ignore lines starting with '#' as they are comments
22 | if not line.startswith('#'):
23 | # Split the line at the first '=' and strip whitespace
24 | parts = line.split(':', 1)
25 | if len(parts) == 1:
26 | parts = line.split('=', 1)
27 | if len(parts) == 2:
28 |                 # skip options whose value contains '{' (nested option structs)
29 | if '{' not in parts[1].strip():
30 | # filters options that are in the blacklist
31 | if parts[0].strip() not in BLACKLIST:
32 | key, value = parts[0].strip(), parts[1].strip()
33 | options_dict[key] = value
34 |
35 | return options_dict
36 |
37 | def cleanup_options_file(gpt_options_text):
38 | """
39 |     Function to clean up the options file generated by GPT
40 |     - replaces the values in the original options file with the values generated by GPT-4
41 |     - handles two scenarios in the GPT output:
42 |       1. a single ```ini``` code block
43 |       2. ``` ... ``` with multiple code blocks
44 |
45 | Parameters:
46 | - gpt_options_text: string containing the options file generated by GPT-4
47 |
48 | Returns:
49 | - config_string: string containing the options file in the original format
50 | """
51 | clean_output_dict = parse_option_file_to_dict(open(f"{OPTIONS_FILE_DIR}").read())
52 |
53 | # Parse the GPT-4 generated options
54 | gpt_output_dict = parse_gpt_text_to_dict(gpt_options_text)
55 |
56 | # Update the original options with GPT-4 generated value
57 | for key, value in gpt_output_dict.items():
58 | for internal_dict in clean_output_dict:
59 | if key in clean_output_dict[internal_dict]:
60 | clean_output_dict[internal_dict][key] = gpt_output_dict[key]
61 |
62 | # Convert dictionary to configparser
63 | config_parser = dict_to_configparser(clean_output_dict)
64 | config_string = configparser_to_string(config_parser)
65 |
66 | # Save to a file
67 | with open(f"{OPTIONS_FILE_DIR}", "w") as file:
68 | file.write(config_string)
69 | return config_string
70 |
71 | def get_initial_options_file():
72 | '''
73 | Get the initial options file
74 |
75 | Parameters:
76 | - None
77 |
78 | Returns:
79 | - options (str): The initial options file
80 | - reasoning (str): The reasoning behind the options file
81 | '''
82 | initial_options_file_path = os.path.join(DEFAULT_OPTION_FILE_DIR,
83 | INITIAL_OPTIONS_FILE_NAME)
84 | with open(initial_options_file_path, "r") as f:
85 | options = f.read()
86 |
87 | reasoning = f"Initial options file: {initial_options_file_path}"
88 |
89 | return options, reasoning
90 |
91 |
92 | def parse_option_file_to_dict(option_file):
93 | '''
94 | Function to parse the given option file to a dictionary
95 |
96 | Parameters:
97 | - option_file (str): The path to the option file
98 |
99 | Returns:
100 | - parsed (dict): A dictionary containing the parsed data
101 | '''
102 |     pat = re.compile(r"(.*)\s*([#].*)?")
103 | config = configparser.ConfigParser()
104 | config.read_string(option_file)
105 | parsed = {section: dict(config.items(section))
106 | for section in config.sections()}
107 | for section_name, section in parsed.items():
108 | for k, v in section.items():
109 | m = pat.match(v)
110 | section[k] = m[1]
111 | return parsed
112 |
113 |
114 |
--------------------------------------------------------------------------------
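
To make the filtering in `parse_gpt_text_to_dict` concrete, a standalone example (the GPT snippet is invented; run from the repository root, since importing the module also imports `utils.constants`): comment lines, values containing `{`, and blacklisted keys are dropped, and both `key=value` and `key: value` forms are accepted:

```python
from options_files.ops_options_file import parse_gpt_text_to_dict

gpt_snippet = """# Suggested changes
max_background_jobs=8
compression_type=kZSTD
compaction_options_fifo={allow_compaction=true;max_table_files_size=0;}
write_buffer_size: 134217728
"""
print(parse_gpt_text_to_dict(gpt_snippet))
# {'max_background_jobs': '8', 'write_buffer_size': '134217728'}
```
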
/utils/graph.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 |
3 | def plot(values, title, file):
4 | '''
5 | Plots a single line graph based on a list of values.
6 |
7 | This function plots a simple line graph where the X-axis represents the index of each value in the list, and the Y-axis represents the value itself.
8 |
9 | Parameters:
10 | values (list): A list of numerical values to be plotted.
11 | title (str): The title of the plot.
12 | file (str): The file path where the plot image will be saved.
13 |
14 | Returns:
15 | - None. The plot is saved to the specified file path.
16 |
17 | '''
18 | # Plotting
19 | plt.figure(figsize=(12, 6))
20 | plt.plot(range(len(values)), values, label=title, linestyle='-')
21 |
22 | plt.title(title)
23 | plt.legend()
24 | plt.grid(True)
25 |
26 | plt.ylim(0, 400000)
27 |
28 | # Save the plot to a file
29 | plt.savefig(file)
30 |
31 |
32 | def plot_2axis(keys, values, title, file):
33 | '''
34 | Plots a line graph with specified keys and values.
35 |
36 | This function is designed to plot a line graph where the X-axis is determined by the 'keys' parameter and the Y-axis by the 'values' parameter.
37 |
38 | Parameters:
39 | keys (list): A list of keys or indices for the X-axis.
40 | values (list): A list of numerical values for the Y-axis.
41 | title (str): The title of the plot.
42 | file (str): The file path where the plot image will be saved.
43 |
44 | Returns:
45 | - None. The plot is saved to the specified file path.
46 | '''
47 | # Plotting
48 | plt.figure(figsize=(12, 6))
49 | plt.plot(keys, values, label=title, linestyle='-')
50 |
51 | plt.title(title)
52 | plt.legend()
53 | plt.grid(True)
54 |
55 | plt.ylim(0, 400000)
56 |
57 | # Save the plot to a file
58 | plt.savefig(file)
59 |
60 |
61 | def plot_multiple(data, title, file):
62 | '''
63 | Plots multiple line graphs from a list of data sets.
64 |
65 | This function is used to plot multiple line graphs on the same plot. Each item in the 'data' list represents a different line on the graph.
66 |
67 | Parameters:
68 | data (list of tuples): Each tuple contains two elements - a list of keys for the X-axis and a list of values for the Y-axis.
69 | title (str): The title of the plot.
70 | file (str): The file path where the plot image will be saved.
71 |
72 | Each line is labeled as 'Iteration-i' where i is the index of the data set in the 'data' list.
73 |
74 | Returns:
75 | - None. The plot is saved to the specified file path.
76 |
77 | '''
78 |
79 | # Plotting setup
80 | plt.figure(figsize=(12, 6))
81 | for i, iteration in enumerate(data):
82 | keys, values = iteration[1]["ops_per_second_graph"]
83 | plt.plot(keys, values, label=f"Iteration-{i}", linestyle='-')
84 |
85 | plt.title(title)
86 | plt.legend()
87 | plt.grid(True)
88 |
89 | plt.ylim(0, 400000)
90 |
91 | # Save the plot to a file
92 | plt.savefig(file)
93 |
94 | def plot_multiple_manual(data, file):
95 |     # Plot each run's throughput trace (kops/s samples) on a single, manually labeled figure
96 | plt.figure(figsize=(16.5, 8))
97 | # labels = ["Default file", "Iteration 3", "Iteration 3", "Iteration 7"]
98 | labels = ["Default file", "Iteration 2", "Iteration 4", "Iteration 6"]
99 | colors = ['red', 'orange', 'royalblue', 'green']
100 | for i, ops in enumerate(data):
101 | plt.plot(ops, label=f"{labels[i]}", linestyle='-',color=colors[i])
102 | plt.xlabel("Time (seconds)")
103 | plt.ylabel("Throughput (kops/s)")
104 | plt.legend()
105 |
106 |
107 | plt.ylim(0, 400)
108 | plt.tight_layout()
109 |
110 | # Save the plot to a file
111 | plt.savefig(file)
112 |
113 |
114 | # pattern = r"\((\d+),(\d+)\) ops and \((\d+\.\d+),(\d+\.\d+)\) ops/second in \((\d+\.\d+),(\d+\.\d+)\) seconds"
115 |
116 | # folder_path = "/data/gpt_project/gpt-assisted-rocksdb-config/saved_output/fillrandom/output_nvme_v2/c4_m4"
117 | # file_names = ['0.ini', '2.ini', '4.ini', '6.ini']
118 | # pattern = r'"ops_per_second_graph": \[\[([\d.,\s]+)\],\s+\[([\d.,\s]+)\]\]'
119 |
120 | # data = []
121 |
122 | # for file_name in file_names:
123 | # file_path = os.path.join(folder_path, file_name)
124 | # with open(file_path, 'r') as f:
125 | # file_contents = f.read()
126 | # matches = re.findall(pattern, file_contents)
127 | # ops = [float(x)/1000 for x in matches[0][1].split(', ')]
128 | # data.append(ops)
129 |
130 | # plot_multiple_manual(data, "Ops_per_Second_combined.png")
131 |
132 |
--------------------------------------------------------------------------------
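
`plot_multiple` expects the same list of tuples that `main.py` accumulates: each element's second item is a benchmark-results dict whose `"ops_per_second_graph"` entry is a `(timestamps, ops)` pair. A hypothetical call with made-up numbers:

```python
from utils.graph import plot_multiple

# Each tuple: (options_file_text, benchmark_results, reasoning, summary_of_changes)
options_files = [
    ("<options>", {"ops_per_second_graph": ([0, 10, 20], [120000, 135000, 128000])}, "", ""),
    ("<options>", {"ops_per_second_graph": ([0, 10, 20], [150000, 162000, 158000])}, "", ""),
]
plot_multiple(options_files, "Ops Per Second", "ops_per_sec_demo.png")
```
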
/utils/system_operations/get_sys_info.py:
--------------------------------------------------------------------------------
1 | import os
2 | import psutil
3 | import subprocess
4 | import platform
5 | from cpuinfo import get_cpu_info
6 | from cgroup_monitor import CGroupMonitor
7 |
8 | def get_system_data(db_path):
9 | '''
10 | Function to get the system data
11 |
12 | Parameters:
13 | - db_path (str): The path of database
14 |
15 | Returns:
16 | - brand_raw_value (str): The CPU model name
17 | - memory_total (int): The total memory
18 | - swap (int): The swap memory
19 | - total_disk_size (int): The total disk size
20 | - device (str): The device name
21 | '''
22 | cgroup_monitor = CGroupMonitor()
23 | try:
24 | cpu_count = os.getenv("CPU_COUNT", str(cgroup_monitor.get_cpu_limit()))
25 | mem_max = os.getenv("MEMORY_MAX", str(cgroup_monitor.get_memory_limit()))
26 |
27 | # gets the CPU op-modes
28 | system_info = platform.uname()
29 | cpu_op_modes = system_info.processor
30 |
31 | # gets the CPU model name
32 | cpu_model = platform.processor()
33 |
34 | # get all the CPU cache sizes
35 | cpu_info = get_cpu_info()
36 | brand_raw_value = cpu_count + " cores of " + cpu_info['brand_raw']
37 |
38 | l1_data_cache_size = cpu_info.get('l1_data_cache_size', 'N/A')
39 | l1_instruction_cache_size = cpu_info.get(
40 | 'l1_instruction_cache_size', 'N/A')
41 | l2_cache_size = cpu_info.get('l2_cache_size', 'N/A')
42 | l3_cache_size = cpu_info.get('l3_cache_size', 'N/A')
43 |
44 | # get the total memory
45 | # memory_total = psutil.virtual_memory().total
46 | memory_total = float(mem_max)
47 |
48 | # gets the percentage of RAM used
49 | memory_used = psutil.virtual_memory().percent
50 |
51 | # gets the percentage of RAM available
52 |     memory_remaining = psutil.virtual_memory().available * 100 / \
53 | psutil.virtual_memory().total
54 |
55 | # gets the disk information
56 | # partitions = psutil.disk_partitions(all=True)
57 |
58 | swap = psutil.swap_memory()
59 |
60 | partitions = psutil.disk_partitions(all=False)
61 | path = os.path.dirname(db_path)
62 | total_disk_size = -1
63 | device = ""
64 | all_devices = check_drive_type()
65 | data_directory = path[:5]
66 | for partition in partitions:
67 | usage = psutil.disk_usage(partition.mountpoint)
68 | if (partition.mountpoint[:5] == data_directory):
69 | total_disk_size = usage.total
70 | if (partition.device.split('/')[-1] in all_devices):
71 | device = all_devices[partition.device.split('/')[-1]]
72 | elif (partition.device.split('/')[-1][:-1] in all_devices):
73 | device = all_devices[partition.device.split('/')[-1][:-1]]
74 |
75 | # returns all the system data required
76 | return brand_raw_value, memory_total, swap, total_disk_size, device
77 |
78 | except Exception as e:
79 | print(f"[SYS] Error in fetching system data: {e}")
80 | return None
81 |
82 | # Check drive type
83 | def check_drive_type():
84 | '''
85 | Function to check the drive type
86 |
87 | Returns:
88 | - drive_types (dict): A dictionary containing the drive types
89 | '''
90 | # Path where the drive information is stored
91 | sys_block_path = "/sys/block"
92 | # Check if the path exists
93 | if os.path.exists(sys_block_path):
94 | # List of all devices
95 | devices = os.listdir(sys_block_path)
96 | drive_types = {}
97 | # Iterate through each device
98 | for device in devices:
99 | try:
100 | with open(f"{sys_block_path}/{device}/queue/rotational", "r") as file:
101 | rotational = file.read().strip()
102 | if rotational == "0":
103 | drive_types[device] = "SSD"
104 | else:
105 | drive_types[device] = "HDD"
106 | except IOError:
107 | # Unable to read the rotational file for this device
108 | pass
109 | return drive_types
110 | else:
111 | return "System block path does not exist."
112 |
113 | def system_info(db_path, fio_result):
114 | '''
115 | Fetch system data for further runs
116 |
117 | Parameters:
118 | - db_path (str): The path of database
119 | - fio_result (str): The result of fio benchmark
120 | '''
121 | system_data = get_system_data(db_path)
122 |     data = (f"{system_data[0]} with {system_data[1]}GiB of Memory and {system_data[2].total / (1024 ** 3):.2f}GiB of Swap space. "
123 |             f"{system_data[4]} size : {system_data[3]/(1024 ** 4):.2f}T. A single instance of RocksDB is always going to be the only process running. "
124 |             f"{fio_result}")
125 | return data
126 |
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import getpass
4 | from datetime import datetime
5 | from collections import defaultdict
6 | from deepdiff import DeepDiff
7 | from utils.constants import OUTPUT_PATH, DEVICE, DB_PATH
8 |
9 | # LOG UTILS
10 | def log_update(update_string):
11 | '''
12 | Update the log file with the given string
13 |
14 | Parameters:
15 | - update_string (str): The string to be updated in the log file
16 |
17 | Returns:
18 | - None
19 | '''
20 | current_datetime = datetime.now()
21 | date_time_string = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
22 | update_string = f"[{date_time_string}] {update_string}"
23 |
24 | if OUTPUT_PATH is None:
25 | with open(f"log.txt", "a+") as f:
26 | f.write(update_string + "\n")
27 | else:
28 | with open(f"{OUTPUT_PATH}/log.txt", "a+") as f:
29 | f.write(update_string + "\n")
30 |
31 | # STORE FILE UTILS
32 | def store_db_bench_output(output_folder_name, output_file_name,
33 | benchmark_results, options_file, reasoning):
34 | '''
35 | Store the output of db_bench in a file
36 |
37 | Parameters:
38 | - output_folder_name (str): Name of the folder to store the output file
39 | - output_file_name (str): Name of the output file
40 | - benchmark_results (dict): Dictionary containing the benchmark results
41 | - options_file (str): The options file used to generate the benchmark results
42 | - reasoning (str): The reasoning behind the options file
43 |
44 | Returns:
45 | - None
46 | '''
47 | with open(f"{output_folder_name}/{output_file_name}", "a+") as f:
48 | f.write("# " + json.dumps(benchmark_results) + "\n\n")
49 | f.write(options_file + "\n")
50 | for line in reasoning.splitlines():
51 | f.write("# " + line + "\n")
52 |
53 | def store_best_option_file(options_files, output_folder_dir):
54 | '''
55 | Save the best option file
56 |
57 | Parameters:
58 | - options_files (list): List of options files
59 | - output_folder_dir (str): The output directory
60 | '''
61 | best_result = max(options_files, key=lambda x: x[1]["ops_per_sec"])
62 | best_options = best_result[0]
63 | best_reasoning = best_result[2]
64 | with open(f"{output_folder_dir}/best_options.ini", "w") as f:
65 | f.write(best_options)
66 | for line in best_reasoning.splitlines():
67 | f.write("# " + line + "\n")
68 |
69 | def store_diff_options_list(options_list, output_folder_dir):
70 | # Calculate differences between options_list
71 | differences = calculate_differences(options_list)
72 | changed_fields_frequency = defaultdict(lambda: 0)
73 |
74 | with open(f"{output_folder_dir}/diffOptions.txt", 'w') as f:
75 | for i, diff in enumerate(differences, start=1):
76 |             f.write(f"[MFN] Differences between iteration {i - 1} and iteration {i}: \n")
77 | f.write(json.dumps(diff, indent=4))
78 | f.write("\n")
79 | f.write("=" * 50)
80 | f.write("\n\n")
81 |
82 |             for key in diff.get("values_changed", {}):
83 | changed_fields_frequency[key] += 1
84 |
85 | f.write("\n\n[MFN] Changed Fields Frequency:\n")
86 | f.write(json.dumps(changed_fields_frequency, indent=4))
87 |
88 | # PATH UTILS
89 | def path_of_db():
90 | '''
91 | Choose the database path
92 |
93 | Parameters:
94 | - None
95 |
96 | Returns:
97 | - db_path (str): The path of the database
98 | '''
99 | user_name = getpass.getuser()
100 | db_path_name = DB_PATH + user_name[0].lower()
101 | db_path = os.getenv("DB_PATH", db_path_name)
102 | # log_update(f"[UTL] Using database path: {db_path}")
103 | print(f"[UTL] Using database path: {db_path}")
104 |
105 | return db_path
106 |
107 | def path_of_output_folder():
108 | '''
109 | Set the output folder directory
110 |
111 | Parameters:
112 | - None
113 |
114 | Returns:
115 | - output_folder_dir (str): The output folder directory
116 | '''
117 | current_datetime = datetime.now()
118 | date_time_string = current_datetime.strftime("%Y-%m-%d_%H-%M-%S")
119 | if OUTPUT_PATH is None:
120 | output_folder_dir = f"output/output_{DEVICE}/output_{date_time_string}"
121 | else:
122 | output_folder_dir = OUTPUT_PATH
123 |
124 | os.makedirs(output_folder_dir, exist_ok=True)
125 | log_update(f"[UTL] Using output folder: {output_folder_dir}")
126 | print(f"[UTL] Using output folder: {output_folder_dir}")
127 |
128 | return output_folder_dir
129 |
130 | # OTHER UTILS
131 | def calculate_differences(iterations):
132 | '''
133 | Function to calculate the differences between the iterations
134 |
135 | Parameters:
136 | - iterations (list): A list of the iterations
137 |
138 | Returns:
139 | - differences (list): A list of the differences between the iterations
140 | '''
141 | differences = []
142 | for i in range(1, len(iterations)):
143 | diff = DeepDiff(iterations[i-1], iterations[i])
144 | differences.append(diff)
145 | return differences
146 |
--------------------------------------------------------------------------------
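
`store_best_option_file` simply picks the entry with the highest `ops_per_sec` from that same tuple list. A hypothetical example with made-up results (run from the repository root, since importing `utils.constants` parses the command line and creates an output folder):

```python
from utils.utils import store_best_option_file

options_files = [
    ("[DBOptions]\nmax_background_jobs=2\n", {"ops_per_sec": 118000}, "baseline run", ""),
    ("[DBOptions]\nmax_background_jobs=8\n", {"ops_per_sec": 154000}, "more background jobs", ""),
]
store_best_option_file(options_files, ".")
# Writes ./best_options.ini containing the second options file,
# followed by its reasoning as '#' comment lines.
```
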
/main.py:
--------------------------------------------------------------------------------
1 | import utils.constants as constants
2 | from utils.graph import plot, plot_multiple
3 | from utils.system_operations.fio_runner import get_fio_result
4 | from options_files.ops_options_file import parse_option_file_to_dict, get_initial_options_file
5 |
6 | import rocksdb.subprocess_manager as spm
7 | from utils.utils import log_update, store_best_option_file, path_of_db, store_diff_options_list
8 | from utils.system_operations.get_sys_info import system_info
9 | from gpt.prompts_generator import generate_option_file_with_gpt
10 | import os
11 |
12 | def main():
13 | '''
14 | Main function to run the project. This function will run the db_bench with the initial options file and then
15 | generate new options files using GPT API and run db_bench with the new options file. This function will also
16 | store the output of db_bench in a file. The output file will contain the benchmark results, the options file
17 | used to generate the benchmark results and the reasoning behind the options file as provided by the GPT API.
18 | There will be a separate file for each iteration.
19 |
20 | Parameters:
21 | - None
22 |
23 | Returns:
24 | - None
25 | '''
26 |
27 | # initialize variables
28 | options_files = []
29 | options_list = []
30 |
31 | # Set up the path
32 | output_folder_dir = constants.OUTPUT_PATH
33 | os.makedirs(output_folder_dir, exist_ok=True)
34 | db_path = path_of_db()
35 | fio_result = get_fio_result(constants.FIO_RESULT_PATH)
36 |
37 | log_update(f"[MFN] Starting the program with the case number: {constants.CASE_NUMBER}")
38 | print(f"[MFN] Starting the program with the case number: {constants.CASE_NUMBER}")
39 |
40 | # First run, Initial options file and see how the results are
41 | options, reasoning = get_initial_options_file()
42 |
43 | is_error, benchmark_results, average_cpu_usage, average_memory_usage, options = spm.benchmark(
44 | db_path, options, output_folder_dir, reasoning, 0, None, options_files)
45 |
46 | if is_error:
47 | # If the initial options file fails, exit the program
48 | log_update("[MFN] Failed to benchmark with the initial options file. Exiting.")
49 | print("[MFN] Failed to benchmark with the initial options file. Exiting.")
50 | exit(1)
51 | else:
52 | # If the initial options file succeeds, store the options file and benchmark results, pass it to the GPT API to generate a new options file
53 | parsed_options = parse_option_file_to_dict(options)
54 | options_list.append(parsed_options)
55 |
56 | # Maintain a list of options files, benchmark results and why that option file was generated
57 | options_files.append((options, benchmark_results, reasoning, ""))
58 |
59 | iteration_count = 7
60 |
61 | for i in range(1, iteration_count + 1):
62 |
63 | log_update(f"[MFN] Starting iteration {i}")
64 | log_update(f"[MFN] Querying ChatGPT for next options file")
65 |
66 | print("-" * 50)
67 | print(f"[MFN] Starting iteration {i}")
68 |
69 | print("[MFN] Querying ChatGPT for next options file")
70 | temperature = 0.4
71 | retry_counter = 5
72 | generated = False
73 |
74 | for gpt_query_count in range(retry_counter, 0, -1):
75 | # Generate new options file with retry limit of 5
76 |
77 | new_options_file, reasoning, summary_of_changes = generate_option_file_with_gpt(
78 | constants.CASE_NUMBER, options_files,
79 | system_info(db_path, fio_result), temperature,
80 | average_cpu_usage, average_memory_usage,
81 | constants.TEST_NAME, constants.VERSION)
82 | if new_options_file is None:
83 | log_update(f"[MFN] Failed to generate options file. Retrying. Retries left: {gpt_query_count - 1}")
84 | print("[MFN] Failed to generate options file. Retrying. Retries left: ", gpt_query_count - 1)
85 | continue
86 |
87 | # Parse output
88 | is_error, benchmark_results, average_cpu_usage, average_memory_usage, new_options_file = spm.benchmark(
89 | db_path, new_options_file, output_folder_dir, reasoning, iteration_count, benchmark_results, options_files)
90 | if is_error:
91 | log_update(f"[MFN] Benchmark failed. Retrying with new options file. Retries left: {gpt_query_count - 1}")
92 | print("[MFN] Benchmark failed. Retrying with new options file. Retries left: ", gpt_query_count - 1)
93 | temperature += 0.1
94 | continue
95 | else:
96 | generated = True
97 | break
98 |
99 | if generated:
100 | options = new_options_file
101 | options_files.append((options, benchmark_results, reasoning,
102 | summary_of_changes))
103 | parsed_options = parse_option_file_to_dict(options)
104 | options_list.append(parsed_options)
105 | else:
106 | log_update("[MFN] Failed to generate options file over 5 times. Exiting.")
107 | print("[MFN] Failed to generate options file over 5 times. Exiting.")
108 | exit(1)
109 |
110 | store_best_option_file(options_files, output_folder_dir)
111 |
112 | # Graph Ops/Sec
113 | plot([e[1]["ops_per_sec"] for e in options_files], "OpsPerSec",
114 | f"{output_folder_dir}/OpsPerSec.png")
115 | plot_multiple(options_files, "Ops Per Second",
116 | f"{output_folder_dir}/opsM_per_sec.png")
117 |
118 | store_diff_options_list(options_list, output_folder_dir)
119 |
120 |
121 |
122 | if __name__ == "__main__":
123 | main()
124 |
--------------------------------------------------------------------------------
/options_files/default_options_files/dbbench_default_options-7.10.2.ini:
--------------------------------------------------------------------------------
1 | # This is a RocksDB option file.
2 | #
3 | # For detailed file format spec, please refer to the example file
4 | # in examples/rocksdb_option_file_example.ini
5 | #
6 |
7 | [Version]
8 | rocksdb_version=7.10.2
9 | options_file_version=1.1
10 |
11 | [DBOptions]
12 | max_open_files=-1
13 | stats_history_buffer_size=1048576
14 | stats_persist_period_sec=600
15 | max_background_flushes=-1
16 | stats_dump_period_sec=600
17 | compaction_readahead_size=0
18 | bytes_per_sync=0
19 | delete_obsolete_files_period_micros=21600000000
20 | max_total_wal_size=0
21 | delayed_write_rate=8388608
22 | wal_bytes_per_sync=0
23 | writable_file_max_buffer_size=1048576
24 | avoid_flush_during_shutdown=false
25 | max_subcompactions=1
26 | strict_bytes_per_sync=false
27 | max_background_compactions=-1
28 | max_background_jobs=2
29 | lowest_used_cache_tier=kNonVolatileBlockTier
30 | bgerror_resume_retry_interval=1000000
31 | max_bgerror_resume_count=2147483647
32 | best_efforts_recovery=false
33 | write_dbid_to_manifest=false
34 | avoid_unnecessary_blocking_io=false
35 | atomic_flush=false
36 | log_readahead_size=0
37 | dump_malloc_stats=true
38 | info_log_level=INFO_LEVEL
39 | write_thread_max_yield_usec=100
40 | max_write_batch_group_size_bytes=1048576
41 | wal_compression=kNoCompression
42 | write_thread_slow_yield_usec=3
43 | enable_pipelined_write=true
44 | persist_stats_to_disk=false
45 | max_manifest_file_size=1073741824
46 | WAL_size_limit_MB=0
47 | fail_if_options_file_error=false
48 | max_log_file_size=0
49 | manifest_preallocation_size=4194304
50 | listeners={ErrorHandlerListener:ErrorHandlerListener}
51 | log_file_time_to_roll=0
52 | allow_data_in_errors=false
53 | WAL_ttl_seconds=0
54 | recycle_log_file_num=0
55 | file_checksum_gen_factory=nullptr
56 | keep_log_file_num=1000
57 | db_write_buffer_size=0
58 | table_cache_numshardbits=4
59 | use_adaptive_mutex=false
60 | allow_ingest_behind=false
61 | skip_checking_sst_file_sizes_on_db_open=false
62 | skip_stats_update_on_db_open=false
63 | random_access_max_buffer_size=1048576
64 | access_hint_on_compaction_start=NORMAL
65 | allow_concurrent_memtable_write=true
66 | track_and_verify_wals_in_manifest=false
67 | paranoid_checks=true
68 | max_file_opening_threads=16
69 | verify_sst_unique_id_in_manifest=true
70 | avoid_flush_during_recovery=false
71 | flush_verify_memtable_count=true
72 | db_host_id=__hostname__
73 | error_if_exists=false
74 | wal_recovery_mode=kPointInTimeRecovery
75 | enable_thread_tracking=false
76 | is_fd_close_on_exec=true
77 | enforce_single_del_contracts=true
78 | create_missing_column_families=true
79 | create_if_missing=true
80 | use_fsync=false
81 | wal_filter=nullptr
82 | allow_2pc=false
83 | use_direct_io_for_flush_and_compaction=false
84 | manual_wal_flush=false
85 | enable_write_thread_adaptive_yield=true
86 | use_direct_reads=false
87 | allow_mmap_writes=false
88 | allow_fallocate=true
89 | two_write_queues=false
90 | allow_mmap_reads=false
91 | unordered_write=false
92 | advise_random_on_open=true
93 |
94 |
95 | [CFOptions "default"]
96 | memtable_protection_bytes_per_key=0
97 | sample_for_compression=0
98 | blob_file_starting_level=0
99 | blob_compaction_readahead_size=0
100 | blob_garbage_collection_force_threshold=1.000000
101 | enable_blob_garbage_collection=false
102 | min_blob_size=0
103 | last_level_temperature=kUnknown
104 | enable_blob_files=false
105 | target_file_size_base=67108864
106 | max_sequential_skip_in_iterations=8
107 | prepopulate_blob_cache=kDisable
108 | compaction_options_fifo={allow_compaction=true;age_for_warm=0;max_table_files_size=0;}
109 | max_bytes_for_level_multiplier=10.000000
110 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
111 | max_bytes_for_level_base=268435456
112 | experimental_mempurge_threshold=0.000000
113 | write_buffer_size=67108864
114 | bottommost_compression=kDisableCompressionOption
115 | prefix_extractor=nullptr
116 | blob_file_size=268435456
117 | memtable_huge_page_size=0
118 | max_successive_merges=0
119 | compression_opts={max_dict_buffer_bytes=0;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;level=32767;window_bits=-14;}
120 | arena_block_size=1048576
121 | memtable_whole_key_filtering=false
122 | target_file_size_multiplier=1
123 | max_write_buffer_number=2
124 | blob_compression_type=kNoCompression
125 | compression=kSnappyCompression
126 | level0_stop_writes_trigger=36
127 | level0_slowdown_writes_trigger=20
128 | level0_file_num_compaction_trigger=4
129 | ignore_max_compaction_bytes_for_input=true
130 | max_compaction_bytes=1677721600
131 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;}
132 | memtable_prefix_bloom_size_ratio=0.000000
133 | hard_pending_compaction_bytes_limit=137438953472
134 | bottommost_compression_opts={max_dict_buffer_bytes=0;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;level=32767;window_bits=-14;}
135 | blob_garbage_collection_age_cutoff=0.250000
136 | ttl=2592000
137 | soft_pending_compaction_bytes_limit=68719476736
138 | inplace_update_num_locks=10000
139 | paranoid_file_checks=false
140 | check_flush_compaction_key_order=true
141 | periodic_compaction_seconds=0
142 | disable_auto_compactions=false
143 | report_bg_io_stats=false
144 | compaction_style=kCompactionStyleLevel
145 | merge_operator=nullptr
146 | compaction_filter_factory=nullptr
147 | sst_partitioner_factory=nullptr
148 | table_factory=BlockBasedTable
149 | memtable_factory=SkipListFactory
150 | comparator=leveldb.BytewiseComparator
151 | compaction_pri=kMinOverlappingRatio
152 | bloom_locality=0
153 | num_levels=7
154 | min_write_buffer_number_to_merge=1
155 | compaction_filter=nullptr
156 | max_write_buffer_size_to_maintain=0
157 | max_write_buffer_number_to_maintain=0
158 | memtable_insert_with_hint_prefix_extractor=nullptr
159 | preclude_last_level_data_seconds=0
160 | force_consistency_checks=true
161 | optimize_filters_for_hits=false
162 | level_compaction_dynamic_file_size=true
163 | level_compaction_dynamic_level_bytes=false
164 | preserve_internal_time_seconds=0
165 | inplace_update_support=false
166 |
167 | [TableOptions/BlockBasedTable "default"]
168 | num_file_reads_for_auto_readahead=2
169 | initial_auto_readahead_size=8192
170 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
171 | enable_index_compression=true
172 | pin_top_level_index_and_filter=false
173 | read_amp_bytes_per_bit=0
174 | verify_compression=false
175 | prepopulate_block_cache=kDisable
176 | format_version=5
177 | partition_filters=false
178 | metadata_block_size=4096
179 | max_auto_readahead_size=262144
180 | index_block_restart_interval=1
181 | block_size_deviation=10
182 | block_size=4096
183 | detect_filter_construct_corruption=false
184 | no_block_cache=false
185 | checksum=kXXH3
186 | filter_policy=nullptr
187 | data_block_hash_table_util_ratio=0.750000
188 | block_restart_interval=16
189 | index_type=kBinarySearch
190 | pin_l0_filter_and_index_blocks_in_cache=false
191 | data_block_index_type=kDataBlockBinarySearch
192 | cache_index_and_filter_blocks_with_high_priority=true
193 | whole_key_filtering=true
194 | index_shortening=kShortenSeparatorsAndSuccessor
195 | cache_index_and_filter_blocks=false
196 | block_align=false
197 | optimize_filters_for_memory=false
198 | flush_block_policy_factory=FlushBlockBySizePolicyFactory
199 |
200 |
--------------------------------------------------------------------------------
/options_files/default_options_files/bad_options.ini:
--------------------------------------------------------------------------------
1 | # This is a RocksDB option file.
2 | #
3 | # For detailed file format spec, please refer to the example file
4 | # in examples/rocksdb_option_file_example.ini
5 | #
6 |
7 | [Version]
8 | rocksdb_version=8.8.1
9 | options_file_version=1.1
10 |
11 | [DBOptions]
12 | max_background_flushes=1
13 | compaction_readahead_size=2097152
14 | wal_bytes_per_sync=0
15 | bytes_per_sync=0
16 | max_open_files=10
17 | stats_history_buffer_size=1048576
18 | stats_dump_period_sec=600
19 | stats_persist_period_sec=600
20 | delete_obsolete_files_period_micros=21600000000
21 | max_total_wal_size=0
22 | strict_bytes_per_sync=false
23 | delayed_write_rate=16777216
24 | avoid_flush_during_shutdown=false
25 | writable_file_max_buffer_size=1048576
26 | max_subcompactions=1
27 | max_background_compactions=1
28 | max_background_jobs=1
29 | lowest_used_cache_tier=kNonVolatileBlockTier
30 | bgerror_resume_retry_interval=1000000
31 | max_bgerror_resume_count=2147483647
32 | best_efforts_recovery=false
33 | write_dbid_to_manifest=false
34 | avoid_unnecessary_blocking_io=false
35 | atomic_flush=false
36 | log_readahead_size=0
37 | dump_malloc_stats=false
38 | info_log_level=INFO_LEVEL
39 | write_thread_max_yield_usec=100
40 | max_write_batch_group_size_bytes=1048576
41 | wal_compression=kNoCompression
42 | write_thread_slow_yield_usec=3
43 | enable_pipelined_write=false
44 | persist_stats_to_disk=false
45 | max_manifest_file_size=1073741824
46 | WAL_size_limit_MB=0
47 | fail_if_options_file_error=true
48 | max_log_file_size=0
49 | manifest_preallocation_size=4194304
50 | log_file_time_to_roll=0
51 | allow_data_in_errors=false
52 | WAL_ttl_seconds=0
53 | recycle_log_file_num=0
54 | file_checksum_gen_factory=nullptr
55 | keep_log_file_num=1000
56 | db_write_buffer_size=0
57 | table_cache_numshardbits=6
58 | use_adaptive_mutex=false
59 | allow_ingest_behind=false
60 | skip_checking_sst_file_sizes_on_db_open=false
61 | random_access_max_buffer_size=1048576
62 | access_hint_on_compaction_start=NORMAL
63 | allow_concurrent_memtable_write=true
64 | track_and_verify_wals_in_manifest=false
65 | skip_stats_update_on_db_open=false
66 | compaction_verify_record_count=true
67 | paranoid_checks=true
68 | max_file_opening_threads=16
69 | verify_sst_unique_id_in_manifest=true
70 | avoid_flush_during_recovery=false
71 | flush_verify_memtable_count=true
72 | db_host_id=__hostname__
73 | error_if_exists=false
74 | wal_recovery_mode=kPointInTimeRecovery
75 | enable_thread_tracking=false
76 | is_fd_close_on_exec=true
77 | enforce_single_del_contracts=true
78 | create_missing_column_families=false
79 | create_if_missing=true
80 | use_fsync=false
81 | wal_filter=nullptr
82 | allow_2pc=false
83 | use_direct_io_for_flush_and_compaction=false
84 | manual_wal_flush=false
85 | enable_write_thread_adaptive_yield=true
86 | use_direct_reads=false
87 | allow_mmap_writes=false
88 | allow_fallocate=true
89 | two_write_queues=false
90 | allow_mmap_reads=false
91 | unordered_write=false
92 | advise_random_on_open=true
93 |
94 |
95 | [CFOptions "default"]
96 | memtable_max_range_deletions=0
97 | block_protection_bytes_per_key=0
98 | memtable_protection_bytes_per_key=0
99 | sample_for_compression=0
100 | blob_file_starting_level=0
101 | blob_compaction_readahead_size=0
102 | blob_garbage_collection_force_threshold=1.000000
103 | enable_blob_garbage_collection=false
104 | min_blob_size=0
105 | last_level_temperature=kUnknown
106 | enable_blob_files=false
107 | target_file_size_base=16777216
108 | max_sequential_skip_in_iterations=8
109 | prepopulate_blob_cache=kDisable
110 | compaction_options_fifo={allow_compaction=false;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=1073741824;}
111 | max_bytes_for_level_multiplier=10.000000
112 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
113 | max_bytes_for_level_base=268435456
114 | experimental_mempurge_threshold=0.000000
115 | write_buffer_size=16777216
116 | bottommost_compression=kDisableCompressionOption
117 | prefix_extractor=nullptr
118 | blob_file_size=268435456
119 | memtable_huge_page_size=0
120 | bottommost_file_compaction_delay=0
121 | max_successive_merges=0
122 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;}
123 | arena_block_size=1048576
124 | memtable_whole_key_filtering=false
125 | target_file_size_multiplier=1
126 | max_write_buffer_number=2
127 | blob_compression_type=kNoCompression
128 | compression=kSnappyCompression
129 | level0_stop_writes_trigger=20
130 | level0_slowdown_writes_trigger=10
131 | level0_file_num_compaction_trigger=6
132 | ignore_max_compaction_bytes_for_input=true
133 | max_compaction_bytes=1677721600
134 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;}
135 | memtable_prefix_bloom_size_ratio=0.000000
136 | hard_pending_compaction_bytes_limit=21474836480
137 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;}
138 | blob_garbage_collection_age_cutoff=0.250000
139 | ttl=2592000
140 | soft_pending_compaction_bytes_limit=68719476736
141 | inplace_update_num_locks=10000
142 | paranoid_file_checks=false
143 | check_flush_compaction_key_order=true
144 | periodic_compaction_seconds=0
145 | disable_auto_compactions=false
146 | report_bg_io_stats=false
147 | compaction_pri=kMinOverlappingRatio
148 | compaction_style=kCompactionStyleLevel
149 | merge_operator=nullptr
150 | table_factory=BlockBasedTable
151 | memtable_factory=SkipListFactory
152 | comparator=leveldb.BytewiseComparator
153 | compaction_filter_factory=nullptr
154 | num_levels=7
155 | min_write_buffer_number_to_merge=1
156 | bloom_locality=0
157 | max_write_buffer_size_to_maintain=0
158 | sst_partitioner_factory=nullptr
159 | preserve_internal_time_seconds=0
160 | preclude_last_level_data_seconds=0
161 | max_write_buffer_number_to_maintain=0
162 | default_temperature=kUnknown
163 | optimize_filters_for_hits=false
164 | level_compaction_dynamic_file_size=false
165 | memtable_insert_with_hint_prefix_extractor=nullptr
166 | level_compaction_dynamic_level_bytes=true
167 | inplace_update_support=false
168 | persist_user_defined_timestamps=true
169 | compaction_filter=nullptr
170 | force_consistency_checks=true
171 |
172 | [TableOptions/BlockBasedTable "default"]
173 | num_file_reads_for_auto_readahead=2
174 | initial_auto_readahead_size=8192
175 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
176 | enable_index_compression=true
177 | pin_top_level_index_and_filter=true
178 | read_amp_bytes_per_bit=0
179 | verify_compression=false
180 | prepopulate_block_cache=kDisable
181 | format_version=5
182 | partition_filters=false
183 | metadata_block_size=4096
184 | max_auto_readahead_size=262144
185 | index_block_restart_interval=1
186 | block_size_deviation=10
187 | block_size=4096
188 | detect_filter_construct_corruption=false
189 | no_block_cache=false
190 | checksum=kXXH3
191 | filter_policy=nullptr
192 | data_block_hash_table_util_ratio=0.750000
193 | block_restart_interval=16
194 | index_type=kBinarySearch
195 | pin_l0_filter_and_index_blocks_in_cache=false
196 | data_block_index_type=kDataBlockBinarySearch
197 | cache_index_and_filter_blocks_with_high_priority=true
198 | whole_key_filtering=true
199 | index_shortening=kShortenSeparators
200 | cache_index_and_filter_blocks=false
201 | block_align=false
202 | optimize_filters_for_memory=false
203 | flush_block_policy_factory=FlushBlockBySizePolicyFactory
204 |
205 |
--------------------------------------------------------------------------------
/options_files/default_options_files/good_options.ini:
--------------------------------------------------------------------------------
1 | # This is a RocksDB option file.
2 | #
3 | # For detailed file format spec, please refer to the example file
4 | # in examples/rocksdb_option_file_example.ini
5 | #
6 |
7 | [Version]
8 | rocksdb_version=8.8.1
9 | options_file_version=1.1
10 |
11 | [DBOptions]
12 | max_background_flushes=-1
13 | compaction_readahead_size=2097152
14 | wal_bytes_per_sync=0
15 | bytes_per_sync=0
16 | max_open_files=-1
17 | stats_history_buffer_size=1048576
18 | stats_dump_period_sec=600
19 | stats_persist_period_sec=600
20 | delete_obsolete_files_period_micros=21600000000
21 | max_total_wal_size=0
22 | strict_bytes_per_sync=false
23 | delayed_write_rate=16777216
24 | avoid_flush_during_shutdown=false
25 | writable_file_max_buffer_size=1048576
26 | max_subcompactions=8
27 | max_background_compactions=-1
28 | max_background_jobs=12
29 | lowest_used_cache_tier=kNonVolatileBlockTier
30 | bgerror_resume_retry_interval=1000000
31 | max_bgerror_resume_count=2147483647
32 | best_efforts_recovery=false
33 | write_dbid_to_manifest=false
34 | avoid_unnecessary_blocking_io=false
35 | atomic_flush=false
36 | log_readahead_size=0
37 | dump_malloc_stats=false
38 | info_log_level=INFO_LEVEL
39 | write_thread_max_yield_usec=100
40 | max_write_batch_group_size_bytes=1048576
41 | wal_compression=kNoCompression
42 | write_thread_slow_yield_usec=3
43 | enable_pipelined_write=false
44 | persist_stats_to_disk=false
45 | max_manifest_file_size=1073741824
46 | WAL_size_limit_MB=0
47 | fail_if_options_file_error=true
48 | max_log_file_size=0
49 | manifest_preallocation_size=4194304
50 | log_file_time_to_roll=0
51 | allow_data_in_errors=false
52 | WAL_ttl_seconds=0
53 | recycle_log_file_num=0
54 | file_checksum_gen_factory=nullptr
55 | keep_log_file_num=1000
56 | db_write_buffer_size=0
57 | table_cache_numshardbits=6
58 | use_adaptive_mutex=false
59 | allow_ingest_behind=false
60 | skip_checking_sst_file_sizes_on_db_open=false
61 | random_access_max_buffer_size=1048576
62 | access_hint_on_compaction_start=NORMAL
63 | allow_concurrent_memtable_write=true
64 | track_and_verify_wals_in_manifest=false
65 | skip_stats_update_on_db_open=false
66 | compaction_verify_record_count=true
67 | paranoid_checks=true
68 | max_file_opening_threads=16
69 | verify_sst_unique_id_in_manifest=true
70 | avoid_flush_during_recovery=false
71 | flush_verify_memtable_count=true
72 | db_host_id=__hostname__
73 | error_if_exists=false
74 | wal_recovery_mode=kPointInTimeRecovery
75 | enable_thread_tracking=false
76 | is_fd_close_on_exec=true
77 | enforce_single_del_contracts=true
78 | create_missing_column_families=false
79 | create_if_missing=true
80 | use_fsync=false
81 | wal_filter=nullptr
82 | allow_2pc=false
83 | use_direct_io_for_flush_and_compaction=false
84 | manual_wal_flush=false
85 | enable_write_thread_adaptive_yield=true
86 | use_direct_reads=false
87 | allow_mmap_writes=false
88 | allow_fallocate=true
89 | two_write_queues=false
90 | allow_mmap_reads=false
91 | unordered_write=false
92 | advise_random_on_open=true
93 |
94 |
95 | [CFOptions "default"]
96 | memtable_max_range_deletions=0
97 | block_protection_bytes_per_key=0
98 | memtable_protection_bytes_per_key=0
99 | sample_for_compression=0
100 | blob_file_starting_level=0
101 | blob_compaction_readahead_size=0
102 | blob_garbage_collection_force_threshold=1.000000
103 | enable_blob_garbage_collection=false
104 | min_blob_size=0
105 | last_level_temperature=kUnknown
106 | enable_blob_files=false
107 | target_file_size_base=67108864
108 | max_sequential_skip_in_iterations=8
109 | prepopulate_blob_cache=kDisable
110 | compaction_options_fifo={allow_compaction=false;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=1073741824;}
111 | max_bytes_for_level_multiplier=10.000000
112 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
113 | max_bytes_for_level_base=268435456
114 | experimental_mempurge_threshold=0.000000
115 | write_buffer_size=134217728
116 | bottommost_compression=kDisableCompressionOption
117 | prefix_extractor=nullptr
118 | blob_file_size=268435456
119 | memtable_huge_page_size=0
120 | bottommost_file_compaction_delay=0
121 | max_successive_merges=0
122 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;}
123 | arena_block_size=1048576
124 | memtable_whole_key_filtering=false
125 | target_file_size_multiplier=1
126 | max_write_buffer_number=4
127 | blob_compression_type=kNoCompression
128 | compression=kSnappyCompression
129 | level0_stop_writes_trigger=36
130 | level0_slowdown_writes_trigger=24
131 | level0_file_num_compaction_trigger=4
132 | ignore_max_compaction_bytes_for_input=true
133 | max_compaction_bytes=1677721600
134 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;}
135 | memtable_prefix_bloom_size_ratio=0.000000
136 | hard_pending_compaction_bytes_limit=274877906944
137 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;}
138 | blob_garbage_collection_age_cutoff=0.250000
139 | ttl=2592000
140 | soft_pending_compaction_bytes_limit=68719476736
141 | inplace_update_num_locks=10000
142 | paranoid_file_checks=false
143 | check_flush_compaction_key_order=true
144 | periodic_compaction_seconds=0
145 | disable_auto_compactions=false
146 | report_bg_io_stats=false
147 | compaction_pri=kMinOverlappingRatio
148 | compaction_style=kCompactionStyleLevel
149 | merge_operator=nullptr
150 | table_factory=BlockBasedTable
151 | memtable_factory=SkipListFactory
152 | comparator=leveldb.BytewiseComparator
153 | compaction_filter_factory=nullptr
154 | num_levels=7
155 | min_write_buffer_number_to_merge=1
156 | bloom_locality=0
157 | max_write_buffer_size_to_maintain=0
158 | sst_partitioner_factory=nullptr
159 | preserve_internal_time_seconds=0
160 | preclude_last_level_data_seconds=0
161 | max_write_buffer_number_to_maintain=0
162 | default_temperature=kUnknown
163 | optimize_filters_for_hits=false
164 | level_compaction_dynamic_file_size=true
165 | memtable_insert_with_hint_prefix_extractor=nullptr
166 | level_compaction_dynamic_level_bytes=true
167 | inplace_update_support=false
168 | persist_user_defined_timestamps=true
169 | compaction_filter=nullptr
170 | force_consistency_checks=true
171 |
172 | [TableOptions/BlockBasedTable "default"]
173 | num_file_reads_for_auto_readahead=2
174 | initial_auto_readahead_size=8192
175 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
176 | enable_index_compression=true
177 | pin_top_level_index_and_filter=true
178 | read_amp_bytes_per_bit=0
179 | verify_compression=false
180 | prepopulate_block_cache=kDisable
181 | format_version=5
182 | partition_filters=false
183 | metadata_block_size=4096
184 | max_auto_readahead_size=262144
185 | index_block_restart_interval=1
186 | block_size_deviation=10
187 | block_size=4096
188 | detect_filter_construct_corruption=false
189 | no_block_cache=false
190 | checksum=kXXH3
191 | filter_policy=nullptr
192 | data_block_hash_table_util_ratio=0.750000
193 | block_restart_interval=16
194 | index_type=kBinarySearch
195 | pin_l0_filter_and_index_blocks_in_cache=false
196 | data_block_index_type=kDataBlockBinarySearch
197 | cache_index_and_filter_blocks_with_high_priority=true
198 | whole_key_filtering=true
199 | index_shortening=kShortenSeparators
200 | cache_index_and_filter_blocks=false
201 | block_align=false
202 | optimize_filters_for_memory=false
203 | flush_block_policy_factory=FlushBlockBySizePolicyFactory
204 |
205 |
--------------------------------------------------------------------------------
/options_files/default_options_files/rocksdb_default_options.ini:
--------------------------------------------------------------------------------
1 | # This is a RocksDB option file.
2 | #
3 | # For detailed file format spec, please refer to the example file
4 | # in examples/rocksdb_option_file_example.ini
5 | #
6 |
7 | [Version]
8 | rocksdb_version=8.8.1
9 | options_file_version=1.1
10 |
11 | [DBOptions]
12 | max_background_flushes=-1
13 | compaction_readahead_size=2097152
14 | wal_bytes_per_sync=0
15 | bytes_per_sync=0
16 | max_open_files=-1
17 | stats_history_buffer_size=1048576
18 | stats_dump_period_sec=600
19 | stats_persist_period_sec=600
20 | delete_obsolete_files_period_micros=21600000000
21 | max_total_wal_size=0
22 | strict_bytes_per_sync=false
23 | delayed_write_rate=16777216
24 | avoid_flush_during_shutdown=false
25 | writable_file_max_buffer_size=1048576
26 | max_subcompactions=1
27 | max_background_compactions=-1
28 | max_background_jobs=2
29 | lowest_used_cache_tier=kNonVolatileBlockTier
30 | bgerror_resume_retry_interval=1000000
31 | max_bgerror_resume_count=2147483647
32 | best_efforts_recovery=false
33 | write_dbid_to_manifest=false
34 | avoid_unnecessary_blocking_io=false
35 | atomic_flush=false
36 | log_readahead_size=0
37 | dump_malloc_stats=false
38 | info_log_level=INFO_LEVEL
39 | write_thread_max_yield_usec=100
40 | max_write_batch_group_size_bytes=1048576
41 | wal_compression=kNoCompression
42 | write_thread_slow_yield_usec=3
43 | enable_pipelined_write=false
44 | persist_stats_to_disk=false
45 | max_manifest_file_size=1073741824
46 | WAL_size_limit_MB=0
47 | fail_if_options_file_error=true
48 | max_log_file_size=0
49 | manifest_preallocation_size=4194304
50 | log_file_time_to_roll=0
51 | allow_data_in_errors=false
52 | WAL_ttl_seconds=0
53 | recycle_log_file_num=0
54 | file_checksum_gen_factory=nullptr
55 | keep_log_file_num=1000
56 | db_write_buffer_size=0
57 | table_cache_numshardbits=6
58 | use_adaptive_mutex=false
59 | allow_ingest_behind=false
60 | skip_checking_sst_file_sizes_on_db_open=false
61 | random_access_max_buffer_size=1048576
62 | access_hint_on_compaction_start=NORMAL
63 | allow_concurrent_memtable_write=true
64 | track_and_verify_wals_in_manifest=false
65 | skip_stats_update_on_db_open=false
66 | compaction_verify_record_count=true
67 | paranoid_checks=true
68 | max_file_opening_threads=16
69 | verify_sst_unique_id_in_manifest=true
70 | avoid_flush_during_recovery=false
71 | flush_verify_memtable_count=true
72 | db_host_id=__hostname__
73 | error_if_exists=false
74 | wal_recovery_mode=kPointInTimeRecovery
75 | enable_thread_tracking=false
76 | is_fd_close_on_exec=true
77 | enforce_single_del_contracts=true
78 | create_missing_column_families=false
79 | create_if_missing=false
80 | use_fsync=false
81 | wal_filter=nullptr
82 | allow_2pc=false
83 | use_direct_io_for_flush_and_compaction=false
84 | manual_wal_flush=false
85 | enable_write_thread_adaptive_yield=true
86 | use_direct_reads=false
87 | allow_mmap_writes=false
88 | allow_fallocate=true
89 | two_write_queues=false
90 | allow_mmap_reads=false
91 | unordered_write=false
92 | advise_random_on_open=true
93 |
94 |
95 | [CFOptions "default"]
96 | memtable_max_range_deletions=0
97 | block_protection_bytes_per_key=0
98 | memtable_protection_bytes_per_key=0
99 | sample_for_compression=0
100 | blob_file_starting_level=0
101 | blob_compaction_readahead_size=0
102 | blob_garbage_collection_force_threshold=1.000000
103 | enable_blob_garbage_collection=false
104 | min_blob_size=0
105 | last_level_temperature=kUnknown
106 | enable_blob_files=false
107 | target_file_size_base=67108864
108 | max_sequential_skip_in_iterations=8
109 | prepopulate_blob_cache=kDisable
110 | compaction_options_fifo={allow_compaction=false;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=1073741824;}
111 | max_bytes_for_level_multiplier=10.000000
112 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
113 | max_bytes_for_level_base=268435456
114 | experimental_mempurge_threshold=0.000000
115 | write_buffer_size=67108864
116 | bottommost_compression=kDisableCompressionOption
117 | prefix_extractor=nullptr
118 | blob_file_size=268435456
119 | memtable_huge_page_size=0
120 | bottommost_file_compaction_delay=0
121 | max_successive_merges=0
122 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;}
123 | arena_block_size=1048576
124 | memtable_whole_key_filtering=false
125 | target_file_size_multiplier=1
126 | max_write_buffer_number=2
127 | blob_compression_type=kNoCompression
128 | compression=kSnappyCompression
129 | level0_stop_writes_trigger=36
130 | level0_slowdown_writes_trigger=20
131 | level0_file_num_compaction_trigger=4
132 | ignore_max_compaction_bytes_for_input=true
133 | max_compaction_bytes=1677721600
134 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;}
135 | memtable_prefix_bloom_size_ratio=0.000000
136 | hard_pending_compaction_bytes_limit=274877906944
137 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;}
138 | blob_garbage_collection_age_cutoff=0.250000
139 | ttl=2592000
140 | soft_pending_compaction_bytes_limit=68719476736
141 | inplace_update_num_locks=10000
142 | paranoid_file_checks=false
143 | check_flush_compaction_key_order=true
144 | periodic_compaction_seconds=0
145 | disable_auto_compactions=false
146 | report_bg_io_stats=false
147 | compaction_pri=kMinOverlappingRatio
148 | compaction_style=kCompactionStyleLevel
149 | merge_operator=nullptr
150 | table_factory=BlockBasedTable
151 | memtable_factory=SkipListFactory
152 | comparator=leveldb.BytewiseComparator
153 | compaction_filter_factory=nullptr
154 | num_levels=7
155 | min_write_buffer_number_to_merge=1
156 | bloom_locality=0
157 | max_write_buffer_size_to_maintain=0
158 | sst_partitioner_factory=nullptr
159 | preserve_internal_time_seconds=0
160 | preclude_last_level_data_seconds=0
161 | max_write_buffer_number_to_maintain=0
162 | default_temperature=kUnknown
163 | optimize_filters_for_hits=false
164 | level_compaction_dynamic_file_size=true
165 | memtable_insert_with_hint_prefix_extractor=nullptr
166 | level_compaction_dynamic_level_bytes=true
167 | inplace_update_support=false
168 | persist_user_defined_timestamps=true
169 | compaction_filter=nullptr
170 | force_consistency_checks=true
171 |
172 | [TableOptions/BlockBasedTable "default"]
173 | num_file_reads_for_auto_readahead=2
174 | initial_auto_readahead_size=8192
175 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
176 | enable_index_compression=true
177 | pin_top_level_index_and_filter=true
178 | read_amp_bytes_per_bit=0
179 | verify_compression=false
180 | prepopulate_block_cache=kDisable
181 | format_version=5
182 | partition_filters=false
183 | metadata_block_size=4096
184 | max_auto_readahead_size=262144
185 | index_block_restart_interval=1
186 | block_size_deviation=10
187 | block_size=4096
188 | detect_filter_construct_corruption=false
189 | no_block_cache=false
190 | checksum=kXXH3
191 | filter_policy=nullptr
192 | data_block_hash_table_util_ratio=0.750000
193 | block_restart_interval=16
194 | index_type=kBinarySearch
195 | pin_l0_filter_and_index_blocks_in_cache=false
196 | data_block_index_type=kDataBlockBinarySearch
197 | cache_index_and_filter_blocks_with_high_priority=true
198 | whole_key_filtering=true
199 | index_shortening=kShortenSeparators
200 | cache_index_and_filter_blocks=false
201 | block_align=false
202 | optimize_filters_for_memory=false
203 | flush_block_policy_factory=FlushBlockBySizePolicyFactory
204 |
205 |
--------------------------------------------------------------------------------
/options_files/default_options_files/dbbench_default_options-8.8.1.ini:
--------------------------------------------------------------------------------
1 | # This is a RocksDB option file.
2 | #
3 | # For detailed file format spec, please refer to the example file
4 | # in examples/rocksdb_option_file_example.ini
5 | #
6 |
7 | [Version]
8 | rocksdb_version=8.8.1
9 | options_file_version=1.1
10 |
11 | [DBOptions]
12 | max_background_flushes=-1
13 | compaction_readahead_size=2097152
14 | wal_bytes_per_sync=0
15 | bytes_per_sync=0
16 | max_open_files=-1
17 | stats_history_buffer_size=1048576
18 | stats_dump_period_sec=600
19 | stats_persist_period_sec=600
20 | delete_obsolete_files_period_micros=21600000000
21 | max_total_wal_size=0
22 | strict_bytes_per_sync=false
23 | delayed_write_rate=8388608
24 | avoid_flush_during_shutdown=false
25 | writable_file_max_buffer_size=1048576
26 | max_subcompactions=1
27 | max_background_compactions=-1
28 | max_background_jobs=2
29 | lowest_used_cache_tier=kNonVolatileBlockTier
30 | bgerror_resume_retry_interval=1000000
31 | max_bgerror_resume_count=2147483647
32 | best_efforts_recovery=false
33 | write_dbid_to_manifest=false
34 | avoid_unnecessary_blocking_io=false
35 | atomic_flush=false
36 | log_readahead_size=0
37 | dump_malloc_stats=true
38 | info_log_level=INFO_LEVEL
39 | write_thread_max_yield_usec=100
40 | max_write_batch_group_size_bytes=1048576
41 | wal_compression=kNoCompression
42 | write_thread_slow_yield_usec=3
43 | enable_pipelined_write=true
44 | persist_stats_to_disk=false
45 | max_manifest_file_size=1073741824
46 | WAL_size_limit_MB=0
47 | fail_if_options_file_error=true
48 | max_log_file_size=0
49 | manifest_preallocation_size=4194304
50 | listeners={ErrorHandlerListener:ErrorHandlerListener}
51 | log_file_time_to_roll=0
52 | allow_data_in_errors=false
53 | WAL_ttl_seconds=0
54 | recycle_log_file_num=0
55 | file_checksum_gen_factory=nullptr
56 | keep_log_file_num=1000
57 | db_write_buffer_size=0
58 | table_cache_numshardbits=4
59 | use_adaptive_mutex=false
60 | allow_ingest_behind=false
61 | skip_checking_sst_file_sizes_on_db_open=false
62 | random_access_max_buffer_size=1048576
63 | access_hint_on_compaction_start=NORMAL
64 | allow_concurrent_memtable_write=true
65 | track_and_verify_wals_in_manifest=false
66 | skip_stats_update_on_db_open=false
67 | compaction_verify_record_count=true
68 | paranoid_checks=true
69 | max_file_opening_threads=16
70 | verify_sst_unique_id_in_manifest=true
71 | avoid_flush_during_recovery=false
72 | flush_verify_memtable_count=true
73 | db_host_id=__hostname__
74 | error_if_exists=false
75 | wal_recovery_mode=kPointInTimeRecovery
76 | enable_thread_tracking=false
77 | is_fd_close_on_exec=true
78 | enforce_single_del_contracts=true
79 | create_missing_column_families=true
80 | create_if_missing=true
81 | use_fsync=false
82 | wal_filter=nullptr
83 | allow_2pc=false
84 | use_direct_io_for_flush_and_compaction=true
85 | manual_wal_flush=false
86 | enable_write_thread_adaptive_yield=true
87 | use_direct_reads=true
88 | allow_mmap_writes=false
89 | allow_fallocate=true
90 | two_write_queues=false
91 | allow_mmap_reads=false
92 | unordered_write=false
93 | advise_random_on_open=true
94 |
95 |
96 | [CFOptions "default"]
97 | memtable_max_range_deletions=0
98 | block_protection_bytes_per_key=0
99 | memtable_protection_bytes_per_key=0
100 | sample_for_compression=0
101 | blob_file_starting_level=0
102 | blob_compaction_readahead_size=0
103 | blob_garbage_collection_force_threshold=1.000000
104 | enable_blob_garbage_collection=false
105 | min_blob_size=0
106 | last_level_temperature=kUnknown
107 | enable_blob_files=false
108 | target_file_size_base=67108864
109 | max_sequential_skip_in_iterations=8
110 | prepopulate_blob_cache=kDisable
111 | compaction_options_fifo={allow_compaction=true;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=0;}
112 | max_bytes_for_level_multiplier=10.000000
113 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
114 | max_bytes_for_level_base=268435456
115 | experimental_mempurge_threshold=0.000000
116 | write_buffer_size=67108864
117 | bottommost_compression=kDisableCompressionOption
118 | prefix_extractor=nullptr
119 | blob_file_size=268435456
120 | memtable_huge_page_size=0
121 | bottommost_file_compaction_delay=0
122 | max_successive_merges=0
123 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;}
124 | arena_block_size=1048576
125 | memtable_whole_key_filtering=false
126 | target_file_size_multiplier=1
127 | max_write_buffer_number=2
128 | blob_compression_type=kNoCompression
129 | compression=kNoCompression
130 | level0_stop_writes_trigger=36
131 | level0_slowdown_writes_trigger=20
132 | level0_file_num_compaction_trigger=4
133 | ignore_max_compaction_bytes_for_input=true
134 | max_compaction_bytes=1677721600
135 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;}
136 | memtable_prefix_bloom_size_ratio=0.000000
137 | hard_pending_compaction_bytes_limit=137438953472
138 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;}
139 | blob_garbage_collection_age_cutoff=0.250000
140 | ttl=2592000
141 | soft_pending_compaction_bytes_limit=68719476736
142 | inplace_update_num_locks=10000
143 | paranoid_file_checks=false
144 | check_flush_compaction_key_order=true
145 | periodic_compaction_seconds=0
146 | disable_auto_compactions=false
147 | report_bg_io_stats=false
148 | compaction_pri=kMinOverlappingRatio
149 | compaction_style=kCompactionStyleLevel
150 | merge_operator=nullptr
151 | table_factory=BlockBasedTable
152 | memtable_factory=SkipListFactory
153 | comparator=leveldb.BytewiseComparator
154 | compaction_filter_factory=nullptr
155 | num_levels=7
156 | min_write_buffer_number_to_merge=1
157 | bloom_locality=0
158 | max_write_buffer_size_to_maintain=0
159 | sst_partitioner_factory=nullptr
160 | preserve_internal_time_seconds=0
161 | preclude_last_level_data_seconds=0
162 | max_write_buffer_number_to_maintain=0
163 | default_temperature=kUnknown
164 | optimize_filters_for_hits=false
165 | level_compaction_dynamic_file_size=true
166 | memtable_insert_with_hint_prefix_extractor=nullptr
167 | level_compaction_dynamic_level_bytes=false
168 | inplace_update_support=false
169 | persist_user_defined_timestamps=true
170 | compaction_filter=nullptr
171 | force_consistency_checks=true
172 |
173 | [TableOptions/BlockBasedTable "default"]
174 | num_file_reads_for_auto_readahead=2
175 | initial_auto_readahead_size=8192
176 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
177 | enable_index_compression=true
178 | pin_top_level_index_and_filter=false
179 | read_amp_bytes_per_bit=0
180 | verify_compression=false
181 | prepopulate_block_cache=kDisable
182 | format_version=5
183 | partition_filters=false
184 | metadata_block_size=4096
185 | max_auto_readahead_size=262144
186 | index_block_restart_interval=1
187 | block_size_deviation=10
188 | block_size=4096
189 | detect_filter_construct_corruption=false
190 | no_block_cache=false
191 | checksum=kXXH3
192 | filter_policy=nullptr
193 | data_block_hash_table_util_ratio=0.750000
194 | block_restart_interval=16
195 | index_type=kBinarySearch
196 | pin_l0_filter_and_index_blocks_in_cache=false
197 | data_block_index_type=kDataBlockBinarySearch
198 | cache_index_and_filter_blocks_with_high_priority=true
199 | whole_key_filtering=true
200 | index_shortening=kShortenSeparatorsAndSuccessor
201 | cache_index_and_filter_blocks=false
202 | block_align=false
203 | optimize_filters_for_memory=false
204 | flush_block_policy_factory=FlushBlockBySizePolicyFactory
205 |
206 |
--------------------------------------------------------------------------------
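
The four option files above all follow the stock RocksDB OPTIONS layout: a [Version] section, then [DBOptions], [CFOptions "default"], and [TableOptions/BlockBasedTable "default"]. As a minimal sketch (not part of the repository files), such a file can be inspected with Python's configparser before it is handed to db_bench; the path and option names below are taken from rocksdb_default_options.ini above:

    import configparser

    # Keep option-name case intact; RocksDB option names are case-sensitive
    # (e.g. WAL_ttl_seconds, WAL_size_limit_MB).
    config = configparser.ConfigParser()
    config.optionxform = str
    config.read("options_files/default_options_files/rocksdb_default_options.ini")

    print(config["Version"]["rocksdb_version"])                 # 8.8.1
    print(config['CFOptions "default"']["write_buffer_size"])   # 67108864
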
/rocksdb/parse_db_bench_output.py:
--------------------------------------------------------------------------------
1 | import re
2 | import os
3 | from utils.utils import log_update
4 |
5 | def parse_db_bench_output(output):
6 |
7 | if re.match("Unable to load options file.*", output) is not None:
8 | return {
9 | "error": "Invalid options file"
10 | }
11 |
12 | # Regular expression to find and extract the number of Entries
13 | # Searches for the pattern "Entries:" followed by one or more digits
14 | entries_match = re.search(r"Entries:\s+(\d+)", output)
15 | # If a match is found, convert the captured digits to an integer
16 | entries = int(entries_match.group(1)) if entries_match else None
17 |
18 | # Regular expression to parse the output line
19 | # Captures various performance metrics and their units
20 | test_name = None
21 |
22 | if "readrandomwriterandom" in output:
23 | op_line = output.split("readrandomwriterandom")[1].split("\n")[0]
24 | test_name = "readrandomwriterandom"
25 | test_pattern = r"readrandomwriterandom\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;"
26 | elif "fillrandom" in output:
27 | op_line = output.split("fillrandom")[1].split("\n")[0]
28 | test_name = "fillrandom"
29 | test_pattern = r"fillrandom\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;\s+(\d+\.\d+)\s+(\w+/s)\nMicroseconds per write:\nCount:\s+(\d+)\s+Average:\s+(\d+\.\d+)\s+StdDev:\s+(\d+\.\d+)\nMin:\s+(\d+)\s+Median:\s+(\d+\.\d+)\s+Max:\s+(\d+)\nPercentiles:\s+P50:\s+(\d+\.\d+)\s+P75:\s+(\d+\.\d+)\s+P99:\s+(\d+\.\d+)\s+P99\.9:\s+(\d+\.\d+)\s+P99\.99:\s+(\d+\.\d+)\n-{50}"
30 | elif "readrandom" in output:
31 | op_line = output.split("readrandom")[1].split("\n")[0]
32 | test_name = "readrandom"
33 | test_pattern = r"readrandom\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;\s+(\d+\.\d+)\s+(\w+/s)\s+\((\d+)\s+of\s+(\d+)\s+found\)\n\nMicroseconds per read:\nCount:\s+(\d+)\s+Average:\s+(\d+\.\d+)\s+StdDev:\s+(\d+\.\d+)\nMin:\s+(\d+)\s+Median:\s+(\d+\.\d+)\s+Max:\s+(\d+)\nPercentiles:\s+P50:\s+(\d+\.\d+)\s+P75:\s+(\d+\.\d+)\s+P99:\s+(\d+\.\d+)\s+P99\.9:\s+(\d+\.\d+)\s+P99\.99:\s+(\d+\.\d+)\n-{50}"
34 | elif "mixgraph" in output:
35 | op_line = output.split("mixgraph :")[1].split("\n")[0]
36 | test_name = "mixgraph"
37 | test_pattern = r"mixgraph\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;"
38 | # test_pattern = r"mixgraph\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;\s+\(\s+Gets:+(\d+)\s+Puts:+(\d+)\s+Seek:(\d+),\s+reads\s+(\d+)\s+in\s+(\d+)\s+found,\s+avg\s+size:\s+\d+\s+value,\s+-nan\s+scan\)\n\nMicroseconds per read:\nCount:\s+(\d+)\s+Average:\s+(\d+\.\d+)\s+StdDev:\s+(\d+\.\d+)\nMin:\s+(\d+)\s+Median:\s+(\d+\.\d+)\s+Max:\s+(\d+)\nPercentiles:\s+P50:\s+(\d+\.\d+)\s+P75:\s+(\d+\.\d+)\s+P99:\s+(\d+\.\d+)\s+P99\.9:\s+(\d+\.\d+)\s+P99\.99:\s+(\d+\.\d+)\n-{50}"
39 | elif "readwhilewriting" in output:
40 | op_line = output.split("readwhilewriting")[1].split("\n")[0]
41 | test_name = "readwhilewriting"
42 | test_pattern = r"readwhilewriting\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;"
43 | else:
44 | log_update(f"[PDB] Test name not found in output: {output}")
45 | op_line = "unknown test"
46 | test_name = "unknown"
47 | test_pattern = r"(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;(\s+\(.*found:\d+\))?\nMicroseconds per (read|write):\nCount: (\d+) Average: (\d+\.\d+) StdDev: (\d+\.\d+)\nMin: (\d+) Median: (\d+\.\d+) Max: (\d+)\nPercentiles: P50: (\d+\.\d+) P75: (\d+\.\d+) P99: (\d+\.\d+) P99.9: (\d+\.\d+) P99.99: (\d+\.\d+)"
48 |
49 | pattern_matches = re.findall(test_pattern, output)
50 | log_update(f"[PDB] Test name: {test_name}")
51 | log_update(f"[PDB] Matches: {pattern_matches}")
52 | log_update(f"[PDB] Output line: {op_line}")
53 | # Set all values to None if the pattern is not found
54 | micros_per_op = ops_per_sec = total_seconds = total_operations = data_speed = data_speed_unit = None
55 |
56 | # Extract the performance metrics if the pattern is found
57 | for pattern_match in pattern_matches:
58 | # Convert each captured group to the appropriate type (float or int)
59 | micros_per_op = float(pattern_match[0])
60 | ops_per_sec = int(pattern_match[1])
61 | total_seconds = float(pattern_match[2])
62 | total_operations = int(pattern_match[3])
63 | # Check for specific workloads to handle additional data
64 | if "readrandomwriterandom" in output:
65 | data_speed = ops_per_sec
66 | data_speed_unit = "ops/sec"
67 | reads_found = None
68 | elif "fillrandom" in output:
69 | data_speed = float(pattern_match[4])
70 | data_speed_unit = pattern_match[5]
71 | writes_data = {
72 | "count": int(pattern_match[6]),
73 | "average": float(pattern_match[7]),
74 | "std_dev": float(pattern_match[8]),
75 | "min": int(pattern_match[9]),
76 | "median": float(pattern_match[10]),
77 | "max": int(pattern_match[11]),
78 | "percentiles": {
79 | "P50": float(pattern_match[12]),
80 | "P75": float(pattern_match[13]),
81 | "P99": float(pattern_match[14]),
82 | "P99.9": float(pattern_match[15]),
83 | "P99.99": float(pattern_match[16])
84 | }
85 | }
86 | elif "readrandom" in output:
87 | data_speed = float(pattern_match[4])
88 | data_speed_unit = pattern_match[5]
89 | reads_found = {
90 | "count": int(pattern_match[6]),
91 | "total": int(pattern_match[7])
92 | }
93 | reads_data = {
94 | "count": int(pattern_match[8]),
95 | "average": float(pattern_match[9]),
96 | "std_dev": float(pattern_match[10]),
97 | "min": int(pattern_match[11]),
98 | "median": float(pattern_match[12]),
99 | "max": int(pattern_match[13]),
100 | "percentiles": {
101 | "P50": float(pattern_match[14]),
102 | "P75": float(pattern_match[15]),
103 | "P99": float(pattern_match[16]),
104 | "P99.9": float(pattern_match[17]),
105 | "P99.99": float(pattern_match[18])
106 | }
107 | }
108 | elif "readwhilewriting" in output:
109 |             data_speed = ops_per_sec
110 |             data_speed_unit = "ops/sec"
111 | # reads_found = {
112 | # "count": int(pattern_match[6]),
113 | # "total": int(pattern_match[7])
114 | # }
115 | # reads_data = {
116 | # "count": int(pattern_match[8]),
117 | # "average": float(pattern_match[9]),
118 | # "std_dev": float(pattern_match[10]),
119 | # "min": int(pattern_match[11]),
120 | # "median": float(pattern_match[12]),
121 | # "max": int(pattern_match[13]),
122 | # "percentiles": {
123 | # "P50": float(pattern_match[14]),
124 | # "P75": float(pattern_match[15]),
125 | # "P99": float(pattern_match[16]),
126 | # "P99.9": float(pattern_match[17]),
127 | # "P99.99": float(pattern_match[18])
128 | # }
129 | # }
130 | elif "mixgraph" in output:
131 | data_speed = ops_per_sec
132 | data_speed_unit = "ops/sec"
133 | else:
134 | log_update(f"[PDB] Test name not found in output: {output}")
135 | data_speed = ops_per_sec
136 | data_speed_unit = "ops/sec"
137 |
138 | log_update(f"[PDB] Ops per sec: {ops_per_sec} Total seconds: {total_seconds} Total operations: {total_operations} Data speed: {data_speed} {data_speed_unit}")
139 |
140 |     ops_per_sec_points = re.findall(r"and \((.*),.*\) ops/second in \(.*,(.*)\)", output)
141 |
142 | # Store all extracted values in a dictionary
143 | parsed_data = {
144 | "entries": entries,
145 | "micros_per_op": micros_per_op,
146 | "ops_per_sec": ops_per_sec,
147 | "total_seconds": total_seconds,
148 | "total_operations": total_operations,
149 | "data_speed": data_speed,
150 | "data_speed_unit": data_speed_unit,
151 | "ops_per_second_graph": [
152 | [float(a[1]) for a in ops_per_sec_points],
153 | [float(a[0]) for a in ops_per_sec_points],
154 | ]
155 | }
156 |
157 | # Grab the latency and push into the output logs file
158 | latency = re.findall("Percentiles:.*", output)
159 | for i in latency:
160 | log_update("[PDB] " + i)
161 |
162 | # Return the dictionary with the parsed data
163 | return parsed_data
164 |
--------------------------------------------------------------------------------
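
A minimal usage sketch for parse_db_bench_output above, assuming captured_stdout is a placeholder string holding the text collected from a db_bench run (subprocess_manager.py below is what produces it in this repository):

    from rocksdb.parse_db_bench_output import parse_db_bench_output

    results = parse_db_bench_output(captured_stdout)  # captured_stdout: db_bench output as one string
    if results.get("error"):
        print("db_bench rejected the options file:", results["error"])
    else:
        print(f"{results['ops_per_sec']} ops/sec over {results['total_seconds']} seconds "
              f"({results['total_operations']} operations)")
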
/rocksdb/subprocess_manager.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import os
3 | import time
4 |
5 | from cgroup_monitor import CGroupMonitor
6 |
7 | from utils.utils import log_update, path_of_db
8 | from utils.constants import TEST_NAME, DB_BENCH_PATH, OPTIONS_FILE_DIR, NUM_ENTRIES, SIDE_CHECKER, FIO_RESULT_PATH
9 | from rocksdb.parse_db_bench_output import parse_db_bench_output
10 | from utils.utils import store_db_bench_output
11 | from utils.graph import plot_2axis
12 | from gpt.prompts_generator import midway_options_file_generation
13 | from utils.system_operations.fio_runner import get_fio_result
14 | from utils.system_operations.get_sys_info import system_info
15 |
16 |
17 | def pre_tasks(database_path, run_count):
18 | '''
19 | Function to perform the pre-tasks before running the db_bench
20 | Parameters:
21 | - database_path (str): The path to the database
22 | - run_count (str): The current iteration of the benchmark
23 |
24 | Returns:
25 | - None
26 | '''
27 |
28 | # Try to delete the database if path exists
29 | proc = subprocess.run(
30 | f'rm -rf {database_path}',
31 | stdout=subprocess.PIPE,
32 | stderr=subprocess.STDOUT,
33 | shell=True,
34 | check=False
35 | )
36 |
37 | log_update("[SPM] Flushing the cache")
38 | print("[SPM] Flushing the cache")
39 |     # Sync dirty pages and drop the page cache so every run starts cold
40 |     proc = subprocess.run(
41 |         'sync; echo 3 > /proc/sys/vm/drop_caches',
42 | stdout=subprocess.PIPE,
43 | stderr=subprocess.STDOUT,
44 | shell=True,
45 | check=False
46 | )
47 |
48 |     log_update("[SPM] Waiting for 30 seconds to free up memory, IO and other resources")
49 |     print("[SPM] Waiting for 30 seconds to free up memory, IO and other resources")
50 |     # Give a 30 second delay for the current memory/IO/etc to be freed
51 | time.sleep(30)
52 |
53 |
54 | def generate_db_bench_command(db_bench_path, database_path, options, run_count, test_name):
55 | '''
56 | Generate the DB bench command
57 |
58 | Parameters:
59 | - db_bench_path (str): The path to the db_bench executable
60 | - database_path (str): The path to the database
61 |     - options (str): The options file contents (db_bench reads the file already written to OPTIONS_FILE_DIR)
62 | - run_count (str): The current iteration of the benchmark
63 | - test_name (str): The name of the test
64 |
65 | Returns:
66 | - list: The db_bench command
67 | '''
68 |
69 | db_bench_command = [
70 | db_bench_path,
71 | f"--db={database_path}",
72 | f"--options_file={OPTIONS_FILE_DIR}",
73 | "--use_direct_io_for_flush_and_compaction",
74 | "--use_direct_reads", "--compression_type=none",
75 | "--stats_interval_seconds=1", "--histogram",
76 | f"--num={NUM_ENTRIES}", "--duration=100"
77 | ]
78 |
79 |
80 | if test_name == "fillrandom":
81 | db_bench_command.append("--benchmarks=fillrandom")
82 | elif test_name == "readrandomwriterandom":
83 | db_bench_command.append("--benchmarks=readrandomwriterandom")
84 | elif test_name == "readrandom":
85 | tmp_runner = db_bench_command[:-2] + ["--num=25000000", "--benchmarks=fillrandom"]
86 | tmp_proc = subprocess.run(tmp_runner, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False)
87 | new_db_bench = db_bench_command[:-2] + ["--benchmarks=readrandom", "--use_existing_db", "--num=25000000", "--duration=1000"]
88 | db_bench_command = new_db_bench
89 | elif test_name == "mixgraph":
90 | tmp_runner = db_bench_command[:-2] + ["--num=25000000", "--benchmarks=fillrandom"]
91 | tmp_proc = subprocess.run(tmp_runner, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False)
92 |         new_db_bench = db_bench_command[:-1] + ["--benchmarks=mixgraph", "--use_existing_db", "--duration=1000", "--mix_get_ratio=0.5", "--mix_put_ratio=0.5", "--mix_seek_ratio=0.0"]
93 | db_bench_command = new_db_bench
94 | elif test_name == "readwhilewriting":
95 | db_bench_command.append("--benchmarks=readwhilewriting")
96 | else:
97 | print(f"[SPM] Test name {test_name} not recognized")
98 | exit(1)
99 |
100 | log_update(f"[SPM] Command: {db_bench_command}")
101 | return db_bench_command
102 |
103 |
104 | def db_bench(db_bench_path, database_path, options, run_count, test_name, previous_throughput, options_files, bm_iter=0):
105 | '''
106 |     Write the options to the options file, run db_bench and monitor its resource usage
107 | 
108 |     Parameters:
109 |     - db_bench_path (str): The path to the db_bench executable
110 |     - database_path (str): The path to the database
111 |     - options (str): The options file contents to benchmark
112 |     - run_count (str): The current iteration of the benchmark
113 |     - test_name (str): The db_bench workload to run
114 | 
115 |     Returns:
116 |     - tuple: (output, avg_cpu_used, avg_mem_used, options)
117 | '''
118 | global proc_out
119 |     with open(OPTIONS_FILE_DIR, "w") as f:
120 | f.write(options)
121 |
122 | # Perform pre-tasks to reset the environment
123 | pre_tasks(database_path, run_count)
124 | command = generate_db_bench_command(db_bench_path, database_path, options, run_count, test_name)
125 |
126 | log_update(f"[SPM] Executing db_bench with command: {command}")
127 | print("[SPM] Executing db_bench")
128 |
129 |
130 |     if SIDE_CHECKER and previous_throughput is not None:
131 | cgroup_monitor = CGroupMonitor()
132 | cgroup_monitor.start_monitor()
133 | start_time = time.time()
134 |
135 | with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) as proc_out:
136 | output = ""
137 | check_interval = 30
138 | for line in proc_out.stdout:
139 | output += line
140 | if time.time() - start_time <= check_interval:
141 | continue
142 |
143 | start_time = time.time()
144 | if "ops/second" in line:
145 | current_avg_throughput = float(line.split("(")[2].split(",")[1].split(")")[0])
146 |
147 | if (current_avg_throughput < 0.9 * float(previous_throughput)) and (bm_iter < 3):
148 | print("[SQU] Throughput decreased, resetting the benchmark")
149 | log_update(f"[SQU] Throughput decreased {previous_throughput}->{current_avg_throughput}, resetting the benchmark")
150 | avg_cpu_used, avg_mem_used = cgroup_monitor.stop_monitor()
151 | proc_out.kill()
152 |
153 | db_path = path_of_db()
154 | fio_result = get_fio_result(FIO_RESULT_PATH)
155 | device_info = system_info(db_path, fio_result)
156 |
157 | new_options, _, _ = midway_options_file_generation(options, avg_cpu_used, avg_mem_used, current_avg_throughput, device_info, options_files)
158 | output, avg_cpu_used, avg_mem_used, options = db_bench(db_bench_path, database_path, new_options, run_count, test_name, previous_throughput, options_files, bm_iter+1)
159 |
160 | log_update("[SPM] Finished running db_bench")
161 | return output, avg_cpu_used, avg_mem_used, options
162 | else:
163 | print("[SQU] No throughput found in the output")
164 | log_update("[SQU] No throughput found in the output")
165 | # exit(1)
166 |
167 | print("[SPM] Finished running db_bench")
168 | print("----------------------------------------------------------------------------")
169 | print("[SPM] Output: ", output)
170 | avg_cpu_used, avg_mem_used = cgroup_monitor.stop_monitor()
171 | return output, avg_cpu_used, avg_mem_used, options
172 | else:
173 | cgroup_monitor = CGroupMonitor()
174 | cgroup_monitor.start_monitor()
175 | proc_out = subprocess.run(
176 | command,
177 | stdout=subprocess.PIPE,
178 | stderr=subprocess.STDOUT,
179 | check=False
180 | )
181 | avg_cpu_used, avg_mem_used = cgroup_monitor.stop_monitor()
182 | return proc_out.stdout.decode(), avg_cpu_used, avg_mem_used, options
183 |
184 |
185 | def benchmark(db_path, options, output_file_dir, reasoning, iteration_count, previous_results, options_files):
186 | '''
187 | Function to run db_bench with the given options file and store the output in a file
188 |
189 | Parameters:
190 |     - db_path (str): The path of the database
191 |     - options (str): The options file contents to be used
192 |     - output_file_dir (str): The output directory
193 |     - reasoning (str): The reasoning behind the current options file
194 |     - previous_results (dict): Parsed results of the previous run, or None on the first run
195 | 
196 |     Returns:
197 |     - tuple: (is_error, benchmark_results, average_cpu_usage, average_memory_usage, options)
198 | '''
199 | if previous_results is None:
200 | output, average_cpu_usage, average_memory_usage, options = db_bench(
201 | DB_BENCH_PATH, db_path, options, iteration_count, TEST_NAME, None, options_files)
202 | else:
203 | output, average_cpu_usage, average_memory_usage, options = db_bench(
204 | DB_BENCH_PATH, db_path, options, iteration_count, TEST_NAME, previous_results['ops_per_sec'], options_files)
205 |
206 | # log_update(f"[SPM] Output: {output}")
207 | benchmark_results = parse_db_bench_output(output)
208 |
209 | contents = os.listdir(output_file_dir)
210 | ini_file_count = len([f for f in contents if f.endswith(".ini")])
211 |
212 | if benchmark_results.get("error") is not None:
213 | is_error = True
214 | log_update(f"[SPM] Benchmark failed, the error is: {benchmark_results.get('error')}")
215 | print("[SPM] Benchmark failed, the error is: ",
216 | benchmark_results.get("error"))
217 | # Save incorrect options in a file
218 | store_db_bench_output(output_file_dir,
219 | f"{ini_file_count}-incorrect_options.ini",
220 | benchmark_results, options, reasoning)
221 | elif benchmark_results['data_speed'] is None:
222 | is_error = True
223 | log_update(f"[SPM] Benchmark failed, the error is: Data speed is None. Check DB save path")
224 | print("[SPM] Benchmark failed, the error is: ",
225 | "Data speed is None. Check DB save path")
226 | # Save incorrect options in a file
227 | store_db_bench_output(output_file_dir,
228 | f"{ini_file_count}-incorrect_options.ini",
229 | benchmark_results, options, reasoning)
230 | else:
231 | is_error = False
232 | # Store the output of db_bench in a file
233 | store_db_bench_output(output_file_dir, f"{ini_file_count}.ini",
234 | benchmark_results, options, reasoning)
235 | plot_2axis(*benchmark_results["ops_per_second_graph"],
236 | "Ops Per Second",
237 | f"{output_file_dir}/ops_per_sec_{ini_file_count}.png")
238 | log_update(f"[SPM] Latest result: {benchmark_results['data_speed']}"
239 | f"{benchmark_results['data_speed_unit']} and {benchmark_results['ops_per_sec']} ops/sec.")
240 | log_update(f"[SPM] Avg CPU and Memory usage: {average_cpu_usage}% and {average_memory_usage}%")
241 | print(
242 | f"[SPM] Latest result: {benchmark_results['data_speed']}",
243 | f"{benchmark_results['data_speed_unit']} and {benchmark_results['ops_per_sec']} ops/sec.\n",
244 | f"[SPM] Avg CPU and Memory usage: {average_cpu_usage}% and {average_memory_usage}%"
245 | )
246 |
247 | return is_error, benchmark_results, average_cpu_usage, average_memory_usage, options
248 |
--------------------------------------------------------------------------------
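
A sketch of how the benchmark() entry point above might be called for a first iteration, before any previous results exist; the literal paths and the options_text variable are placeholders, while an actual run also relies on DB_BENCH_PATH, TEST_NAME and OPTIONS_FILE_DIR from utils.constants:

    from rocksdb.subprocess_manager import benchmark

    # options_text holds the OPTIONS file contents as a string
    # (e.g. one of the default_options_files read from disk).
    is_error, results, avg_cpu, avg_mem, options = benchmark(
        db_path="/tmp/rocksdb_bench",      # placeholder database directory
        options=options_text,
        output_file_dir="./results",       # where the numbered .ini outputs and plots go
        reasoning="baseline run",
        iteration_count=0,
        previous_results=None,             # no earlier run, so the throughput side-check is skipped
        options_files=[],
    )
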
/gpt/prompts_generator.py:
--------------------------------------------------------------------------------
1 | import re
2 | from difflib import Differ
3 | from options_files.ops_options_file import cleanup_options_file
4 | from gpt.gpt_request import request_gpt
5 | from utils.utils import log_update
6 | from dotenv import load_dotenv
7 | import utils.constants as constants
8 |
9 | load_dotenv()
10 |
11 | def generate_system_content(device_information, rocksdb_version):
12 | """
13 |     Function to generate the system content with device info and RocksDB version.
14 | 
15 |     Parameters:
16 |     device_information (str): Information about the device.
17 |     rocksdb_version (str): The RocksDB version the generated options file must target.
18 | Returns:
19 | str: A prompt for configuring RocksDB for enhanced performance.
20 | """
21 |
22 | content = (
23 | "You are a RocksDB Expert. "
24 | "You are being consulted by a company to help improve their RocksDB configuration "
25 |         "by optimizing their options file based on their System information and benchmark output. "
26 |         f"Only provide option files for rocksdb version {rocksdb_version}. Also, Direct IO will always be used for both flush and compaction. "
27 |         "Additionally, compression type is always set to none. "
28 |         "First explain the reasoning, change only 10 options, then show the option file in the original format. "
29 |         f"The Device information is: {device_information}"
30 | )
31 | return content
32 |
33 | def generate_default_user_content(chunk_string, previous_option_files, average_cpu_used=-1.0, average_mem_used=-1.0, test_name="fillrandom"):
34 | user_contents = []
35 | for _, benchmark_result, reasoning, _ in previous_option_files[1: -1]:
36 | benchmark_line = generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used)
37 | user_content = f"The option file changes were:\n```\n{reasoning}\n```\nThe benchmark results are: {benchmark_line}"
38 | user_contents.append(user_content)
39 |
40 | _, benchmark_result, _, _ = previous_option_files[-1]
41 | benchmark_line = generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used)
42 | user_content = f"Part of the current option file is:\n```\n{chunk_string}\n```\nThe benchmark results are: {benchmark_line}"
43 | user_contents.append(user_content)
44 |     user_contents.append("Based on this information, generate a new file in the same format as the options_file to improve my database performance. Enclose the new options file in ```.")
45 | return user_contents
46 |
47 | def generate_user_content_with_difference(previous_option_files, average_cpu_used=-1.0, average_mem_used=-1.0, test_name="fillrandom"):
48 |     result = " "
49 | user_content = []
50 |
51 | if len(previous_option_files) == 1:
52 | m1_file, m1_benchmark_result, _, _ = previous_option_files[-1]
53 | benchmark_line = generate_benchmark_info(test_name, m1_benchmark_result, average_cpu_used, average_mem_used)
54 | user_content = f"The original file is:\n```\n{m1_file}\n```\nThe benchmark results for the original file are: {benchmark_line}"
55 |
56 | elif len(previous_option_files) > 1:
57 | previous_option_file1, _, _, _ = previous_option_files[-1]
58 | previous_option_file2, _, _, _ = previous_option_files[-2]
59 |
60 | pattern = re.compile(r'\s*([^=\s]+)\s*=\s*([^=\s]+)\s*')
61 |
62 | file1_lines = pattern.findall(previous_option_file1)
63 | file2_lines = pattern.findall(previous_option_file2)
64 |
65 | file1_lines = ["{} = {}".format(k, v) for k, v in file1_lines]
66 | file2_lines = ["{} = {}".format(k, v) for k, v in file2_lines]
67 | differ = Differ()
68 | diff = list(differ.compare(file1_lines, file2_lines))
69 |         lst = []
70 | for line in diff:
71 | if line.startswith('+'):
72 | lst.append(line)
73 | result = '\n'.join(line[2:] for line in lst)
74 | m2_file, m2_benchmark_result, _, _ = previous_option_files[-2]
75 | benchmark_line = generate_benchmark_info(test_name, m2_benchmark_result, average_cpu_used, average_mem_used)
76 | user_content = (
77 | f"The original file is:\n```\n{m2_file}\n```\n"
78 | f"The benchmark results for the original file are: {benchmark_line}\n"
79 | f"The previous file modifications are:\n```\n{result}\n```\n"
80 | )
81 |
82 | else:
83 | _, benchmark_result, _, _ = previous_option_files[-1]
84 | benchmark_line = generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used)
85 |
86 | user_content = ("The previous file modifications are: "
87 | f"\n```\n{result}\n```\n"
88 | f"The benchmark results for the previous file are: {benchmark_line}")
89 |
90 |
91 |     user_contents = [user_content, "Based on this information, generate a new file in the same format as the options_file to improve my database performance. Enclose the new options file in ```."]
92 | return user_contents
93 |
94 | def generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used):
95 | """
96 | Function to create a formatted string with benchmark information.
97 |
98 | Parameters:
99 | - test_name: Name of the test.
100 | - benchmark_result: Dictionary with benchmark results.
101 | - average_cpu_used: Average CPU usage.
102 | - average_mem_used: Average Memory usage.
103 |
104 | Returns:
105 | - A formatted string with all benchmark information.
106 | """
107 | benchmark_line = (f"The use case for the database is perfectly simulated by the {test_name} test. "
108 | f"The db_bench benchmark results for {test_name} are: Write/Read speed: {benchmark_result['data_speed']} "
109 | f"{benchmark_result['data_speed_unit']}, Operations per second: {benchmark_result['ops_per_sec']}.")
110 |
111 | if average_cpu_used != -1 and average_mem_used != -1:
112 | benchmark_line += f" CPU used: {average_cpu_used}%, Memory used: {average_mem_used}% during test."
113 |
114 | return benchmark_line
115 |
116 | def midway_options_file_generation(options, avg_cpu_used, avg_mem_used, last_throughput, device_information, options_file):
117 | """
118 |     Function to generate a new options file mid-benchmark when throughput drops.
119 | 
120 |     Returns:
121 |     - tuple: (clean_options_file, reasoning, "") produced from the GPT response.
122 | """
123 |
124 | sys_content = (
125 | "You are a RocksDB Expert being consulted by a company to help improve their RocksDB performance "
126 |         "by optimizing the options configured for a particular scenario they face. "
127 |         f"Only provide option files for rocksdb version {constants.VERSION}. Direct IO will always be used. "
128 |         "Additionally, compression type is always set to none. "
129 |         "Respond with the reasoning first, then show the option file in the original format. "
130 | f"The Device information is: {device_information}"
131 | )
132 |
133 | user_content = []
134 | content = "Can you generate a new options file for RocksDB based on the following information?\n"
135 | content += "The previous options file is:\n"
136 |
137 | content += "```\n"
138 | content += options_file[-1][0]
139 | content += "```\n"
140 |
141 | content += (
142 | f"The throughput results for the above options file are: {options_file[-1][1]['ops_per_sec']}. "
143 | )
144 |
145 | user_content.append(content)
146 | content = ""
147 |
148 | content += "We then made the following changes to the options file:\n"
149 |
150 | pattern = re.compile(r'\s*([^=\s]+)\s*=\s*([^=\s]+)\s*')
151 |
152 | file1_lines = pattern.findall(options)
153 | file2_lines = pattern.findall(options_file[-1][0])
154 |
155 | file1_lines = ["{} = {}".format(k, v) for k, v in file1_lines]
156 | file2_lines = ["{} = {}".format(k, v) for k, v in file2_lines]
157 | differ = Differ()
158 | diff = list(differ.compare(file1_lines, file2_lines))
159 |     lst = []
160 | for line in diff:
161 | if line.startswith('+'):
162 | lst.append(line)
163 | result = '\n'.join(line[2:] for line in lst)
164 |
165 | content += "```\n"
166 | content += result
167 | content += "```\n"
168 |
169 | content += f"\nThe updated file has a throughput of: {last_throughput}. \n\n"
170 | user_content.append(content)
171 | content = ""
172 |     content += "Based on this information, generate a new file. Enclose the new options in ```. Feel free to use up to 100% of the CPU and Memory."
173 | user_content.append(content)
174 |
175 | log_update("[OG] Generating options file with differences")
176 | log_update("[OG] Prompt for midway options file generation")
177 | log_update(content)
178 |
179 | matches = request_gpt(sys_content, user_content, 0.4)
180 |
181 | if matches is not None:
182 | clean_options_file = cleanup_options_file(matches[1])
183 | reasoning = matches[0] + matches[2]
184 |
185 | return clean_options_file, reasoning, ""
186 |
187 | def generate_option_file_with_gpt(case, previous_option_files, device_information, temperature=0.4, average_cpu_used=-1.0, average_mem_used=-1.0, test_name="fillrandom", version="8.8.1"):
188 | """
189 | Function that generates an options file for RocksDB based on specified parameters and case scenarios.
190 | - This function selects one of three different approaches to generate a RocksDB configuration options file.
191 |
192 | Parameters:
193 | - case (int): Determines the approach to use for generating the options file. Valid values are 1, 2, or 3.
194 | - previous_option_files (list): A list of tuples containing past options file configurations and other relevant data.
195 | - device_information (str): Information about the device/system on which RocksDB is running.
196 | - temperature (float, optional): Controls the randomness/creativity of the generated output. Default is 0.4.
197 | - average_cpu_used (float, optional): Average CPU usage, used for tuning the configuration. Default is -1.0, indicating not specified.
198 | - average_mem_used (float, optional): Average memory usage, used for tuning the configuration. Default is -1.0, indicating not specified.
199 |     - test_name (str, optional): Identifier for the type of test or configuration scenario. Default is "fillrandom".
200 |     - version (str, optional): RocksDB version that the generated options file should target. Default is "8.8.1".
201 | Returns:
202 | - tuple: A tuple containing the generated options file, reasoning behind the options, and an empty string as a placeholder.
203 |
204 | Raises:
205 | - ValueError: If the `case` parameter is not 1, 2, or 3.
206 | """
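    # Illustrative call (hypothetical values; entries of previous_option_files are assumed to
    # be 4-tuples whose first element is the options-file text, matching the unpacking below):
    #   new_options, reasoning, _ = generate_option_file_with_gpt(
    #       case=1,
    #       previous_option_files=[(prev_options_text, results, cpu_pct, mem_pct)],
    #       device_information="16 cores, 64 GB RAM, NVMe SSD",
    #       test_name="fillrandom",
    #       version="8.8.1",
    #   )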
207 |     def case_1(previous_option_files, device_information, temperature, average_cpu_used, average_mem_used, test_name, version):
208 | log_update("[OG] Generating options file with long option changes")
209 | print("[OG] Generating options file with long option changes")
210 | system_content = generate_system_content(device_information, version)
211 | previous_option_file, _, _, _ = previous_option_files[-1]
212 | user_contents = generate_default_user_content(previous_option_file, previous_option_files, average_cpu_used, average_mem_used, test_name)
213 | matches = request_gpt(system_content, user_contents, temperature)
214 | # Process the GPT-generated response
215 | if matches is not None:
216 | clean_options_file = cleanup_options_file(matches[1])
217 | reasoning = matches[0] + matches[2]
218 |
219 | return clean_options_file, reasoning, ""
220 |
221 |     def case_2(previous_option_files, device_information, temperature, average_cpu_used, average_mem_used, test_name, version):
222 | log_update("[OG] Generating options file with short option changes")
223 | print("[OG] Generating options file with short option changes")
224 | system_content = (
225 | "You are a RocksDB Expert. "
226 | "You are being consulted by a company to help improve their RocksDB configuration "
227 |             "by optimizing their options file based on their system information and benchmark output. "
228 |             f"Only provide option files for rocksdb version {version}. Also, Direct IO will always be used for both flush and compaction. "
229 |             "Additionally, compression type is set to none always. "
230 |             "First explain the reasoning, change only the options I provided, then show the options file in the original format. "
231 | f"The Device information is: {device_information}")
232 | previous_option_file, _, _, _ = previous_option_files[-1]
233 |
234 | # Define a regular expression pattern to match key-value pairs
235 | pattern = re.compile(r'\s*([^=\s]+)\s*=\s*([^=\s]+)\s*')
236 |
237 | # Extract key-value pairs from the string
238 |         key_value_pairs = {match.group(1): match.group(2)
239 |                            for match in pattern.finditer(previous_option_file)}
240 | 
241 |         # Remove metadata keys that should not be tuned (case-insensitive)
242 |         key_value_pairs = {key: value for key, value in key_value_pairs.items()
243 |                            if key.lower() not in {'rocksdb_version', 'options_file_version'}}
244 |
245 |         # Split key-value pairs into chunks of pairs_per_chunk (20) pairs each
246 | pairs_per_chunk = 20
247 | chunks = [list(key_value_pairs.items())[i:i + pairs_per_chunk]
248 | for i in range(0, len(key_value_pairs), pairs_per_chunk)]
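        # Illustrative arithmetic: with 65 extracted pairs and pairs_per_chunk = 20, this
        # produces chunks of 20, 20, 20 and 5 pairs, each sent to GPT as a separate request.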
249 |
250 | # Create strings for each chunk
251 | chunk_strings = [
252 | '\n'.join([f"{key}: {value}" for key, value in chunk]) for chunk in chunks]
253 |
254 | clean_options_file = ""
255 | reasoning = ""
256 |
257 | # Loop through each part and make API calls
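        # Note: every chunk yields a full response; only the options file from the most recent
        # successful response is kept, while the reasoning from each chunk is accumulated.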
258 | for chunk_string in chunk_strings:
259 | user_contents = generate_default_user_content(chunk_string, previous_option_files, average_cpu_used, average_mem_used, test_name)
260 | matches = request_gpt(system_content, user_contents, temperature)
261 | if matches is not None:
262 | clean_options_file = cleanup_options_file(matches[1])
263 | reasoning += matches[0] + matches[2]
264 |
265 | return clean_options_file, reasoning, ""
266 |
267 |
268 |     def case_3(previous_option_files, device_information, temperature, average_cpu_used, average_mem_used, test_name, version):
269 |
270 | log_update("[OG] Generating options file with differences")
271 | print("[OG] Generating options file with differences")
272 | system_content = generate_system_content(device_information, version)
273 | # Request GPT to generate new option
274 | user_contents = generate_user_content_with_difference(previous_option_files, average_cpu_used, average_mem_used, test_name)
275 | matches = request_gpt(system_content, user_contents, temperature)
276 | # Process the GPT response
277 | if matches is not None:
278 | clean_options_file = cleanup_options_file(matches[1])
279 | reasoning = matches[0] + matches[2]
280 |
281 | return clean_options_file, reasoning, ""
282 |
283 | switch = {
284 | 1: case_1,
285 | 2: case_2,
286 | 3: case_3,
287 | }
288 | func = switch.get(case)
289 | if func:
290 |         return func(previous_option_files, device_information, temperature, average_cpu_used, average_mem_used, test_name, version)
291 | else:
292 | raise ValueError(f"No function defined for case {case}")
--------------------------------------------------------------------------------