├── utils ├── filter.py ├── parse.py ├── constants.py ├── system_operations │ ├── fio_runner.py │ └── get_sys_info.py ├── graph.py └── utils.py ├── requirements.txt ├── docker ├── Dockerfile └── docker_runner.py ├── gpt ├── gpt_request.py └── prompts_generator.py ├── README.md ├── options_files ├── default_options_files │ ├── initial_options_file.ini │ ├── dbbench_default_options-7.10.2.ini │ ├── bad_options.ini │ ├── good_options.ini │ ├── rocksdb_default_options.ini │ └── dbbench_default_options-8.8.1.ini └── ops_options_file.py ├── main.py └── rocksdb ├── parse_db_bench_output.py └── subprocess_manager.py /utils/filter.py: -------------------------------------------------------------------------------- 1 | def key_filter(key): 2 | if (key == 'wal_size_limit_mb'): 3 | key = 'WAL_size_limit_MB' 4 | if (key == 'wal_ttl_seconds'): 5 | key = 'WAL_ttl_seconds' 6 | return key 7 | 8 | # Options that should not be changed 9 | BLACKLIST = ['use_direct_io_for_flush_and_compaction', 10 | 'use_direct_reads', 'compression_type'] 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.6.0 2 | anyio==4.1.0 3 | black==23.11.0 4 | certifi==2023.11.17 5 | click==8.1.7 6 | distro==1.8.0 7 | exceptiongroup==1.2.0 8 | h11==0.14.0 9 | httpcore==1.0.2 10 | httpx==0.25.2 11 | idna==3.6 12 | mypy-extensions==1.0.0 13 | openai==1.3.8 14 | packaging==23.2 15 | pathspec==0.12.0 16 | platformdirs==4.1.0 17 | psutil==5.9.6 18 | py-cpuinfo==9.0.0 19 | pydantic==2.5.2 20 | pydantic_core==2.14.5 21 | python-dotenv==1.0.0 22 | sniffio==1.3.0 23 | tomli==2.0.1 24 | tqdm==4.66.1 25 | typing_extensions==4.8.0 26 | matplotlib==3.7.4 27 | deepdiff==6.7.1 28 | cgroup-monitor==0.1.2 29 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ARG TARGETPLATFORM 4 | ARG DEBIAN_FRONTEND=noninteractive 5 | 6 | # Install dependencies 7 | RUN apt-get update && apt-get install -y \ 8 | build-essential \ 9 | libgflags-dev \ 10 | libsnappy-dev \ 11 | zlib1g-dev \ 12 | libbz2-dev \ 13 | liblz4-dev \ 14 | libzstd-dev \ 15 | cmake \ 16 | git \ 17 | python3 \ 18 | python3-pip \ 19 | wget \ 20 | fio \ 21 | libjemalloc2 22 | 23 | # Setup RocksDB 24 | RUN wget https://github.com/facebook/rocksdb/archive/refs/tags/v8.8.1.tar.gz && \ 25 | tar -xzf v8.8.1.tar.gz && \ 26 | cd rocksdb-8.8.1 && \ 27 | make -j48 static_lib db_bench 28 | 29 | # Setup GPT Repo's requirements 30 | ADD gpt-assisted-rocksdb-config/requirements.txt /requirements.txt 31 | 32 | # Setup Python requirments 33 | RUN pip3 install -r requirements.txt 34 | 35 | # Setup GPT Repo 36 | ADD gpt-assisted-rocksdb-config /gpt-assisted-rocksdb-config 37 | 38 | WORKDIR /gpt-assisted-rocksdb-config 39 | CMD ["python3", "main.py"] 40 | -------------------------------------------------------------------------------- /utils/parse.py: -------------------------------------------------------------------------------- 1 | 2 | import configparser 3 | from utils.filter import key_filter 4 | 5 | def dict_to_configparser(dictionary): 6 | ''' 7 | Function to convert a dictionary to a configparser object 8 | 9 | Parameters: 10 | - dictionary (dict): The dictionary to be converted 11 | 12 | Returns: 13 | - config (configparser.ConfigParser): The configparser object 14 | ''' 15 | config = 
configparser.ConfigParser() 16 | 17 | for section, options in dictionary.items(): 18 | config[section] = {} 19 | for key, value in options.items(): 20 | config[section][key] = value 21 | 22 | return config 23 | 24 | def configparser_to_string(config_parser): 25 | ''' 26 | Function to convert a configparser object to a string 27 | 28 | Parameters: 29 | - config_parser (configparser.ConfigParser): The configparser object 30 | 31 | Returns: 32 | - string_representation (str): The string representation of the configparser object 33 | ''' 34 | string_representation = '' 35 | for section in config_parser.sections(): 36 | string_representation += f"[{section}]\n" 37 | for key, value in config_parser[section].items(): 38 | key = key_filter(key) 39 | string_representation += f" {key}={value}\n" 40 | string_representation += '\n' 41 | return string_representation -------------------------------------------------------------------------------- /gpt/gpt_request.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from openai import OpenAI 4 | 5 | # Environment variables 6 | client = OpenAI() 7 | client.api_key = os.getenv("OPENAI_API_KEY") 8 | 9 | def request_gpt(system_content, user_contents, temperature): 10 | ''' 11 | Function to make an API call to GPT-4 12 | 13 | Parameters: 14 | - system_content: string containing the system information 15 | - chunk_string: string containing the chunk of the options file 16 | - previous_option_files: list of tuples containing the previous option files and their benchmark results 17 | - temperature: Float (0-1) controlling GPT-4's output randomness. 18 | - average_cpu_used: Float indicating average CPU usage (default -1.0). 19 | - average_mem_used: Float indicating average memory usage (default -1.0). 20 | - test_name: String stating the benchmark test. 21 | 22 | Returns: 23 | - matches: string containing the options file generated by GPT-4 24 | ''' 25 | messages = [{"role": "system", "content": system_content}] 26 | for content in user_contents: 27 | messages.append({"role": "user", "content": content}) 28 | 29 | 30 | # Assuming 'client' is already defined and authenticated for GPT-4 API access 31 | completion = client.chat.completions.create( 32 | model="gpt-4-0125-preview", 33 | messages=messages, 34 | temperature=temperature, 35 | max_tokens=4096, 36 | frequency_penalty=0, 37 | presence_penalty=0, 38 | ) 39 | 40 | # Extract the assistant's reply 41 | assistant_reply = completion.choices[0].message.content 42 | matches = re.match("[\s\S]*```([\s\S]*)```([\s\S]*)", assistant_reply) 43 | 44 | # Check if result is good 45 | if matches is not None: 46 | return matches 47 | 48 | # Invalid response 49 | with open("invalid_assistant_reply.txt", "a") as file: 50 | file.write(assistant_reply + "\n\n" + "-" * 150 + "\n\n") 51 | return None 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ELMo-Tune ([HotStorage'24 Best Paper] Can Modern LLMs Tune and Configure LSM-based Key-Value Stores?) 2 | 3 | 🏆HotStorage'24 Best Paper - Can Modern LLMs Tune and Configure LSM-based Key-Value Stores?
4 | Paper URL: [https://doi.org/10.1145/3655038.3665954](https://doi.org/10.1145/3655038.3665954) 5 | 6 | ## Features 7 | This project will run a series of tests using the db_bench tool. The tests will be run using the default configuration and a series of configurations that will be determined by the research. The results of the tests will be compared to determine the best configuration for RocksDB when using ELMo-Tune. 8 | 9 | ## Prerequisites 10 | This project requires Python 3.6 or higher. The following dependencies are required: 11 | ```bash 12 | # Instructions for Ubuntu 20.04 13 | # Install dependencies 14 | apt-get update && apt-get install -y build-essential libgflags-dev libsnappy-dev zlib1g-dev libbz2-dev liblz4-dev libzstd-dev git python3 python3-pip wget fio 15 | 16 | # Install and Build RocksDB 8.8.1 17 | wget https://github.com/facebook/rocksdb/archive/refs/tags/v8.8.1.tar.gz 18 | tar -xzf v8.8.1.tar.gz 19 | cd rocksdb-8.8.1 20 | make -j static_lib db_bench 21 | 22 | git clone https://github.com/asu-idi/ELMo-Tune 23 | cd ELMo-Tune 24 | 25 | # Install requirements 26 | pip install -r requirements.txt 27 | ``` 28 | 29 | ## Setup 30 | To run the tests sucessfully, some variables need to be defined. 31 | ```bash 32 | # You need OpenAI's API to run the code sucessfully. 33 | export OPENAI_API_KEY= 34 | ``` 35 | Additionally, set the DB_BENCH_PATH in utils/constants.py along with any other paths required for your system setup. 36 | 37 | ## How to use 38 | To run the tests, run the following command: 39 | ```bash 40 | # e.g. Run a random write (fillrandom) test with the db stored in the '/data/gpt_project/db' folder and with output in the './output' directory 41 | python3 main.py --workload=fillrandom --device=data --output=./output --num_entries=10000 42 | 43 | # You can explore the options using the --help command (or using the constants.py file) 44 | # -c --case CASE Specify the case number 45 | # -d --device DEVICE Specify the device 46 | # -t --workload WORKLOAD Specify the test name 47 | # -v --version VERSION Specify the version of RocksDB 48 | # -o --output OUTPUT Specify the output path 49 | # -n --num_entries NUM_ENTRIES Specify the number of entries 50 | # -s --side_checker SIDE_CHECKER Specify if side checker is enabled 51 | ``` 52 | 53 | > You can alternatively also use the Docker environment that can be built using the Dockerfile in the docker folder. 
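For reference, here is a minimal sketch of that Docker flow. The image tag `gptproject:latest` is the one `docker/docker_runner.py` launches; the clone directory name and the mount/environment values below are illustrative assumptions. Because the Dockerfile `ADD`s paths under `gpt-assisted-rocksdb-config/`, the build context must be the directory *containing* the clone:

```bash
# Assumes the repo is cloned as ./gpt-assisted-rocksdb-config (adjust to your clone name)
docker build -f gpt-assisted-rocksdb-config/docker/Dockerfile -t gptproject:latest .

# Launch one container by hand; docker_runner.py automates this across CPU/memory/workload combinations
docker run --rm \
  -e OPENAI_API_KEY=$OPENAI_API_KEY \
  -e DEVICE=data \
  -e TEST_NAME=fillrandom \
  -v /data/gpt_project:/data/gpt_project \
  gptproject:latest
```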
54 | -------------------------------------------------------------------------------- /utils/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | import argparse 4 | from datetime import datetime 5 | 6 | load_dotenv() 7 | 8 | def path_of_output_folder(): 9 | ''' 10 | Set the output folder directory 11 | 12 | Parameters: 13 | - None 14 | 15 | Returns: 16 | - output_folder_dir (str): The output folder directory 17 | ''' 18 | current_datetime = datetime.now() 19 | date_time_string = current_datetime.strftime("%Y-%m-%d_%H-%M-%S") 20 | output_folder_dir = f"output/output_{DEVICE}/output_{date_time_string}" 21 | 22 | os.makedirs(output_folder_dir, exist_ok=True) 23 | print(f"[UTL] Using output folder: {output_folder_dir}") 24 | 25 | return output_folder_dir 26 | 27 | # Check the environement variables, set to default if not found 28 | env_DEVICE = os.getenv("DEVICE", None) 29 | env_TEST_NAME = os.getenv("TEST_NAME", None) 30 | env_CASE_NUMBER = os.getenv("CASE_NUMBER", 1) 31 | env_VERSION = os.getenv("VERSION", "8.8.1") 32 | env_OUTPUT_PATH = os.getenv("OUTPUT_PATH", None) 33 | env_NUM_ENTRIES = os.getenv("NUM_ENTRIES", 3000000000) 34 | env_SIDE_CHECKER = os.getenv("SIDE_CHECKER", True) 35 | 36 | # Parse the arguments. They replace the environment variables if they are set 37 | parser = argparse.ArgumentParser(description='Description of your script') 38 | parser.add_argument('-c', '--case', type=int, default=env_CASE_NUMBER, help='Specify the case number') 39 | parser.add_argument('-d', '--device', type=str, default=env_DEVICE, help='Specify the device') 40 | parser.add_argument('-t', '--workload', type=str, default=env_TEST_NAME, help='Specify the test name') 41 | parser.add_argument('-v', '--version', type=str, default=env_VERSION, help='Specify the version of RocksDB') 42 | parser.add_argument('-o', '--output', type=str, default=env_OUTPUT_PATH, help='Specify the output path') 43 | parser.add_argument('-n', '--num_entries', type=int, default=env_NUM_ENTRIES, help='Specify the number of entries') 44 | parser.add_argument('-s', '--side_checker', type=bool, default=env_SIDE_CHECKER, help='Specify if side checker is enabled') 45 | 46 | args = parser.parse_args() 47 | CASE_NUMBER = args.case 48 | DEVICE = args.device 49 | TEST_NAME = args.workload 50 | VERSION = args.version 51 | OUTPUT_PATH = args.output if args.output else path_of_output_folder() 52 | NUM_ENTRIES = args.num_entries 53 | SIDE_CHECKER = args.side_checker 54 | 55 | # Constants 56 | # DB_BENCH_PATH = f"/data/gpt_project/rocksdb-{VERSION}/db_bench" 57 | DB_BENCH_PATH = f"/rocksdb-{VERSION}/db_bench" 58 | DB_PATH = f"/{DEVICE}/gpt_project/db" 59 | FIO_RESULT_PATH = f"/data/gpt_project/gpt-assisted-rocksdb-config/data/fio/fio_output_{DEVICE}.txt" 60 | DEFAULT_OPTION_FILE_DIR = "options_files/default_options_files" 61 | INITIAL_OPTIONS_FILE_NAME = f"dbbench_default_options-{VERSION}.ini" 62 | OPTIONS_FILE_DIR = f"{OUTPUT_PATH}/options_file.ini" 63 | -------------------------------------------------------------------------------- /docker/docker_runner.py: -------------------------------------------------------------------------------- 1 | import docker 2 | import os 3 | 4 | client = docker.from_env() 5 | 6 | def main(): 7 | ''' 8 | Main function to run multiple docker containers one after the other. All containers mount a volume to the host machine. 
9 | Additionally, before mounting, the environment variables are updated to reflect the current iteration number and the status 10 | of the for loop which is controlling the memory and cpus. 11 | ''' 12 | 13 | cpu_list = [2, 4] 14 | memory_list = [4, 8] 15 | devices = ["nvme", "data"] 16 | tests = ["fillrandom", "readrandom", "readrandomwriterandom"] 17 | base_output_path = f"/data/gpt_project/gpt-assisted-rocksdb-config/output/output" 18 | base_db_path = f"gpt_project/dbr" 19 | 20 | for memory_cap in memory_list: 21 | for cpu_cap in cpu_list: 22 | for test in tests: 23 | print("-" * 50) 24 | print(f"Running Iteration for CPU: {cpu_cap} Memory: {memory_cap} on /{devices[0]} and /{devices[1]} for {test}") 25 | 26 | # Run docker container with mount and environment variables as in cpu and memory 27 | container = client.containers.run( 28 | "gptproject:latest", 29 | detach=True, 30 | name=f"gpt_project_c{cpu_cap}_m{memory_cap}_{devices[0]}_{test}", 31 | environment=[f"ITERATION=c{cpu_cap}m{memory_cap}", f"CPU_COUNT={cpu_cap}", f"MEMORY_MAX={memory_cap}", 32 | f"OUTPUT_PATH={base_output_path}_{devices[0]}/c{cpu_cap}_m{memory_cap}_{test}", 33 | f"DEVICE={devices[0]}", f"TEST_NAME={test}", f"DB_PATH=/{devices[0]}/{base_db_path}/{cpu_cap}_{test}"], 34 | cpu_count=cpu_cap, 35 | mem_limit=f"{memory_cap}g", 36 | volumes={"/nvme/gpt_project": {'bind': '/nvme/gpt_project', 'mode': 'rw'}, 37 | "/data/gpt_project": {'bind': '/data/gpt_project', 'mode': 'rw'}} 38 | ) 39 | 40 | # Run docker container with mount and environment variables as in cpu and memory 41 | container2 = client.containers.run( 42 | "gptproject:latest", 43 | detach=True, 44 | name=f"gpt_project_c{cpu_cap}_m{memory_cap}_{devices[1]}_{test}", 45 | environment=[f"ITERATION=c{cpu_cap}m{memory_cap}", f"CPU_COUNT={cpu_cap}", f"MEMORY_MAX={memory_cap}", 46 | f"OUTPUT_PATH={base_output_path}_{devices[1]}/c{cpu_cap}_m{memory_cap}_{test}", 47 | f"DEVICE={devices[1]}", f"TEST_NAME={test}", f"DB_PATH=/{devices[1]}/{base_db_path}/{cpu_cap}_{test}"], 48 | cpu_count=cpu_cap, 49 | mem_limit=f"{memory_cap}g", 50 | volumes={"/nvme/gpt_project": {'bind': '/nvme/gpt_project', 'mode': 'rw'}, 51 | "/data/gpt_project": {'bind': '/data/gpt_project', 'mode': 'rw'}} 52 | ) 53 | 54 | # Wait for the container to finish 55 | container.wait() 56 | container2.wait() 57 | 58 | if __name__ == "__main__": 59 | main() -------------------------------------------------------------------------------- /utils/system_operations/fio_runner.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import re 3 | import os 4 | 5 | 6 | def fio_run(test_type, file_path): 7 | ''' 8 | Function to run fio benchmark 9 | 10 | Parameters: 11 | - test_type: string containing the type of test to run 12 | 13 | Returns: 14 | - parsed_res: string containing the parsed result of the fio test 15 | ''' 16 | command = [ 17 | "fio", 18 | "--name=test", 19 | "--ioengine=posixaio", 20 | f"--rw={test_type}", 21 | "--bs=4k", 22 | "--numjobs=1", 23 | "--size=10G", 24 | "--runtime=60", 25 | "--time_based" 26 | ] 27 | 28 | print("[FIO] running fio test now", test_type + "\n") 29 | proc = subprocess.run( 30 | command, 31 | stdout=subprocess.PIPE, 32 | stderr=subprocess.STDOUT, 33 | ) 34 | 35 | output = proc.stdout.decode() 36 | print("[FIO] output :", output) 37 | 38 | parsed_res = parse_fio_output(output, test_type) 39 | 40 | with open(file_path, "a") as file: 41 | file.write(parsed_res + '\n') 42 | 43 | return parsed_res 44 | 45 | 46 | def 
get_fio_result(file_path): 47 | ''' 48 | Function to get the fio result 49 | 50 | Parameters: 51 | - file_path: string containing the path to the fio result file 52 | 53 | Returns: 54 | - content: string containing the content of the fio result 55 | ''' 56 | if (os.path.exists(file_path) and os.path.getsize(file_path) != 0): 57 | print("[FIO] File exists and is not empty. Reading file.") 58 | with open(file_path, 'r') as file: 59 | content = file.read() 60 | return content 61 | 62 | # List of test types 63 | test_types = ["randwrite", "randread", "read", "write"] 64 | for test_type in test_types: 65 | fio_result = fio_run(test_type, file_path) 66 | combined_result = '\n'.join(fio_result) 67 | 68 | print(f"[FIO] result : \n {combined_result}") 69 | delete_test_file() 70 | return combined_result 71 | 72 | 73 | def parse_fio_output(fio_result, test_type): 74 | ''' 75 | Function to parse the fio output 76 | 77 | Parameters: 78 | - fio_result: string containing the fio result 79 | - test_type: string containing the type of test to run 80 | 81 | Returns: 82 | - result_string: string containing the parsed result of the fio test 83 | ''' 84 | if test_type in ["randwrite", "write"]: 85 | pattern = re.compile(r'WRITE: bw=(.*?)\s\(.*?\),\s(.*?)\s\(.*?\)') 86 | elif test_type in ["randread", "read"]: 87 | pattern = re.compile(r'READ: bw=(.*?)\s\(.*?\),\s(.*?)\s\(.*?\)') 88 | else: 89 | print(f"[FIO] Unsupported test type: {test_type}") 90 | 91 | match = pattern.search(fio_result) 92 | if match: 93 | values_list = [match.group(1), match.group(2)] 94 | result_string = f"{test_type} bandwidth is {values_list[0]} ({values_list[1]})" 95 | print(f"[FIO] result string : {result_string}") 96 | else: 97 | print("[FIO] Pattern not found in the fio result.") 98 | 99 | return result_string 100 | 101 | 102 | def delete_test_file(): 103 | ''' 104 | Function to delete the test file 105 | ''' 106 | proc = subprocess.run( 107 | f'rm test.0.0', 108 | stdout=subprocess.PIPE, 109 | stderr=subprocess.STDOUT, 110 | shell=True 111 | ) 112 | -------------------------------------------------------------------------------- /options_files/default_options_files/initial_options_file.ini: -------------------------------------------------------------------------------- 1 | [Version] 2 | rocksdb_version=4.3.0 3 | options_file_version=1.1 4 | 5 | [DBOptions] 6 | stats_dump_period_sec=600 7 | max_manifest_file_size=18446744073709551615 8 | bytes_per_sync=8388608 9 | delayed_write_rate=2097152 10 | WAL_ttl_seconds=0 11 | WAL_size_limit_MB=0 12 | max_subcompactions=1 13 | wal_bytes_per_sync=0 14 | db_write_buffer_size=0 15 | keep_log_file_num=1000 16 | table_cache_numshardbits=4 17 | max_file_opening_threads=1 18 | writable_file_max_buffer_size=1048576 19 | random_access_max_buffer_size=1048576 20 | use_fsync=false 21 | max_total_wal_size=0 22 | max_open_files=-1 23 | skip_stats_update_on_db_open=false 24 | max_background_compactions=16 25 | manifest_preallocation_size=4194304 26 | max_background_flushes=7 27 | is_fd_close_on_exec=true 28 | max_log_file_size=0 29 | advise_random_on_open=true 30 | create_missing_column_families=false 31 | paranoid_checks=true 32 | delete_obsolete_files_period_micros=21600000000 33 | log_file_time_to_roll=0 34 | compaction_readahead_size=0 35 | create_if_missing=false 36 | use_adaptive_mutex=false 37 | enable_thread_tracking=false 38 | allow_fallocate=true 39 | error_if_exists=false 40 | recycle_log_file_num=0 41 | skip_log_error_on_recovery=false 42 | new_table_reader_for_compaction_inputs=true 43 | 
allow_mmap_reads=false 44 | allow_mmap_writes=false 45 | use_direct_reads=false 46 | use_direct_writes=false 47 | 48 | 49 | [CFOptions "default"] 50 | compaction_style=kCompactionStyleLevel 51 | compaction_filter=nullptr 52 | num_levels=6 53 | table_factory=BlockBasedTable 54 | comparator=leveldb.BytewiseComparator 55 | max_sequential_skip_in_iterations=8 56 | max_bytes_for_level_base=1073741824 57 | memtable_prefix_bloom_probes=6 58 | memtable_prefix_bloom_bits=0 59 | memtable_prefix_bloom_huge_page_tlb_size=0 60 | max_successive_merges=0 61 | arena_block_size=16777216 62 | min_write_buffer_number_to_merge=1 63 | target_file_size_multiplier=1 64 | source_compaction_factor=1 65 | max_bytes_for_level_multiplier=8 66 | max_bytes_for_level_multiplier_additional=2:3:5 67 | compaction_filter_factory=nullptr 68 | max_write_buffer_number=8 69 | level0_stop_writes_trigger=20 70 | compression=kSnappyCompression 71 | level0_file_num_compaction_trigger=4 72 | purge_redundant_kvs_while_flush=true 73 | max_write_buffer_size_to_maintain=0 74 | memtable_factory=SkipListFactory 75 | max_grandparent_overlap_factor=8 76 | expanded_compaction_factor=25 77 | hard_pending_compaction_bytes_limit=137438953472 78 | inplace_update_num_locks=10000 79 | level_compaction_dynamic_level_bytes=true 80 | level0_slowdown_writes_trigger=12 81 | filter_deletes=false 82 | verify_checksums_in_compaction=true 83 | min_partial_merge_operands=2 84 | paranoid_file_checks=false 85 | target_file_size_base=134217728 86 | optimize_filters_for_hits=false 87 | merge_operator=PutOperator 88 | compression_per_level=kNoCompression:kNoCompression:kNoCompression:kSnappyCompression:kSnappyCompression:kSnappyCompression 89 | compaction_measure_io_stats=false 90 | prefix_extractor=nullptr 91 | bloom_locality=0 92 | write_buffer_size=134217728 93 | disable_auto_compactions=false 94 | inplace_update_support=false 95 | 96 | [TableOptions/BlockBasedTable "default"] 97 | format_version=2 98 | whole_key_filtering=true 99 | no_block_cache=false 100 | checksum=kCRC32c 101 | filter_policy=rocksdb.BuiltinBloomFilter 102 | block_size_deviation=10 103 | block_size=8192 104 | block_restart_interval=16 105 | cache_index_and_filter_blocks=false 106 | pin_l0_filter_and_index_blocks_in_cache=false 107 | pin_top_level_index_and_filter=false 108 | index_type=kBinarySearch 109 | flush_block_policy_factory=FlushBlockBySizePolicyFactory -------------------------------------------------------------------------------- /options_files/ops_options_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import configparser 4 | from utils.constants import DEFAULT_OPTION_FILE_DIR, INITIAL_OPTIONS_FILE_NAME, OPTIONS_FILE_DIR 5 | from utils.filter import BLACKLIST 6 | from utils.parse import dict_to_configparser, configparser_to_string 7 | 8 | def parse_gpt_text_to_dict(gpt_output_text): 9 | ''' 10 | Function to parse the gpt output text with filters 11 | 12 | Parameters: 13 | - gpt_output_text (str): The output generated by gpt 14 | 15 | Returns: 16 | - options_dict (dict): A dictionary containing the parsed data 17 | ''' 18 | options_dict = {} 19 | 20 | for line in gpt_output_text.split("\n"): 21 | # Ignore lines starting with '#' as they are comments 22 | if not line.startswith('#'): 23 | # Split the line at the first '=' and strip whitespace 24 | parts = line.split(':', 1) 25 | if len(parts) == 1: 26 | parts = line.split('=', 1) 27 | if len(parts) == 2: 28 | # filters options that start with { - k 29 | if 
'{' not in parts[1].strip(): 30 | # filters options that are in the blacklist 31 | if parts[0].strip() not in BLACKLIST: 32 | key, value = parts[0].strip(), parts[1].strip() 33 | options_dict[key] = value 34 | 35 | return options_dict 36 | 37 | def cleanup_options_file(gpt_options_text): 38 | """ 39 | Function to clean up the options file generated by GPT 40 | - replace the values of the options in the original options file with the values generated by GPT-4 41 | eliminate 2 secnarios: 42 | 1. ```ini``` 43 | 2. ```...``` w/ multiple code blocks 44 | 45 | Parameters: 46 | - gpt_options_text: string containing the options file generated by GPT-4 47 | 48 | Returns: 49 | - config_string: string containing the options file in the original format 50 | """ 51 | clean_output_dict = parse_option_file_to_dict(open(f"{OPTIONS_FILE_DIR}").read()) 52 | 53 | # Parse the GPT-4 generated options 54 | gpt_output_dict = parse_gpt_text_to_dict(gpt_options_text) 55 | 56 | # Update the original options with GPT-4 generated value 57 | for key, value in gpt_output_dict.items(): 58 | for internal_dict in clean_output_dict: 59 | if key in clean_output_dict[internal_dict]: 60 | clean_output_dict[internal_dict][key] = gpt_output_dict[key] 61 | 62 | # Convert dictionary to configparser 63 | config_parser = dict_to_configparser(clean_output_dict) 64 | config_string = configparser_to_string(config_parser) 65 | 66 | # Save to a file 67 | with open(f"{OPTIONS_FILE_DIR}", "w") as file: 68 | file.write(config_string) 69 | return config_string 70 | 71 | def get_initial_options_file(): 72 | ''' 73 | Get the initial options file 74 | 75 | Parameters: 76 | - None 77 | 78 | Returns: 79 | - options (str): The initial options file 80 | - reasoning (str): The reasoning behind the options file 81 | ''' 82 | initial_options_file_path = os.path.join(DEFAULT_OPTION_FILE_DIR, 83 | INITIAL_OPTIONS_FILE_NAME) 84 | with open(initial_options_file_path, "r") as f: 85 | options = f.read() 86 | 87 | reasoning = f"Initial options file: {initial_options_file_path}" 88 | 89 | return options, reasoning 90 | 91 | 92 | def parse_option_file_to_dict(option_file): 93 | ''' 94 | Function to parse the given option file to a dictionary 95 | 96 | Parameters: 97 | - option_file (str): The path to the option file 98 | 99 | Returns: 100 | - parsed (dict): A dictionary containing the parsed data 101 | ''' 102 | pat = re.compile("(.*)\s*([#].*)?") 103 | config = configparser.ConfigParser() 104 | config.read_string(option_file) 105 | parsed = {section: dict(config.items(section)) 106 | for section in config.sections()} 107 | for section_name, section in parsed.items(): 108 | for k, v in section.items(): 109 | m = pat.match(v) 110 | section[k] = m[1] 111 | return parsed 112 | 113 | 114 | -------------------------------------------------------------------------------- /utils/graph.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | def plot(values, title, file): 4 | ''' 5 | Plots a single line graph based on a list of values. 6 | 7 | This function plots a simple line graph where the X-axis represents the index of each value in the list, and the Y-axis represents the value itself. 8 | 9 | Parameters: 10 | values (list): A list of numerical values to be plotted. 11 | title (str): The title of the plot. 12 | file (str): The file path where the plot image will be saved. 13 | 14 | Returns: 15 | - None. The plot is saved to the specified file path. 
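Example call with hypothetical values (main.py invokes it this way for the per-iteration ops/sec list):
    plot([118000.0, 132000.0, 127500.0], "OpsPerSec", "output/OpsPerSec.png")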
16 | 17 | ''' 18 | # Plotting 19 | plt.figure(figsize=(12, 6)) 20 | plt.plot(range(len(values)), values, label=title, linestyle='-') 21 | 22 | plt.title(title) 23 | plt.legend() 24 | plt.grid(True) 25 | 26 | plt.ylim(0, 400000) 27 | 28 | # Save the plot to a file 29 | plt.savefig(file) 30 | 31 | 32 | def plot_2axis(keys, values, title, file): 33 | ''' 34 | Plots a line graph with specified keys and values. 35 | 36 | This function is designed to plot a line graph where the X-axis is determined by the 'keys' parameter and the Y-axis by the 'values' parameter. 37 | 38 | Parameters: 39 | keys (list): A list of keys or indices for the X-axis. 40 | values (list): A list of numerical values for the Y-axis. 41 | title (str): The title of the plot. 42 | file (str): The file path where the plot image will be saved. 43 | 44 | Returns: 45 | - None. The plot is saved to the specified file path. 46 | ''' 47 | # Plotting 48 | plt.figure(figsize=(12, 6)) 49 | plt.plot(keys, values, label=title, linestyle='-') 50 | 51 | plt.title(title) 52 | plt.legend() 53 | plt.grid(True) 54 | 55 | plt.ylim(0, 400000) 56 | 57 | # Save the plot to a file 58 | plt.savefig(file) 59 | 60 | 61 | def plot_multiple(data, title, file): 62 | ''' 63 | Plots multiple line graphs from a list of data sets. 64 | 65 | This function is used to plot multiple line graphs on the same plot. Each item in the 'data' list represents a different line on the graph. 66 | 67 | Parameters: 68 | data (list of tuples): Each tuple contains two elements - a list of keys for the X-axis and a list of values for the Y-axis. 69 | title (str): The title of the plot. 70 | file (str): The file path where the plot image will be saved. 71 | 72 | Each line is labeled as 'Iteration-i' where i is the index of the data set in the 'data' list. 73 | 74 | Returns: 75 | - None. The plot is saved to the specified file path. 
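Example of the shape this function actually indexes (values are hypothetical): each item of 'data' is an entry of the options_files list built in main.py, i.e. a tuple whose second element is the benchmark-results dict, e.g.
    (options_str, {"ops_per_sec": 127500.0, "ops_per_second_graph": ([1, 2, 3], [118000.0, 132000.0, 127500.0])}, reasoning, summary_of_changes)
and the function reads element [1]["ops_per_second_graph"] of each tuple.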
76 | 77 | ''' 78 | 79 | # Plotting setup 80 | plt.figure(figsize=(12, 6)) 81 | for i, iteration in enumerate(data): 82 | keys, values = iteration[1]["ops_per_second_graph"] 83 | plt.plot(keys, values, label=f"Iteration-{i}", linestyle='-') 84 | 85 | plt.title(title) 86 | plt.legend() 87 | plt.grid(True) 88 | 89 | plt.ylim(0, 400000) 90 | 91 | # Save the plot to a file 92 | plt.savefig(file) 93 | 94 | def plot_multiple_manual(data, file): 95 | # Plotting 96 | plt.figure(figsize=(16.5, 8)) 97 | # labels = ["Default file", "Iteration 3", "Iteration 3", "Iteration 7"] 98 | labels = ["Default file", "Iteration 2", "Iteration 4", "Iteration 6"] 99 | colors = ['red', 'orange', 'royalblue', 'green'] 100 | for i, ops in enumerate(data): 101 | plt.plot(ops, label=f"{labels[i]}", linestyle='-',color=colors[i]) 102 | plt.xlabel("Time (seconds)") 103 | plt.ylabel("Throughput (kops/s)") 104 | plt.legend() 105 | 106 | 107 | plt.ylim(0, 400) 108 | plt.tight_layout() 109 | 110 | # Save the plot to a file 111 | plt.savefig(file) 112 | 113 | 114 | # pattern = r"\((\d+),(\d+)\) ops and \((\d+\.\d+),(\d+\.\d+)\) ops/second in \((\d+\.\d+),(\d+\.\d+)\) seconds" 115 | 116 | # folder_path = "/data/gpt_project/gpt-assisted-rocksdb-config/saved_output/fillrandom/output_nvme_v2/c4_m4" 117 | # file_names = ['0.ini', '2.ini', '4.ini', '6.ini'] 118 | # pattern = r'"ops_per_second_graph": \[\[([\d.,\s]+)\],\s+\[([\d.,\s]+)\]\]' 119 | 120 | # data = [] 121 | 122 | # for file_name in file_names: 123 | # file_path = os.path.join(folder_path, file_name) 124 | # with open(file_path, 'r') as f: 125 | # file_contents = f.read() 126 | # matches = re.findall(pattern, file_contents) 127 | # ops = [float(x)/1000 for x in matches[0][1].split(', ')] 128 | # data.append(ops) 129 | 130 | # plot_multiple_manual(data, "Ops_per_Second_combined.png") 131 | 132 | -------------------------------------------------------------------------------- /utils/system_operations/get_sys_info.py: -------------------------------------------------------------------------------- 1 | import os 2 | import psutil 3 | import subprocess 4 | import platform 5 | from cpuinfo import get_cpu_info 6 | from cgroup_monitor import CGroupMonitor 7 | 8 | def get_system_data(db_path): 9 | ''' 10 | Function to get the system data 11 | 12 | Parameters: 13 | - db_path (str): The path of database 14 | 15 | Returns: 16 | - brand_raw_value (str): The CPU model name 17 | - memory_total (int): The total memory 18 | - swap (int): The swap memory 19 | - total_disk_size (int): The total disk size 20 | - device (str): The device name 21 | ''' 22 | cgroup_monitor = CGroupMonitor() 23 | try: 24 | cpu_count = os.getenv("CPU_COUNT", str(cgroup_monitor.get_cpu_limit())) 25 | mem_max = os.getenv("MEMORY_MAX", str(cgroup_monitor.get_memory_limit())) 26 | 27 | # gets the CPU op-modes 28 | system_info = platform.uname() 29 | cpu_op_modes = system_info.processor 30 | 31 | # gets the CPU model name 32 | cpu_model = platform.processor() 33 | 34 | # get all the CPU cache sizes 35 | cpu_info = get_cpu_info() 36 | brand_raw_value = cpu_count + " cores of " + cpu_info['brand_raw'] 37 | 38 | l1_data_cache_size = cpu_info.get('l1_data_cache_size', 'N/A') 39 | l1_instruction_cache_size = cpu_info.get( 40 | 'l1_instruction_cache_size', 'N/A') 41 | l2_cache_size = cpu_info.get('l2_cache_size', 'N/A') 42 | l3_cache_size = cpu_info.get('l3_cache_size', 'N/A') 43 | 44 | # get the total memory 45 | # memory_total = psutil.virtual_memory().total 46 | memory_total = float(mem_max) 47 | 48 | # gets the 
percentage of RAM used 49 | memory_used = psutil.virtual_memory().percent 50 | 51 | # gets the percentage of RAM available 52 | memeory_remaining = psutil.virtual_memory().available * 100 / \ 53 | psutil.virtual_memory().total 54 | 55 | # gets the disk information 56 | # partitions = psutil.disk_partitions(all=True) 57 | 58 | swap = psutil.swap_memory() 59 | 60 | partitions = psutil.disk_partitions(all=False) 61 | path = os.path.dirname(db_path) 62 | total_disk_size = -1 63 | device = "" 64 | all_devices = check_drive_type() 65 | data_directory = path[:5] 66 | for partition in partitions: 67 | usage = psutil.disk_usage(partition.mountpoint) 68 | if (partition.mountpoint[:5] == data_directory): 69 | total_disk_size = usage.total 70 | if (partition.device.split('/')[-1] in all_devices): 71 | device = all_devices[partition.device.split('/')[-1]] 72 | elif (partition.device.split('/')[-1][:-1] in all_devices): 73 | device = all_devices[partition.device.split('/')[-1][:-1]] 74 | 75 | # returns all the system data required 76 | return brand_raw_value, memory_total, swap, total_disk_size, device 77 | 78 | except Exception as e: 79 | print(f"[SYS] Error in fetching system data: {e}") 80 | return None 81 | 82 | # Check drive type 83 | def check_drive_type(): 84 | ''' 85 | Function to check the drive type 86 | 87 | Returns: 88 | - drive_types (dict): A dictionary containing the drive types 89 | ''' 90 | # Path where the drive information is stored 91 | sys_block_path = "/sys/block" 92 | # Check if the path exists 93 | if os.path.exists(sys_block_path): 94 | # List of all devices 95 | devices = os.listdir(sys_block_path) 96 | drive_types = {} 97 | # Iterate through each device 98 | for device in devices: 99 | try: 100 | with open(f"{sys_block_path}/{device}/queue/rotational", "r") as file: 101 | rotational = file.read().strip() 102 | if rotational == "0": 103 | drive_types[device] = "SSD" 104 | else: 105 | drive_types[device] = "HDD" 106 | except IOError: 107 | # Unable to read the rotational file for this device 108 | pass 109 | return drive_types 110 | else: 111 | return "System block path does not exist." 112 | 113 | def system_info(db_path, fio_result): 114 | ''' 115 | Fetch system data for further runs 116 | 117 | Parameters: 118 | - db_path (str): The path of database 119 | - fio_result (str): The result of fio benchmark 120 | ''' 121 | system_data = get_system_data(db_path) 122 | data = (f"{system_data[0]} with {system_data[1]}GiB of Memory and {system_data[1]}GiB of Swap space." 123 | f"{system_data[4]} size : {system_data[3]/(1024 ** 4):.2f}T. A single instance of RocksDB is the always going to be the only process running. 
" 124 | f"{fio_result}") 125 | return data 126 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import getpass 4 | from datetime import datetime 5 | from collections import defaultdict 6 | from deepdiff import DeepDiff 7 | from utils.constants import OUTPUT_PATH, DEVICE, DB_PATH 8 | 9 | # LOG UTILS 10 | def log_update(update_string): 11 | ''' 12 | Update the log file with the given string 13 | 14 | Parameters: 15 | - update_string (str): The string to be updated in the log file 16 | 17 | Returns: 18 | - None 19 | ''' 20 | current_datetime = datetime.now() 21 | date_time_string = current_datetime.strftime("%Y-%m-%d %H:%M:%S") 22 | update_string = f"[{date_time_string}] {update_string}" 23 | 24 | if OUTPUT_PATH is None: 25 | with open(f"log.txt", "a+") as f: 26 | f.write(update_string + "\n") 27 | else: 28 | with open(f"{OUTPUT_PATH}/log.txt", "a+") as f: 29 | f.write(update_string + "\n") 30 | 31 | # STORE FILE UTILS 32 | def store_db_bench_output(output_folder_name, output_file_name, 33 | benchmark_results, options_file, reasoning): 34 | ''' 35 | Store the output of db_bench in a file 36 | 37 | Parameters: 38 | - output_folder_name (str): Name of the folder to store the output file 39 | - output_file_name (str): Name of the output file 40 | - benchmark_results (dict): Dictionary containing the benchmark results 41 | - options_file (str): The options file used to generate the benchmark results 42 | - reasoning (str): The reasoning behind the options file 43 | 44 | Returns: 45 | - None 46 | ''' 47 | with open(f"{output_folder_name}/{output_file_name}", "a+") as f: 48 | f.write("# " + json.dumps(benchmark_results) + "\n\n") 49 | f.write(options_file + "\n") 50 | for line in reasoning.splitlines(): 51 | f.write("# " + line + "\n") 52 | 53 | def store_best_option_file(options_files, output_folder_dir): 54 | ''' 55 | Save the best option file 56 | 57 | Parameters: 58 | - options_files (list): List of options files 59 | - output_folder_dir (str): The output directory 60 | ''' 61 | best_result = max(options_files, key=lambda x: x[1]["ops_per_sec"]) 62 | best_options = best_result[0] 63 | best_reasoning = best_result[2] 64 | with open(f"{output_folder_dir}/best_options.ini", "w") as f: 65 | f.write(best_options) 66 | for line in best_reasoning.splitlines(): 67 | f.write("# " + line + "\n") 68 | 69 | def store_diff_options_list(options_list, output_folder_dir): 70 | # Calculate differences between options_list 71 | differences = calculate_differences(options_list) 72 | changed_fields_frequency = defaultdict(lambda: 0) 73 | 74 | with open(f"{output_folder_dir}/diffOptions.txt", 'w') as f: 75 | for i, diff in enumerate(differences, start=1): 76 | f.write(f"[MFN] Differences between iteration {i} and iteration {i + 1}: \n") 77 | f.write(json.dumps(diff, indent=4)) 78 | f.write("\n") 79 | f.write("=" * 50) 80 | f.write("\n\n") 81 | 82 | for key in diff["values_changed"]: 83 | changed_fields_frequency[key] += 1 84 | 85 | f.write("\n\n[MFN] Changed Fields Frequency:\n") 86 | f.write(json.dumps(changed_fields_frequency, indent=4)) 87 | 88 | # PATH UTILS 89 | def path_of_db(): 90 | ''' 91 | Choose the database path 92 | 93 | Parameters: 94 | - None 95 | 96 | Returns: 97 | - db_path (str): The path of the database 98 | ''' 99 | user_name = getpass.getuser() 100 | db_path_name = DB_PATH + user_name[0].lower() 101 | db_path = os.getenv("DB_PATH", 
db_path_name) 102 | # log_update(f"[UTL] Using database path: {db_path}") 103 | print(f"[UTL] Using database path: {db_path}") 104 | 105 | return db_path 106 | 107 | def path_of_output_folder(): 108 | ''' 109 | Set the output folder directory 110 | 111 | Parameters: 112 | - None 113 | 114 | Returns: 115 | - output_folder_dir (str): The output folder directory 116 | ''' 117 | current_datetime = datetime.now() 118 | date_time_string = current_datetime.strftime("%Y-%m-%d_%H-%M-%S") 119 | if OUTPUT_PATH is None: 120 | output_folder_dir = f"output/output_{DEVICE}/output_{date_time_string}" 121 | else: 122 | output_folder_dir = OUTPUT_PATH 123 | 124 | os.makedirs(output_folder_dir, exist_ok=True) 125 | log_update(f"[UTL] Using output folder: {output_folder_dir}") 126 | print(f"[UTL] Using output folder: {output_folder_dir}") 127 | 128 | return output_folder_dir 129 | 130 | # OTHER UTILS 131 | def calculate_differences(iterations): 132 | ''' 133 | Function to calculate the differences between the iterations 134 | 135 | Parameters: 136 | - iterations (list): A list of the iterations 137 | 138 | Returns: 139 | - differences (list): A list of the differences between the iterations 140 | ''' 141 | differences = [] 142 | for i in range(1, len(iterations)): 143 | diff = DeepDiff(iterations[i-1], iterations[i]) 144 | differences.append(diff) 145 | return differences 146 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import utils.constants as constants 2 | from utils.graph import plot, plot_multiple 3 | from utils.system_operations.fio_runner import get_fio_result 4 | from options_files.ops_options_file import parse_option_file_to_dict, get_initial_options_file 5 | 6 | import rocksdb.subprocess_manager as spm 7 | from utils.utils import log_update, store_best_option_file, path_of_db, store_diff_options_list 8 | from utils.system_operations.get_sys_info import system_info 9 | from gpt.prompts_generator import generate_option_file_with_gpt 10 | import os 11 | 12 | def main(): 13 | ''' 14 | Main function to run the project. This function will run the db_bench with the initial options file and then 15 | generate new options files using GPT API and run db_bench with the new options file. This function will also 16 | store the output of db_bench in a file. The output file will contain the benchmark results, the options file 17 | used to generate the benchmark results and the reasoning behind the options file as provided by the GPT API. 18 | There will be a separate file for each iteration. 
19 | 20 | Parameters: 21 | - None 22 | 23 | Returns: 24 | - None 25 | ''' 26 | 27 | # initialize variables 28 | options_files = [] 29 | options_list = [] 30 | 31 | # Set up the path 32 | output_folder_dir = constants.OUTPUT_PATH 33 | os.makedirs(output_folder_dir, exist_ok=True) 34 | db_path = path_of_db() 35 | fio_result = get_fio_result(constants.FIO_RESULT_PATH) 36 | 37 | log_update(f"[MFN] Starting the program with the case number: {constants.CASE_NUMBER}") 38 | print(f"[MFN] Starting the program with the case number: {constants.CASE_NUMBER}") 39 | 40 | # First run, Initial options file and see how the results are 41 | options, reasoning = get_initial_options_file() 42 | 43 | is_error, benchmark_results, average_cpu_usage, average_memory_usage, options = spm.benchmark( 44 | db_path, options, output_folder_dir, reasoning, 0, None, options_files) 45 | 46 | if is_error: 47 | # If the initial options file fails, exit the program 48 | log_update("[MFN] Failed to benchmark with the initial options file. Exiting.") 49 | print("[MFN] Failed to benchmark with the initial options file. Exiting.") 50 | exit(1) 51 | else: 52 | # If the initial options file succeeds, store the options file and benchmark results, pass it to the GPT API to generate a new options file 53 | parsed_options = parse_option_file_to_dict(options) 54 | options_list.append(parsed_options) 55 | 56 | # Maintain a list of options files, benchmark results and why that option file was generated 57 | options_files.append((options, benchmark_results, reasoning, "")) 58 | 59 | iteration_count = 7 60 | 61 | for i in range(1, iteration_count + 1): 62 | 63 | log_update(f"[MFN] Starting iteration {i}") 64 | log_update(f"[MFN] Querying ChatGPT for next options file") 65 | 66 | print("-" * 50) 67 | print(f"[MFN] Starting iteration {i}") 68 | 69 | print("[MFN] Querying ChatGPT for next options file") 70 | temperature = 0.4 71 | retry_counter = 5 72 | generated = False 73 | 74 | for gpt_query_count in range(retry_counter, 0, -1): 75 | # Generate new options file with retry limit of 5 76 | 77 | new_options_file, reasoning, summary_of_changes = generate_option_file_with_gpt( 78 | constants.CASE_NUMBER, options_files, 79 | system_info(db_path, fio_result), temperature, 80 | average_cpu_usage, average_memory_usage, 81 | constants.TEST_NAME, constants.VERSION) 82 | if new_options_file is None: 83 | log_update(f"[MFN] Failed to generate options file. Retrying. Retries left: {gpt_query_count - 1}") 84 | print("[MFN] Failed to generate options file. Retrying. Retries left: ", gpt_query_count - 1) 85 | continue 86 | 87 | # Parse output 88 | is_error, benchmark_results, average_cpu_usage, average_memory_usage, new_options_file = spm.benchmark( 89 | db_path, new_options_file, output_folder_dir, reasoning, iteration_count, benchmark_results, options_files) 90 | if is_error: 91 | log_update(f"[MFN] Benchmark failed. Retrying with new options file. Retries left: {gpt_query_count - 1}") 92 | print("[MFN] Benchmark failed. Retrying with new options file. Retries left: ", gpt_query_count - 1) 93 | temperature += 0.1 94 | continue 95 | else: 96 | generated = True 97 | break 98 | 99 | if generated: 100 | options = new_options_file 101 | options_files.append((options, benchmark_results, reasoning, 102 | summary_of_changes)) 103 | parsed_options = parse_option_file_to_dict(options) 104 | options_list.append(parsed_options) 105 | else: 106 | log_update("[MFN] Failed to generate options file over 5 times. 
Exiting.") 107 | print("[MFN] Failed to generate options file over 5 times. Exiting.") 108 | exit(1) 109 | 110 | store_best_option_file(options_files, output_folder_dir) 111 | 112 | # Graph Ops/Sec 113 | plot([e[1]["ops_per_sec"] for e in options_files], "OpsPerSec", 114 | f"{output_folder_dir}/OpsPerSec.png") 115 | plot_multiple(options_files, "Ops Per Second", 116 | f"{output_folder_dir}/opsM_per_sec.png") 117 | 118 | store_diff_options_list(options_list, output_folder_dir) 119 | 120 | 121 | 122 | if __name__ == "__main__": 123 | main() 124 | -------------------------------------------------------------------------------- /options_files/default_options_files/dbbench_default_options-7.10.2.ini: -------------------------------------------------------------------------------- 1 | # This is a RocksDB option file. 2 | # 3 | # For detailed file format spec, please refer to the example file 4 | # in examples/rocksdb_option_file_example.ini 5 | # 6 | 7 | [Version] 8 | rocksdb_version=7.10.2 9 | options_file_version=1.1 10 | 11 | [DBOptions] 12 | max_open_files=-1 13 | stats_history_buffer_size=1048576 14 | stats_persist_period_sec=600 15 | max_background_flushes=-1 16 | stats_dump_period_sec=600 17 | compaction_readahead_size=0 18 | bytes_per_sync=0 19 | delete_obsolete_files_period_micros=21600000000 20 | max_total_wal_size=0 21 | delayed_write_rate=8388608 22 | wal_bytes_per_sync=0 23 | writable_file_max_buffer_size=1048576 24 | avoid_flush_during_shutdown=false 25 | max_subcompactions=1 26 | strict_bytes_per_sync=false 27 | max_background_compactions=-1 28 | max_background_jobs=2 29 | lowest_used_cache_tier=kNonVolatileBlockTier 30 | bgerror_resume_retry_interval=1000000 31 | max_bgerror_resume_count=2147483647 32 | best_efforts_recovery=false 33 | write_dbid_to_manifest=false 34 | avoid_unnecessary_blocking_io=false 35 | atomic_flush=false 36 | log_readahead_size=0 37 | dump_malloc_stats=true 38 | info_log_level=INFO_LEVEL 39 | write_thread_max_yield_usec=100 40 | max_write_batch_group_size_bytes=1048576 41 | wal_compression=kNoCompression 42 | write_thread_slow_yield_usec=3 43 | enable_pipelined_write=true 44 | persist_stats_to_disk=false 45 | max_manifest_file_size=1073741824 46 | WAL_size_limit_MB=0 47 | fail_if_options_file_error=false 48 | max_log_file_size=0 49 | manifest_preallocation_size=4194304 50 | listeners={ErrorHandlerListener:ErrorHandlerListener} 51 | log_file_time_to_roll=0 52 | allow_data_in_errors=false 53 | WAL_ttl_seconds=0 54 | recycle_log_file_num=0 55 | file_checksum_gen_factory=nullptr 56 | keep_log_file_num=1000 57 | db_write_buffer_size=0 58 | table_cache_numshardbits=4 59 | use_adaptive_mutex=false 60 | allow_ingest_behind=false 61 | skip_checking_sst_file_sizes_on_db_open=false 62 | skip_stats_update_on_db_open=false 63 | random_access_max_buffer_size=1048576 64 | access_hint_on_compaction_start=NORMAL 65 | allow_concurrent_memtable_write=true 66 | track_and_verify_wals_in_manifest=false 67 | paranoid_checks=true 68 | max_file_opening_threads=16 69 | verify_sst_unique_id_in_manifest=true 70 | avoid_flush_during_recovery=false 71 | flush_verify_memtable_count=true 72 | db_host_id=__hostname__ 73 | error_if_exists=false 74 | wal_recovery_mode=kPointInTimeRecovery 75 | enable_thread_tracking=false 76 | is_fd_close_on_exec=true 77 | enforce_single_del_contracts=true 78 | create_missing_column_families=true 79 | create_if_missing=true 80 | use_fsync=false 81 | wal_filter=nullptr 82 | allow_2pc=false 83 | use_direct_io_for_flush_and_compaction=false 84 | 
manual_wal_flush=false 85 | enable_write_thread_adaptive_yield=true 86 | use_direct_reads=false 87 | allow_mmap_writes=false 88 | allow_fallocate=true 89 | two_write_queues=false 90 | allow_mmap_reads=false 91 | unordered_write=false 92 | advise_random_on_open=true 93 | 94 | 95 | [CFOptions "default"] 96 | memtable_protection_bytes_per_key=0 97 | sample_for_compression=0 98 | blob_file_starting_level=0 99 | blob_compaction_readahead_size=0 100 | blob_garbage_collection_force_threshold=1.000000 101 | enable_blob_garbage_collection=false 102 | min_blob_size=0 103 | last_level_temperature=kUnknown 104 | enable_blob_files=false 105 | target_file_size_base=67108864 106 | max_sequential_skip_in_iterations=8 107 | prepopulate_blob_cache=kDisable 108 | compaction_options_fifo={allow_compaction=true;age_for_warm=0;max_table_files_size=0;} 109 | max_bytes_for_level_multiplier=10.000000 110 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1 111 | max_bytes_for_level_base=268435456 112 | experimental_mempurge_threshold=0.000000 113 | write_buffer_size=67108864 114 | bottommost_compression=kDisableCompressionOption 115 | prefix_extractor=nullptr 116 | blob_file_size=268435456 117 | memtable_huge_page_size=0 118 | max_successive_merges=0 119 | compression_opts={max_dict_buffer_bytes=0;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;level=32767;window_bits=-14;} 120 | arena_block_size=1048576 121 | memtable_whole_key_filtering=false 122 | target_file_size_multiplier=1 123 | max_write_buffer_number=2 124 | blob_compression_type=kNoCompression 125 | compression=kSnappyCompression 126 | level0_stop_writes_trigger=36 127 | level0_slowdown_writes_trigger=20 128 | level0_file_num_compaction_trigger=4 129 | ignore_max_compaction_bytes_for_input=true 130 | max_compaction_bytes=1677721600 131 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;} 132 | memtable_prefix_bloom_size_ratio=0.000000 133 | hard_pending_compaction_bytes_limit=137438953472 134 | bottommost_compression_opts={max_dict_buffer_bytes=0;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;level=32767;window_bits=-14;} 135 | blob_garbage_collection_age_cutoff=0.250000 136 | ttl=2592000 137 | soft_pending_compaction_bytes_limit=68719476736 138 | inplace_update_num_locks=10000 139 | paranoid_file_checks=false 140 | check_flush_compaction_key_order=true 141 | periodic_compaction_seconds=0 142 | disable_auto_compactions=false 143 | report_bg_io_stats=false 144 | compaction_style=kCompactionStyleLevel 145 | merge_operator=nullptr 146 | compaction_filter_factory=nullptr 147 | sst_partitioner_factory=nullptr 148 | table_factory=BlockBasedTable 149 | memtable_factory=SkipListFactory 150 | comparator=leveldb.BytewiseComparator 151 | compaction_pri=kMinOverlappingRatio 152 | bloom_locality=0 153 | num_levels=7 154 | min_write_buffer_number_to_merge=1 155 | compaction_filter=nullptr 156 | max_write_buffer_size_to_maintain=0 157 | max_write_buffer_number_to_maintain=0 158 | memtable_insert_with_hint_prefix_extractor=nullptr 159 | preclude_last_level_data_seconds=0 160 | force_consistency_checks=true 161 | optimize_filters_for_hits=false 162 | level_compaction_dynamic_file_size=true 163 | level_compaction_dynamic_level_bytes=false 164 | 
preserve_internal_time_seconds=0 165 | inplace_update_support=false 166 | 167 | [TableOptions/BlockBasedTable "default"] 168 | num_file_reads_for_auto_readahead=2 169 | initial_auto_readahead_size=8192 170 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;} 171 | enable_index_compression=true 172 | pin_top_level_index_and_filter=false 173 | read_amp_bytes_per_bit=0 174 | verify_compression=false 175 | prepopulate_block_cache=kDisable 176 | format_version=5 177 | partition_filters=false 178 | metadata_block_size=4096 179 | max_auto_readahead_size=262144 180 | index_block_restart_interval=1 181 | block_size_deviation=10 182 | block_size=4096 183 | detect_filter_construct_corruption=false 184 | no_block_cache=false 185 | checksum=kXXH3 186 | filter_policy=nullptr 187 | data_block_hash_table_util_ratio=0.750000 188 | block_restart_interval=16 189 | index_type=kBinarySearch 190 | pin_l0_filter_and_index_blocks_in_cache=false 191 | data_block_index_type=kDataBlockBinarySearch 192 | cache_index_and_filter_blocks_with_high_priority=true 193 | whole_key_filtering=true 194 | index_shortening=kShortenSeparatorsAndSuccessor 195 | cache_index_and_filter_blocks=false 196 | block_align=false 197 | optimize_filters_for_memory=false 198 | flush_block_policy_factory=FlushBlockBySizePolicyFactory 199 | 200 | -------------------------------------------------------------------------------- /options_files/default_options_files/bad_options.ini: -------------------------------------------------------------------------------- 1 | # This is a RocksDB option file. 2 | # 3 | # For detailed file format spec, please refer to the example file 4 | # in examples/rocksdb_option_file_example.ini 5 | # 6 | 7 | [Version] 8 | rocksdb_version=8.8.1 9 | options_file_version=1.1 10 | 11 | [DBOptions] 12 | max_background_flushes=1 13 | compaction_readahead_size=2097152 14 | wal_bytes_per_sync=0 15 | bytes_per_sync=0 16 | max_open_files=10 17 | stats_history_buffer_size=1048576 18 | stats_dump_period_sec=600 19 | stats_persist_period_sec=600 20 | delete_obsolete_files_period_micros=21600000000 21 | max_total_wal_size=0 22 | strict_bytes_per_sync=false 23 | delayed_write_rate=16777216 24 | avoid_flush_during_shutdown=false 25 | writable_file_max_buffer_size=1048576 26 | max_subcompactions=1 27 | max_background_compactions=1 28 | max_background_jobs=1 29 | lowest_used_cache_tier=kNonVolatileBlockTier 30 | bgerror_resume_retry_interval=1000000 31 | max_bgerror_resume_count=2147483647 32 | best_efforts_recovery=false 33 | write_dbid_to_manifest=false 34 | avoid_unnecessary_blocking_io=false 35 | atomic_flush=false 36 | log_readahead_size=0 37 | dump_malloc_stats=false 38 | info_log_level=INFO_LEVEL 39 | write_thread_max_yield_usec=100 40 | max_write_batch_group_size_bytes=1048576 41 | wal_compression=kNoCompression 42 | write_thread_slow_yield_usec=3 43 | enable_pipelined_write=false 44 | persist_stats_to_disk=false 45 | max_manifest_file_size=1073741824 46 | WAL_size_limit_MB=0 47 | fail_if_options_file_error=true 48 | max_log_file_size=0 49 | manifest_preallocation_size=4194304 50 | log_file_time_to_roll=0 51 | allow_data_in_errors=false 52 | WAL_ttl_seconds=0 53 | recycle_log_file_num=0 54 | file_checksum_gen_factory=nullptr 55 | keep_log_file_num=1000 56 | db_write_buffer_size=0 57 | table_cache_numshardbits=6 58 | use_adaptive_mutex=false 59 | allow_ingest_behind=false 60 | skip_checking_sst_file_sizes_on_db_open=false 61 | random_access_max_buffer_size=1048576 
62 | access_hint_on_compaction_start=NORMAL 63 | allow_concurrent_memtable_write=true 64 | track_and_verify_wals_in_manifest=false 65 | skip_stats_update_on_db_open=false 66 | compaction_verify_record_count=true 67 | paranoid_checks=true 68 | max_file_opening_threads=16 69 | verify_sst_unique_id_in_manifest=true 70 | avoid_flush_during_recovery=false 71 | flush_verify_memtable_count=true 72 | db_host_id=__hostname__ 73 | error_if_exists=false 74 | wal_recovery_mode=kPointInTimeRecovery 75 | enable_thread_tracking=false 76 | is_fd_close_on_exec=true 77 | enforce_single_del_contracts=true 78 | create_missing_column_families=false 79 | create_if_missing=true 80 | use_fsync=false 81 | wal_filter=nullptr 82 | allow_2pc=false 83 | use_direct_io_for_flush_and_compaction=false 84 | manual_wal_flush=false 85 | enable_write_thread_adaptive_yield=true 86 | use_direct_reads=false 87 | allow_mmap_writes=false 88 | allow_fallocate=true 89 | two_write_queues=false 90 | allow_mmap_reads=false 91 | unordered_write=false 92 | advise_random_on_open=true 93 | 94 | 95 | [CFOptions "default"] 96 | memtable_max_range_deletions=0 97 | block_protection_bytes_per_key=0 98 | memtable_protection_bytes_per_key=0 99 | sample_for_compression=0 100 | blob_file_starting_level=0 101 | blob_compaction_readahead_size=0 102 | blob_garbage_collection_force_threshold=1.000000 103 | enable_blob_garbage_collection=false 104 | min_blob_size=0 105 | last_level_temperature=kUnknown 106 | enable_blob_files=false 107 | target_file_size_base=16777216 108 | max_sequential_skip_in_iterations=8 109 | prepopulate_blob_cache=kDisable 110 | compaction_options_fifo={allow_compaction=false;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=1073741824;} 111 | max_bytes_for_level_multiplier=10.000000 112 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1 113 | max_bytes_for_level_base=268435456 114 | experimental_mempurge_threshold=0.000000 115 | write_buffer_size=16777216 116 | bottommost_compression=kDisableCompressionOption 117 | prefix_extractor=nullptr 118 | blob_file_size=268435456 119 | memtable_huge_page_size=0 120 | bottommost_file_compaction_delay=0 121 | max_successive_merges=0 122 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 123 | arena_block_size=1048576 124 | memtable_whole_key_filtering=false 125 | target_file_size_multiplier=1 126 | max_write_buffer_number=2 127 | blob_compression_type=kNoCompression 128 | compression=kSnappyCompression 129 | level0_stop_writes_trigger=20 130 | level0_slowdown_writes_trigger=10 131 | level0_file_num_compaction_trigger=6 132 | ignore_max_compaction_bytes_for_input=true 133 | max_compaction_bytes=1677721600 134 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;} 135 | memtable_prefix_bloom_size_ratio=0.000000 136 | hard_pending_compaction_bytes_limit=21474836480 137 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 138 | blob_garbage_collection_age_cutoff=0.250000 139 | ttl=2592000 140 | 
soft_pending_compaction_bytes_limit=68719476736 141 | inplace_update_num_locks=10000 142 | paranoid_file_checks=false 143 | check_flush_compaction_key_order=true 144 | periodic_compaction_seconds=0 145 | disable_auto_compactions=false 146 | report_bg_io_stats=false 147 | compaction_pri=kMinOverlappingRatio 148 | compaction_style=kCompactionStyleLevel 149 | merge_operator=nullptr 150 | table_factory=BlockBasedTable 151 | memtable_factory=SkipListFactory 152 | comparator=leveldb.BytewiseComparator 153 | compaction_filter_factory=nullptr 154 | num_levels=7 155 | min_write_buffer_number_to_merge=1 156 | bloom_locality=0 157 | max_write_buffer_size_to_maintain=0 158 | sst_partitioner_factory=nullptr 159 | preserve_internal_time_seconds=0 160 | preclude_last_level_data_seconds=0 161 | max_write_buffer_number_to_maintain=0 162 | default_temperature=kUnknown 163 | optimize_filters_for_hits=false 164 | level_compaction_dynamic_file_size=false 165 | memtable_insert_with_hint_prefix_extractor=nullptr 166 | level_compaction_dynamic_level_bytes=true 167 | inplace_update_support=false 168 | persist_user_defined_timestamps=true 169 | compaction_filter=nullptr 170 | force_consistency_checks=true 171 | 172 | [TableOptions/BlockBasedTable "default"] 173 | num_file_reads_for_auto_readahead=2 174 | initial_auto_readahead_size=8192 175 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;} 176 | enable_index_compression=true 177 | pin_top_level_index_and_filter=true 178 | read_amp_bytes_per_bit=0 179 | verify_compression=false 180 | prepopulate_block_cache=kDisable 181 | format_version=5 182 | partition_filters=false 183 | metadata_block_size=4096 184 | max_auto_readahead_size=262144 185 | index_block_restart_interval=1 186 | block_size_deviation=10 187 | block_size=4096 188 | detect_filter_construct_corruption=false 189 | no_block_cache=false 190 | checksum=kXXH3 191 | filter_policy=nullptr 192 | data_block_hash_table_util_ratio=0.750000 193 | block_restart_interval=16 194 | index_type=kBinarySearch 195 | pin_l0_filter_and_index_blocks_in_cache=false 196 | data_block_index_type=kDataBlockBinarySearch 197 | cache_index_and_filter_blocks_with_high_priority=true 198 | whole_key_filtering=true 199 | index_shortening=kShortenSeparators 200 | cache_index_and_filter_blocks=false 201 | block_align=false 202 | optimize_filters_for_memory=false 203 | flush_block_policy_factory=FlushBlockBySizePolicyFactory 204 | 205 | -------------------------------------------------------------------------------- /options_files/default_options_files/good_options.ini: -------------------------------------------------------------------------------- 1 | # This is a RocksDB option file. 
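# Relative to rocksdb_default_options.ini, this profile mainly raises background parallelism (max_background_jobs=12, max_subcompactions=8) and memtable capacity (write_buffer_size=134217728, max_write_buffer_number=4).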
2 | # 3 | # For detailed file format spec, please refer to the example file 4 | # in examples/rocksdb_option_file_example.ini 5 | # 6 | 7 | [Version] 8 | rocksdb_version=8.8.1 9 | options_file_version=1.1 10 | 11 | [DBOptions] 12 | max_background_flushes=-1 13 | compaction_readahead_size=2097152 14 | wal_bytes_per_sync=0 15 | bytes_per_sync=0 16 | max_open_files=-1 17 | stats_history_buffer_size=1048576 18 | stats_dump_period_sec=600 19 | stats_persist_period_sec=600 20 | delete_obsolete_files_period_micros=21600000000 21 | max_total_wal_size=0 22 | strict_bytes_per_sync=false 23 | delayed_write_rate=16777216 24 | avoid_flush_during_shutdown=false 25 | writable_file_max_buffer_size=1048576 26 | max_subcompactions=8 27 | max_background_compactions=-1 28 | max_background_jobs=12 29 | lowest_used_cache_tier=kNonVolatileBlockTier 30 | bgerror_resume_retry_interval=1000000 31 | max_bgerror_resume_count=2147483647 32 | best_efforts_recovery=false 33 | write_dbid_to_manifest=false 34 | avoid_unnecessary_blocking_io=false 35 | atomic_flush=false 36 | log_readahead_size=0 37 | dump_malloc_stats=false 38 | info_log_level=INFO_LEVEL 39 | write_thread_max_yield_usec=100 40 | max_write_batch_group_size_bytes=1048576 41 | wal_compression=kNoCompression 42 | write_thread_slow_yield_usec=3 43 | enable_pipelined_write=false 44 | persist_stats_to_disk=false 45 | max_manifest_file_size=1073741824 46 | WAL_size_limit_MB=0 47 | fail_if_options_file_error=true 48 | max_log_file_size=0 49 | manifest_preallocation_size=4194304 50 | log_file_time_to_roll=0 51 | allow_data_in_errors=false 52 | WAL_ttl_seconds=0 53 | recycle_log_file_num=0 54 | file_checksum_gen_factory=nullptr 55 | keep_log_file_num=1000 56 | db_write_buffer_size=0 57 | table_cache_numshardbits=6 58 | use_adaptive_mutex=false 59 | allow_ingest_behind=false 60 | skip_checking_sst_file_sizes_on_db_open=false 61 | random_access_max_buffer_size=1048576 62 | access_hint_on_compaction_start=NORMAL 63 | allow_concurrent_memtable_write=true 64 | track_and_verify_wals_in_manifest=false 65 | skip_stats_update_on_db_open=false 66 | compaction_verify_record_count=true 67 | paranoid_checks=true 68 | max_file_opening_threads=16 69 | verify_sst_unique_id_in_manifest=true 70 | avoid_flush_during_recovery=false 71 | flush_verify_memtable_count=true 72 | db_host_id=__hostname__ 73 | error_if_exists=false 74 | wal_recovery_mode=kPointInTimeRecovery 75 | enable_thread_tracking=false 76 | is_fd_close_on_exec=true 77 | enforce_single_del_contracts=true 78 | create_missing_column_families=false 79 | create_if_missing=true 80 | use_fsync=false 81 | wal_filter=nullptr 82 | allow_2pc=false 83 | use_direct_io_for_flush_and_compaction=false 84 | manual_wal_flush=false 85 | enable_write_thread_adaptive_yield=true 86 | use_direct_reads=false 87 | allow_mmap_writes=false 88 | allow_fallocate=true 89 | two_write_queues=false 90 | allow_mmap_reads=false 91 | unordered_write=false 92 | advise_random_on_open=true 93 | 94 | 95 | [CFOptions "default"] 96 | memtable_max_range_deletions=0 97 | block_protection_bytes_per_key=0 98 | memtable_protection_bytes_per_key=0 99 | sample_for_compression=0 100 | blob_file_starting_level=0 101 | blob_compaction_readahead_size=0 102 | blob_garbage_collection_force_threshold=1.000000 103 | enable_blob_garbage_collection=false 104 | min_blob_size=0 105 | last_level_temperature=kUnknown 106 | enable_blob_files=false 107 | target_file_size_base=67108864 108 | max_sequential_skip_in_iterations=8 109 | prepopulate_blob_cache=kDisable 110 | 
compaction_options_fifo={allow_compaction=false;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=1073741824;} 111 | max_bytes_for_level_multiplier=10.000000 112 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1 113 | max_bytes_for_level_base=268435456 114 | experimental_mempurge_threshold=0.000000 115 | write_buffer_size=134217728 116 | bottommost_compression=kDisableCompressionOption 117 | prefix_extractor=nullptr 118 | blob_file_size=268435456 119 | memtable_huge_page_size=0 120 | bottommost_file_compaction_delay=0 121 | max_successive_merges=0 122 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 123 | arena_block_size=1048576 124 | memtable_whole_key_filtering=false 125 | target_file_size_multiplier=1 126 | max_write_buffer_number=4 127 | blob_compression_type=kNoCompression 128 | compression=kSnappyCompression 129 | level0_stop_writes_trigger=36 130 | level0_slowdown_writes_trigger=24 131 | level0_file_num_compaction_trigger=4 132 | ignore_max_compaction_bytes_for_input=true 133 | max_compaction_bytes=1677721600 134 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;} 135 | memtable_prefix_bloom_size_ratio=0.000000 136 | hard_pending_compaction_bytes_limit=274877906944 137 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 138 | blob_garbage_collection_age_cutoff=0.250000 139 | ttl=2592000 140 | soft_pending_compaction_bytes_limit=68719476736 141 | inplace_update_num_locks=10000 142 | paranoid_file_checks=false 143 | check_flush_compaction_key_order=true 144 | periodic_compaction_seconds=0 145 | disable_auto_compactions=false 146 | report_bg_io_stats=false 147 | compaction_pri=kMinOverlappingRatio 148 | compaction_style=kCompactionStyleLevel 149 | merge_operator=nullptr 150 | table_factory=BlockBasedTable 151 | memtable_factory=SkipListFactory 152 | comparator=leveldb.BytewiseComparator 153 | compaction_filter_factory=nullptr 154 | num_levels=7 155 | min_write_buffer_number_to_merge=1 156 | bloom_locality=0 157 | max_write_buffer_size_to_maintain=0 158 | sst_partitioner_factory=nullptr 159 | preserve_internal_time_seconds=0 160 | preclude_last_level_data_seconds=0 161 | max_write_buffer_number_to_maintain=0 162 | default_temperature=kUnknown 163 | optimize_filters_for_hits=false 164 | level_compaction_dynamic_file_size=true 165 | memtable_insert_with_hint_prefix_extractor=nullptr 166 | level_compaction_dynamic_level_bytes=true 167 | inplace_update_support=false 168 | persist_user_defined_timestamps=true 169 | compaction_filter=nullptr 170 | force_consistency_checks=true 171 | 172 | [TableOptions/BlockBasedTable "default"] 173 | num_file_reads_for_auto_readahead=2 174 | initial_auto_readahead_size=8192 175 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;} 176 | enable_index_compression=true 177 | pin_top_level_index_and_filter=true 178 | read_amp_bytes_per_bit=0 179 | verify_compression=false 180 | prepopulate_block_cache=kDisable 181 | 
format_version=5 182 | partition_filters=false 183 | metadata_block_size=4096 184 | max_auto_readahead_size=262144 185 | index_block_restart_interval=1 186 | block_size_deviation=10 187 | block_size=4096 188 | detect_filter_construct_corruption=false 189 | no_block_cache=false 190 | checksum=kXXH3 191 | filter_policy=nullptr 192 | data_block_hash_table_util_ratio=0.750000 193 | block_restart_interval=16 194 | index_type=kBinarySearch 195 | pin_l0_filter_and_index_blocks_in_cache=false 196 | data_block_index_type=kDataBlockBinarySearch 197 | cache_index_and_filter_blocks_with_high_priority=true 198 | whole_key_filtering=true 199 | index_shortening=kShortenSeparators 200 | cache_index_and_filter_blocks=false 201 | block_align=false 202 | optimize_filters_for_memory=false 203 | flush_block_policy_factory=FlushBlockBySizePolicyFactory 204 | 205 | -------------------------------------------------------------------------------- /options_files/default_options_files/rocksdb_default_options.ini: -------------------------------------------------------------------------------- 1 | # This is a RocksDB option file. 2 | # 3 | # For detailed file format spec, please refer to the example file 4 | # in examples/rocksdb_option_file_example.ini 5 | # 6 | 7 | [Version] 8 | rocksdb_version=8.8.1 9 | options_file_version=1.1 10 | 11 | [DBOptions] 12 | max_background_flushes=-1 13 | compaction_readahead_size=2097152 14 | wal_bytes_per_sync=0 15 | bytes_per_sync=0 16 | max_open_files=-1 17 | stats_history_buffer_size=1048576 18 | stats_dump_period_sec=600 19 | stats_persist_period_sec=600 20 | delete_obsolete_files_period_micros=21600000000 21 | max_total_wal_size=0 22 | strict_bytes_per_sync=false 23 | delayed_write_rate=16777216 24 | avoid_flush_during_shutdown=false 25 | writable_file_max_buffer_size=1048576 26 | max_subcompactions=1 27 | max_background_compactions=-1 28 | max_background_jobs=2 29 | lowest_used_cache_tier=kNonVolatileBlockTier 30 | bgerror_resume_retry_interval=1000000 31 | max_bgerror_resume_count=2147483647 32 | best_efforts_recovery=false 33 | write_dbid_to_manifest=false 34 | avoid_unnecessary_blocking_io=false 35 | atomic_flush=false 36 | log_readahead_size=0 37 | dump_malloc_stats=false 38 | info_log_level=INFO_LEVEL 39 | write_thread_max_yield_usec=100 40 | max_write_batch_group_size_bytes=1048576 41 | wal_compression=kNoCompression 42 | write_thread_slow_yield_usec=3 43 | enable_pipelined_write=false 44 | persist_stats_to_disk=false 45 | max_manifest_file_size=1073741824 46 | WAL_size_limit_MB=0 47 | fail_if_options_file_error=true 48 | max_log_file_size=0 49 | manifest_preallocation_size=4194304 50 | log_file_time_to_roll=0 51 | allow_data_in_errors=false 52 | WAL_ttl_seconds=0 53 | recycle_log_file_num=0 54 | file_checksum_gen_factory=nullptr 55 | keep_log_file_num=1000 56 | db_write_buffer_size=0 57 | table_cache_numshardbits=6 58 | use_adaptive_mutex=false 59 | allow_ingest_behind=false 60 | skip_checking_sst_file_sizes_on_db_open=false 61 | random_access_max_buffer_size=1048576 62 | access_hint_on_compaction_start=NORMAL 63 | allow_concurrent_memtable_write=true 64 | track_and_verify_wals_in_manifest=false 65 | skip_stats_update_on_db_open=false 66 | compaction_verify_record_count=true 67 | paranoid_checks=true 68 | max_file_opening_threads=16 69 | verify_sst_unique_id_in_manifest=true 70 | avoid_flush_during_recovery=false 71 | flush_verify_memtable_count=true 72 | db_host_id=__hostname__ 73 | error_if_exists=false 74 | wal_recovery_mode=kPointInTimeRecovery 75 | 
enable_thread_tracking=false 76 | is_fd_close_on_exec=true 77 | enforce_single_del_contracts=true 78 | create_missing_column_families=false 79 | create_if_missing=false 80 | use_fsync=false 81 | wal_filter=nullptr 82 | allow_2pc=false 83 | use_direct_io_for_flush_and_compaction=false 84 | manual_wal_flush=false 85 | enable_write_thread_adaptive_yield=true 86 | use_direct_reads=false 87 | allow_mmap_writes=false 88 | allow_fallocate=true 89 | two_write_queues=false 90 | allow_mmap_reads=false 91 | unordered_write=false 92 | advise_random_on_open=true 93 | 94 | 95 | [CFOptions "default"] 96 | memtable_max_range_deletions=0 97 | block_protection_bytes_per_key=0 98 | memtable_protection_bytes_per_key=0 99 | sample_for_compression=0 100 | blob_file_starting_level=0 101 | blob_compaction_readahead_size=0 102 | blob_garbage_collection_force_threshold=1.000000 103 | enable_blob_garbage_collection=false 104 | min_blob_size=0 105 | last_level_temperature=kUnknown 106 | enable_blob_files=false 107 | target_file_size_base=67108864 108 | max_sequential_skip_in_iterations=8 109 | prepopulate_blob_cache=kDisable 110 | compaction_options_fifo={allow_compaction=false;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=1073741824;} 111 | max_bytes_for_level_multiplier=10.000000 112 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1 113 | max_bytes_for_level_base=268435456 114 | experimental_mempurge_threshold=0.000000 115 | write_buffer_size=67108864 116 | bottommost_compression=kDisableCompressionOption 117 | prefix_extractor=nullptr 118 | blob_file_size=268435456 119 | memtable_huge_page_size=0 120 | bottommost_file_compaction_delay=0 121 | max_successive_merges=0 122 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 123 | arena_block_size=1048576 124 | memtable_whole_key_filtering=false 125 | target_file_size_multiplier=1 126 | max_write_buffer_number=2 127 | blob_compression_type=kNoCompression 128 | compression=kSnappyCompression 129 | level0_stop_writes_trigger=36 130 | level0_slowdown_writes_trigger=20 131 | level0_file_num_compaction_trigger=4 132 | ignore_max_compaction_bytes_for_input=true 133 | max_compaction_bytes=1677721600 134 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;} 135 | memtable_prefix_bloom_size_ratio=0.000000 136 | hard_pending_compaction_bytes_limit=274877906944 137 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 138 | blob_garbage_collection_age_cutoff=0.250000 139 | ttl=2592000 140 | soft_pending_compaction_bytes_limit=68719476736 141 | inplace_update_num_locks=10000 142 | paranoid_file_checks=false 143 | check_flush_compaction_key_order=true 144 | periodic_compaction_seconds=0 145 | disable_auto_compactions=false 146 | report_bg_io_stats=false 147 | compaction_pri=kMinOverlappingRatio 148 | compaction_style=kCompactionStyleLevel 149 | merge_operator=nullptr 150 | table_factory=BlockBasedTable 151 | memtable_factory=SkipListFactory 152 | comparator=leveldb.BytewiseComparator 153 | 
compaction_filter_factory=nullptr 154 | num_levels=7 155 | min_write_buffer_number_to_merge=1 156 | bloom_locality=0 157 | max_write_buffer_size_to_maintain=0 158 | sst_partitioner_factory=nullptr 159 | preserve_internal_time_seconds=0 160 | preclude_last_level_data_seconds=0 161 | max_write_buffer_number_to_maintain=0 162 | default_temperature=kUnknown 163 | optimize_filters_for_hits=false 164 | level_compaction_dynamic_file_size=true 165 | memtable_insert_with_hint_prefix_extractor=nullptr 166 | level_compaction_dynamic_level_bytes=true 167 | inplace_update_support=false 168 | persist_user_defined_timestamps=true 169 | compaction_filter=nullptr 170 | force_consistency_checks=true 171 | 172 | [TableOptions/BlockBasedTable "default"] 173 | num_file_reads_for_auto_readahead=2 174 | initial_auto_readahead_size=8192 175 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;} 176 | enable_index_compression=true 177 | pin_top_level_index_and_filter=true 178 | read_amp_bytes_per_bit=0 179 | verify_compression=false 180 | prepopulate_block_cache=kDisable 181 | format_version=5 182 | partition_filters=false 183 | metadata_block_size=4096 184 | max_auto_readahead_size=262144 185 | index_block_restart_interval=1 186 | block_size_deviation=10 187 | block_size=4096 188 | detect_filter_construct_corruption=false 189 | no_block_cache=false 190 | checksum=kXXH3 191 | filter_policy=nullptr 192 | data_block_hash_table_util_ratio=0.750000 193 | block_restart_interval=16 194 | index_type=kBinarySearch 195 | pin_l0_filter_and_index_blocks_in_cache=false 196 | data_block_index_type=kDataBlockBinarySearch 197 | cache_index_and_filter_blocks_with_high_priority=true 198 | whole_key_filtering=true 199 | index_shortening=kShortenSeparators 200 | cache_index_and_filter_blocks=false 201 | block_align=false 202 | optimize_filters_for_memory=false 203 | flush_block_policy_factory=FlushBlockBySizePolicyFactory 204 | 205 | -------------------------------------------------------------------------------- /options_files/default_options_files/dbbench_default_options-8.8.1.ini: -------------------------------------------------------------------------------- 1 | # This is a RocksDB option file. 
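# As the file name suggests, these are the options db_bench itself produces by default on 8.8.1; note that it enables direct I/O (use_direct_io_for_flush_and_compaction=true, use_direct_reads=true) and disables compression (compression=kNoCompression).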
2 | # 3 | # For detailed file format spec, please refer to the example file 4 | # in examples/rocksdb_option_file_example.ini 5 | # 6 | 7 | [Version] 8 | rocksdb_version=8.8.1 9 | options_file_version=1.1 10 | 11 | [DBOptions] 12 | max_background_flushes=-1 13 | compaction_readahead_size=2097152 14 | wal_bytes_per_sync=0 15 | bytes_per_sync=0 16 | max_open_files=-1 17 | stats_history_buffer_size=1048576 18 | stats_dump_period_sec=600 19 | stats_persist_period_sec=600 20 | delete_obsolete_files_period_micros=21600000000 21 | max_total_wal_size=0 22 | strict_bytes_per_sync=false 23 | delayed_write_rate=8388608 24 | avoid_flush_during_shutdown=false 25 | writable_file_max_buffer_size=1048576 26 | max_subcompactions=1 27 | max_background_compactions=-1 28 | max_background_jobs=2 29 | lowest_used_cache_tier=kNonVolatileBlockTier 30 | bgerror_resume_retry_interval=1000000 31 | max_bgerror_resume_count=2147483647 32 | best_efforts_recovery=false 33 | write_dbid_to_manifest=false 34 | avoid_unnecessary_blocking_io=false 35 | atomic_flush=false 36 | log_readahead_size=0 37 | dump_malloc_stats=true 38 | info_log_level=INFO_LEVEL 39 | write_thread_max_yield_usec=100 40 | max_write_batch_group_size_bytes=1048576 41 | wal_compression=kNoCompression 42 | write_thread_slow_yield_usec=3 43 | enable_pipelined_write=true 44 | persist_stats_to_disk=false 45 | max_manifest_file_size=1073741824 46 | WAL_size_limit_MB=0 47 | fail_if_options_file_error=true 48 | max_log_file_size=0 49 | manifest_preallocation_size=4194304 50 | listeners={ErrorHandlerListener:ErrorHandlerListener} 51 | log_file_time_to_roll=0 52 | allow_data_in_errors=false 53 | WAL_ttl_seconds=0 54 | recycle_log_file_num=0 55 | file_checksum_gen_factory=nullptr 56 | keep_log_file_num=1000 57 | db_write_buffer_size=0 58 | table_cache_numshardbits=4 59 | use_adaptive_mutex=false 60 | allow_ingest_behind=false 61 | skip_checking_sst_file_sizes_on_db_open=false 62 | random_access_max_buffer_size=1048576 63 | access_hint_on_compaction_start=NORMAL 64 | allow_concurrent_memtable_write=true 65 | track_and_verify_wals_in_manifest=false 66 | skip_stats_update_on_db_open=false 67 | compaction_verify_record_count=true 68 | paranoid_checks=true 69 | max_file_opening_threads=16 70 | verify_sst_unique_id_in_manifest=true 71 | avoid_flush_during_recovery=false 72 | flush_verify_memtable_count=true 73 | db_host_id=__hostname__ 74 | error_if_exists=false 75 | wal_recovery_mode=kPointInTimeRecovery 76 | enable_thread_tracking=false 77 | is_fd_close_on_exec=true 78 | enforce_single_del_contracts=true 79 | create_missing_column_families=true 80 | create_if_missing=true 81 | use_fsync=false 82 | wal_filter=nullptr 83 | allow_2pc=false 84 | use_direct_io_for_flush_and_compaction=true 85 | manual_wal_flush=false 86 | enable_write_thread_adaptive_yield=true 87 | use_direct_reads=true 88 | allow_mmap_writes=false 89 | allow_fallocate=true 90 | two_write_queues=false 91 | allow_mmap_reads=false 92 | unordered_write=false 93 | advise_random_on_open=true 94 | 95 | 96 | [CFOptions "default"] 97 | memtable_max_range_deletions=0 98 | block_protection_bytes_per_key=0 99 | memtable_protection_bytes_per_key=0 100 | sample_for_compression=0 101 | blob_file_starting_level=0 102 | blob_compaction_readahead_size=0 103 | blob_garbage_collection_force_threshold=1.000000 104 | enable_blob_garbage_collection=false 105 | min_blob_size=0 106 | last_level_temperature=kUnknown 107 | enable_blob_files=false 108 | target_file_size_base=67108864 109 | max_sequential_skip_in_iterations=8 110 | 
prepopulate_blob_cache=kDisable 111 | compaction_options_fifo={allow_compaction=true;age_for_warm=0;file_temperature_age_thresholds=;max_table_files_size=0;} 112 | max_bytes_for_level_multiplier=10.000000 113 | max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1 114 | max_bytes_for_level_base=268435456 115 | experimental_mempurge_threshold=0.000000 116 | write_buffer_size=67108864 117 | bottommost_compression=kDisableCompressionOption 118 | prefix_extractor=nullptr 119 | blob_file_size=268435456 120 | memtable_huge_page_size=0 121 | bottommost_file_compaction_delay=0 122 | max_successive_merges=0 123 | compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 124 | arena_block_size=1048576 125 | memtable_whole_key_filtering=false 126 | target_file_size_multiplier=1 127 | max_write_buffer_number=2 128 | blob_compression_type=kNoCompression 129 | compression=kNoCompression 130 | level0_stop_writes_trigger=36 131 | level0_slowdown_writes_trigger=20 132 | level0_file_num_compaction_trigger=4 133 | ignore_max_compaction_bytes_for_input=true 134 | max_compaction_bytes=1677721600 135 | compaction_options_universal={allow_trivial_move=false;incremental=false;stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;min_merge_width=2;size_ratio=1;} 136 | memtable_prefix_bloom_size_ratio=0.000000 137 | hard_pending_compaction_bytes_limit=137438953472 138 | bottommost_compression_opts={max_dict_buffer_bytes=0;checksum=false;use_zstd_dict_trainer=true;enabled=false;parallel_threads=1;zstd_max_train_bytes=0;strategy=0;max_dict_bytes=0;max_compressed_bytes_per_kb=896;level=32767;window_bits=-14;} 139 | blob_garbage_collection_age_cutoff=0.250000 140 | ttl=2592000 141 | soft_pending_compaction_bytes_limit=68719476736 142 | inplace_update_num_locks=10000 143 | paranoid_file_checks=false 144 | check_flush_compaction_key_order=true 145 | periodic_compaction_seconds=0 146 | disable_auto_compactions=false 147 | report_bg_io_stats=false 148 | compaction_pri=kMinOverlappingRatio 149 | compaction_style=kCompactionStyleLevel 150 | merge_operator=nullptr 151 | table_factory=BlockBasedTable 152 | memtable_factory=SkipListFactory 153 | comparator=leveldb.BytewiseComparator 154 | compaction_filter_factory=nullptr 155 | num_levels=7 156 | min_write_buffer_number_to_merge=1 157 | bloom_locality=0 158 | max_write_buffer_size_to_maintain=0 159 | sst_partitioner_factory=nullptr 160 | preserve_internal_time_seconds=0 161 | preclude_last_level_data_seconds=0 162 | max_write_buffer_number_to_maintain=0 163 | default_temperature=kUnknown 164 | optimize_filters_for_hits=false 165 | level_compaction_dynamic_file_size=true 166 | memtable_insert_with_hint_prefix_extractor=nullptr 167 | level_compaction_dynamic_level_bytes=false 168 | inplace_update_support=false 169 | persist_user_defined_timestamps=true 170 | compaction_filter=nullptr 171 | force_consistency_checks=true 172 | 173 | [TableOptions/BlockBasedTable "default"] 174 | num_file_reads_for_auto_readahead=2 175 | initial_auto_readahead_size=8192 176 | metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;} 177 | enable_index_compression=true 178 | pin_top_level_index_and_filter=false 179 | read_amp_bytes_per_bit=0 180 | verify_compression=false 181 | 
prepopulate_block_cache=kDisable 182 | format_version=5 183 | partition_filters=false 184 | metadata_block_size=4096 185 | max_auto_readahead_size=262144 186 | index_block_restart_interval=1 187 | block_size_deviation=10 188 | block_size=4096 189 | detect_filter_construct_corruption=false 190 | no_block_cache=false 191 | checksum=kXXH3 192 | filter_policy=nullptr 193 | data_block_hash_table_util_ratio=0.750000 194 | block_restart_interval=16 195 | index_type=kBinarySearch 196 | pin_l0_filter_and_index_blocks_in_cache=false 197 | data_block_index_type=kDataBlockBinarySearch 198 | cache_index_and_filter_blocks_with_high_priority=true 199 | whole_key_filtering=true 200 | index_shortening=kShortenSeparatorsAndSuccessor 201 | cache_index_and_filter_blocks=false 202 | block_align=false 203 | optimize_filters_for_memory=false 204 | flush_block_policy_factory=FlushBlockBySizePolicyFactory 205 | 206 | -------------------------------------------------------------------------------- /rocksdb/parse_db_bench_output.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | from utils.utils import log_update 4 | 5 | def parse_db_bench_output(output): 6 | 7 | if re.match("Unable to load options file.*", output) is not None: 8 | return { 9 | "error": "Invalid options file" 10 | } 11 | 12 | # Regular expression to find and extract the number of Entries 13 | # Searches for the pattern "Entries:" followed by one or more digits 14 | entries_match = re.search(r"Entries:\s+(\d+)", output) 15 | # If a match is found, convert the captured digits to an integer 16 | entries = int(entries_match.group(1)) if entries_match else None 17 | 18 | # Regular expression to parse the output line 19 | # Captures various performance metrics and their units 20 | test_name = None 21 | 22 | if "readrandomwriterandom" in output: 23 | op_line = output.split("readrandomwriterandom")[1].split("\n")[0] 24 | test_name = "readrandomwriterandom" 25 | test_pattern = r"readrandomwriterandom\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;" 26 | elif "fillrandom" in output: 27 | op_line = output.split("fillrandom")[1].split("\n")[0] 28 | test_name = "fillrandom" 29 | test_pattern = r"fillrandom\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;\s+(\d+\.\d+)\s+(\w+/s)\nMicroseconds per write:\nCount:\s+(\d+)\s+Average:\s+(\d+\.\d+)\s+StdDev:\s+(\d+\.\d+)\nMin:\s+(\d+)\s+Median:\s+(\d+\.\d+)\s+Max:\s+(\d+)\nPercentiles:\s+P50:\s+(\d+\.\d+)\s+P75:\s+(\d+\.\d+)\s+P99:\s+(\d+\.\d+)\s+P99\.9:\s+(\d+\.\d+)\s+P99\.99:\s+(\d+\.\d+)\n-{50}" 30 | elif "readrandom" in output: 31 | op_line = output.split("readrandom")[1].split("\n")[0] 32 | test_name = "readrandom" 33 | test_pattern = r"readrandom\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;\s+(\d+\.\d+)\s+(\w+/s)\s+\((\d+)\s+of\s+(\d+)\s+found\)\n\nMicroseconds per read:\nCount:\s+(\d+)\s+Average:\s+(\d+\.\d+)\s+StdDev:\s+(\d+\.\d+)\nMin:\s+(\d+)\s+Median:\s+(\d+\.\d+)\s+Max:\s+(\d+)\nPercentiles:\s+P50:\s+(\d+\.\d+)\s+P75:\s+(\d+\.\d+)\s+P99:\s+(\d+\.\d+)\s+P99\.9:\s+(\d+\.\d+)\s+P99\.99:\s+(\d+\.\d+)\n-{50}" 34 | elif "mixgraph" in output: 35 | op_line = output.split("mixgraph :")[1].split("\n")[0] 36 | test_name = "mixgraph" 37 | test_pattern = r"mixgraph\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;" 38 | # test_pattern = 
r"mixgraph\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;\s+\(\s+Gets:+(\d+)\s+Puts:+(\d+)\s+Seek:(\d+),\s+reads\s+(\d+)\s+in\s+(\d+)\s+found,\s+avg\s+size:\s+\d+\s+value,\s+-nan\s+scan\)\n\nMicroseconds per read:\nCount:\s+(\d+)\s+Average:\s+(\d+\.\d+)\s+StdDev:\s+(\d+\.\d+)\nMin:\s+(\d+)\s+Median:\s+(\d+\.\d+)\s+Max:\s+(\d+)\nPercentiles:\s+P50:\s+(\d+\.\d+)\s+P75:\s+(\d+\.\d+)\s+P99:\s+(\d+\.\d+)\s+P99\.9:\s+(\d+\.\d+)\s+P99\.99:\s+(\d+\.\d+)\n-{50}" 39 | elif "readwhilewriting" in output: 40 | op_line = output.split("readwhilewriting")[1].split("\n")[0] 41 | test_name = "readwhilewriting" 42 | test_pattern = r"readwhilewriting\s+:\s+(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;" 43 | else: 44 | log_update(f"[PDB] Test name not found in output: {output}") 45 | op_line = "unknown test" 46 | test_name = "unknown" 47 | test_pattern = r"(\d+\.\d+)\s+micros/op\s+(\d+)\s+ops/sec\s+(\d+\.\d+)\s+seconds\s+(\d+)\s+operations;(\s+\(.*found:\d+\))?\nMicroseconds per (read|write):\nCount: (\d+) Average: (\d+\.\d+) StdDev: (\d+\.\d+)\nMin: (\d+) Median: (\d+\.\d+) Max: (\d+)\nPercentiles: P50: (\d+\.\d+) P75: (\d+\.\d+) P99: (\d+\.\d+) P99.9: (\d+\.\d+) P99.99: (\d+\.\d+)" 48 | 49 | pattern_matches = re.findall(test_pattern, output) 50 | log_update(f"[PDB] Test name: {test_name}") 51 | log_update(f"[PDB] Matches: {pattern_matches}") 52 | log_update(f"[PDB] Output line: {op_line}") 53 | # Set all values to None if the pattern is not found 54 | micros_per_op = ops_per_sec = total_seconds = total_operations = data_speed = data_speed_unit = None 55 | 56 | # Extract the performance metrics if the pattern is found 57 | for pattern_match in pattern_matches: 58 | # Convert each captured group to the appropriate type (float or int) 59 | micros_per_op = float(pattern_match[0]) 60 | ops_per_sec = int(pattern_match[1]) 61 | total_seconds = float(pattern_match[2]) 62 | total_operations = int(pattern_match[3]) 63 | # Check for specific workloads to handle additional data 64 | if "readrandomwriterandom" in output: 65 | data_speed = ops_per_sec 66 | data_speed_unit = "ops/sec" 67 | reads_found = None 68 | elif "fillrandom" in output: 69 | data_speed = float(pattern_match[4]) 70 | data_speed_unit = pattern_match[5] 71 | writes_data = { 72 | "count": int(pattern_match[6]), 73 | "average": float(pattern_match[7]), 74 | "std_dev": float(pattern_match[8]), 75 | "min": int(pattern_match[9]), 76 | "median": float(pattern_match[10]), 77 | "max": int(pattern_match[11]), 78 | "percentiles": { 79 | "P50": float(pattern_match[12]), 80 | "P75": float(pattern_match[13]), 81 | "P99": float(pattern_match[14]), 82 | "P99.9": float(pattern_match[15]), 83 | "P99.99": float(pattern_match[16]) 84 | } 85 | } 86 | elif "readrandom" in output: 87 | data_speed = float(pattern_match[4]) 88 | data_speed_unit = pattern_match[5] 89 | reads_found = { 90 | "count": int(pattern_match[6]), 91 | "total": int(pattern_match[7]) 92 | } 93 | reads_data = { 94 | "count": int(pattern_match[8]), 95 | "average": float(pattern_match[9]), 96 | "std_dev": float(pattern_match[10]), 97 | "min": int(pattern_match[11]), 98 | "median": float(pattern_match[12]), 99 | "max": int(pattern_match[13]), 100 | "percentiles": { 101 | "P50": float(pattern_match[14]), 102 | "P75": float(pattern_match[15]), 103 | "P99": float(pattern_match[16]), 104 | "P99.9": float(pattern_match[17]), 105 | "P99.99": float(pattern_match[18]) 106 | } 107 | } 108 | elif "readwhilewriting" in output: 109 | 
data_speed = ops_per_sec 110 | data_speed_unit = "ops/sec"  # this pattern only captures the four common fields, so report ops/sec 111 | # reads_found = { 112 | # "count": int(pattern_match[6]), 113 | # "total": int(pattern_match[7]) 114 | # } 115 | # reads_data = { 116 | # "count": int(pattern_match[8]), 117 | # "average": float(pattern_match[9]), 118 | # "std_dev": float(pattern_match[10]), 119 | # "min": int(pattern_match[11]), 120 | # "median": float(pattern_match[12]), 121 | # "max": int(pattern_match[13]), 122 | # "percentiles": { 123 | # "P50": float(pattern_match[14]), 124 | # "P75": float(pattern_match[15]), 125 | # "P99": float(pattern_match[16]), 126 | # "P99.9": float(pattern_match[17]), 127 | # "P99.99": float(pattern_match[18]) 128 | # } 129 | # } 130 | elif "mixgraph" in output: 131 | data_speed = ops_per_sec 132 | data_speed_unit = "ops/sec" 133 | else: 134 | log_update(f"[PDB] Test name not found in output: {output}") 135 | data_speed = ops_per_sec 136 | data_speed_unit = "ops/sec" 137 | 138 | log_update(f"[PDB] Ops per sec: {ops_per_sec} Total seconds: {total_seconds} Total operations: {total_operations} Data speed: {data_speed} {data_speed_unit}") 139 | 140 | ops_per_sec_points = re.findall("and \((.*),.*\) ops\/second in \(.*,(.*)\)", output) 141 | 142 | # Store all extracted values in a dictionary 143 | parsed_data = { 144 | "entries": entries, 145 | "micros_per_op": micros_per_op, 146 | "ops_per_sec": ops_per_sec, 147 | "total_seconds": total_seconds, 148 | "total_operations": total_operations, 149 | "data_speed": data_speed, 150 | "data_speed_unit": data_speed_unit, 151 | "ops_per_second_graph": [ 152 | [float(a[1]) for a in ops_per_sec_points], 153 | [float(a[0]) for a in ops_per_sec_points], 154 | ] 155 | } 156 | 157 | # Grab the latency and push into the output logs file 158 | latency = re.findall("Percentiles:.*", output) 159 | for i in latency: 160 | log_update("[PDB] " + i) 161 | 162 | # Return the dictionary with the parsed data 163 | return parsed_data 164 | -------------------------------------------------------------------------------- /rocksdb/subprocess_manager.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | import time 4 | 5 | from cgroup_monitor import CGroupMonitor 6 | 7 | from utils.utils import log_update, path_of_db 8 | from utils.constants import TEST_NAME, DB_BENCH_PATH, OPTIONS_FILE_DIR, NUM_ENTRIES, SIDE_CHECKER, FIO_RESULT_PATH 9 | from rocksdb.parse_db_bench_output import parse_db_bench_output 10 | from utils.utils import store_db_bench_output 11 | from utils.graph import plot_2axis 12 | from gpt.prompts_generator import midway_options_file_generation 13 | from utils.system_operations.fio_runner import get_fio_result 14 | from utils.system_operations.get_sys_info import system_info 15 | 16 | 17 | def pre_tasks(database_path, run_count): 18 | ''' 19 | Function to perform the pre-tasks before running the db_bench 20 | Parameters: 21 | - database_path (str): The path to the database 22 | - run_count (str): The current iteration of the benchmark 23 | 24 | Returns: 25 | - None 26 | ''' 27 | 28 | # Try to delete the database if path exists 29 | proc = subprocess.run( 30 | f'rm -rf {database_path}', 31 | stdout=subprocess.PIPE, 32 | stderr=subprocess.STDOUT, 33 | shell=True, 34 | check=False 35 | ) 36 | 37 | log_update("[SPM] Flushing the cache") 38 | print("[SPM] Flushing the cache") 39 | # Delay for all the current memory to be freed 40 | proc = subprocess.run( 41 | f'sync; echo 3 > /proc/sys/vm/drop_caches', 42 | 
stdout=subprocess.PIPE, 43 | stderr=subprocess.STDOUT, 44 | shell=True, 45 | check=False 46 | ) 47 | 48 | # update_log_file("[SPM] Waiting for 90 seconds to free up memory, IO and other resources") 49 | print("[SPM] Waiting for 30 seconds to free up memory, IO and other resources") 50 | # Give a 30 second delay for all the current memory/IO/etc to be freed 51 | time.sleep(30) 52 | 53 | 54 | def generate_db_bench_command(db_bench_path, database_path, options, run_count, test_name): 55 | ''' 56 | Generate the DB bench command 57 | 58 | Parameters: 59 | - db_bench_path (str): The path to the db_bench executable 60 | - database_path (str): The path to the database 61 | - options (str): The options file contents to be used 62 | - run_count (str): The current iteration of the benchmark 63 | - test_name (str): The name of the test 64 | 65 | Returns: 66 | - list: The db_bench command 67 | ''' 68 | 69 | db_bench_command = [ 70 | db_bench_path, 71 | f"--db={database_path}", 72 | f"--options_file={OPTIONS_FILE_DIR}", 73 | "--use_direct_io_for_flush_and_compaction", 74 | "--use_direct_reads", "--compression_type=none", 75 | "--stats_interval_seconds=1", "--histogram", 76 | f"--num={NUM_ENTRIES}", "--duration=100" 77 | ] 78 | 79 | 80 | if test_name == "fillrandom": 81 | db_bench_command.append("--benchmarks=fillrandom") 82 | elif test_name == "readrandomwriterandom": 83 | db_bench_command.append("--benchmarks=readrandomwriterandom") 84 | elif test_name == "readrandom": 85 | tmp_runner = db_bench_command[:-2] + ["--num=25000000", "--benchmarks=fillrandom"] 86 | tmp_proc = subprocess.run(tmp_runner, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False) 87 | new_db_bench = db_bench_command[:-2] + ["--benchmarks=readrandom", "--use_existing_db", "--num=25000000", "--duration=1000"] 88 | db_bench_command = new_db_bench 89 | elif test_name == "mixgraph": 90 | tmp_runner = db_bench_command[:-2] + ["--num=25000000", "--benchmarks=fillrandom"] 91 | tmp_proc = subprocess.run(tmp_runner, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False) 92 | new_db_bench = db_bench_command[:-1] + ["--benchmarks=mixgraph", "--use_existing_db", "--duration=1000", "--mix_get_ratio=0.5", "--mix_put_ratio=0.5", "--mix_seek_ratio=0.0"] 93 | db_bench_command = new_db_bench 94 | elif test_name == "readwhilewriting": 95 | db_bench_command.append("--benchmarks=readwhilewriting") 96 | else: 97 | print(f"[SPM] Test name {test_name} not recognized") 98 | exit(1) 99 | 100 | log_update(f"[SPM] Command: {db_bench_command}") 101 | return db_bench_command 102 | 103 | 104 | def db_bench(db_bench_path, database_path, options, run_count, test_name, previous_throughput, options_files, bm_iter=0): 105 | ''' 106 | Store the options in a file 107 | Do the benchmark 108 | 109 | Parameters: 110 | - db_bench_path (str): The path to the db_bench executable 111 | - database_path (str): The path to the database 112 | - options (str): The options file contents to be used 113 | - run_count (str): The current iteration of the benchmark 114 | 115 | Returns: 116 | - tuple: (output, avg_cpu_used, avg_mem_used, options) 117 | ''' 118 | global proc_out 119 | with open(f"{OPTIONS_FILE_DIR}", "w") as f: 120 | f.write(options) 121 | 122 | # Perform pre-tasks to reset the environment 123 | pre_tasks(database_path, run_count) 124 | command = generate_db_bench_command(db_bench_path, database_path, options, run_count, test_name) 125 | 126 | log_update(f"[SPM] Executing db_bench with command: {command}") 127 | print("[SPM] Executing db_bench") 128 | 129 | 130 | if SIDE_CHECKER and 
previous_throughput != None: 131 | cgroup_monitor = CGroupMonitor() 132 | cgroup_monitor.start_monitor() 133 | start_time = time.time() 134 | 135 | with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) as proc_out: 136 | output = "" 137 | check_interval = 30 138 | for line in proc_out.stdout: 139 | output += line 140 | if time.time() - start_time <= check_interval: 141 | continue 142 | 143 | start_time = time.time() 144 | if "ops/second" in line: 145 | current_avg_throughput = float(line.split("(")[2].split(",")[1].split(")")[0]) 146 | 147 | if (current_avg_throughput < 0.9 * float(previous_throughput)) and (bm_iter < 3): 148 | print("[SQU] Throughput decreased, resetting the benchmark") 149 | log_update(f"[SQU] Throughput decreased {previous_throughput}->{current_avg_throughput}, resetting the benchmark") 150 | avg_cpu_used, avg_mem_used = cgroup_monitor.stop_monitor() 151 | proc_out.kill() 152 | 153 | db_path = path_of_db() 154 | fio_result = get_fio_result(FIO_RESULT_PATH) 155 | device_info = system_info(db_path, fio_result) 156 | 157 | new_options, _, _ = midway_options_file_generation(options, avg_cpu_used, avg_mem_used, current_avg_throughput, device_info, options_files) 158 | output, avg_cpu_used, avg_mem_used, options = db_bench(db_bench_path, database_path, new_options, run_count, test_name, previous_throughput, options_files, bm_iter+1) 159 | 160 | log_update("[SPM] Finished running db_bench") 161 | return output, avg_cpu_used, avg_mem_used, options 162 | else: 163 | print("[SQU] No throughput found in the output") 164 | log_update("[SQU] No throughput found in the output") 165 | # exit(1) 166 | 167 | print("[SPM] Finished running db_bench") 168 | print("----------------------------------------------------------------------------") 169 | print("[SPM] Output: ", output) 170 | avg_cpu_used, avg_mem_used = cgroup_monitor.stop_monitor() 171 | return output, avg_cpu_used, avg_mem_used, options 172 | else: 173 | cgroup_monitor = CGroupMonitor() 174 | cgroup_monitor.start_monitor() 175 | proc_out = subprocess.run( 176 | command, 177 | stdout=subprocess.PIPE, 178 | stderr=subprocess.STDOUT, 179 | check=False 180 | ) 181 | avg_cpu_used, avg_mem_used = cgroup_monitor.stop_monitor() 182 | return proc_out.stdout.decode(), avg_cpu_used, avg_mem_used, options 183 | 184 | 185 | def benchmark(db_path, options, output_file_dir, reasoning, iteration_count, previous_results, options_files): 186 | ''' 187 | Function to run db_bench with the given options file and store the output in a file 188 | 189 | Parameters: 190 | - db_path (str): The path of database 191 | - options (dict): The options to be used 192 | - output_file_dir (str): the output directory 193 | - reasoning (str): The reasoning of the benchmark 194 | 195 | Returns: 196 | - is_error (bool): 197 | - benchmark_results (dict): 198 | ''' 199 | if previous_results is None: 200 | output, average_cpu_usage, average_memory_usage, options = db_bench( 201 | DB_BENCH_PATH, db_path, options, iteration_count, TEST_NAME, None, options_files) 202 | else: 203 | output, average_cpu_usage, average_memory_usage, options = db_bench( 204 | DB_BENCH_PATH, db_path, options, iteration_count, TEST_NAME, previous_results['ops_per_sec'], options_files) 205 | 206 | # log_update(f"[SPM] Output: {output}") 207 | benchmark_results = parse_db_bench_output(output) 208 | 209 | contents = os.listdir(output_file_dir) 210 | ini_file_count = len([f for f in contents if f.endswith(".ini")]) 211 | 212 | if 
benchmark_results.get("error") is not None: 213 | is_error = True 214 | log_update(f"[SPM] Benchmark failed, the error is: {benchmark_results.get('error')}") 215 | print("[SPM] Benchmark failed, the error is: ", 216 | benchmark_results.get("error")) 217 | # Save incorrect options in a file 218 | store_db_bench_output(output_file_dir, 219 | f"{ini_file_count}-incorrect_options.ini", 220 | benchmark_results, options, reasoning) 221 | elif benchmark_results['data_speed'] is None: 222 | is_error = True 223 | log_update(f"[SPM] Benchmark failed, the error is: Data speed is None. Check DB save path") 224 | print("[SPM] Benchmark failed, the error is: ", 225 | "Data speed is None. Check DB save path") 226 | # Save incorrect options in a file 227 | store_db_bench_output(output_file_dir, 228 | f"{ini_file_count}-incorrect_options.ini", 229 | benchmark_results, options, reasoning) 230 | else: 231 | is_error = False 232 | # Store the output of db_bench in a file 233 | store_db_bench_output(output_file_dir, f"{ini_file_count}.ini", 234 | benchmark_results, options, reasoning) 235 | plot_2axis(*benchmark_results["ops_per_second_graph"], 236 | "Ops Per Second", 237 | f"{output_file_dir}/ops_per_sec_{ini_file_count}.png") 238 | log_update(f"[SPM] Latest result: {benchmark_results['data_speed']}" 239 | f"{benchmark_results['data_speed_unit']} and {benchmark_results['ops_per_sec']} ops/sec.") 240 | log_update(f"[SPM] Avg CPU and Memory usage: {average_cpu_usage}% and {average_memory_usage}%") 241 | print( 242 | f"[SPM] Latest result: {benchmark_results['data_speed']}", 243 | f"{benchmark_results['data_speed_unit']} and {benchmark_results['ops_per_sec']} ops/sec.\n", 244 | f"[SPM] Avg CPU and Memory usage: {average_cpu_usage}% and {average_memory_usage}%" 245 | ) 246 | 247 | return is_error, benchmark_results, average_cpu_usage, average_memory_usage, options 248 | -------------------------------------------------------------------------------- /gpt/prompts_generator.py: -------------------------------------------------------------------------------- 1 | import re 2 | from difflib import Differ 3 | from options_files.ops_options_file import cleanup_options_file 4 | from gpt.gpt_request import request_gpt 5 | from utils.utils import log_update 6 | from dotenv import load_dotenv 7 | import utils.constants as constants 8 | 9 | load_dotenv() 10 | 11 | def generate_system_content(device_information, rocksdb_version): 12 | """ 13 | Function to generate the system content with device info and rocksDB version. 14 | 15 | Parameters: 16 | device_information (str): Information about the device. 17 | 18 | Returns: 19 | str: A prompt for configuring RocksDB for enhanced performance. 20 | """ 21 | 22 | content = ( 23 | "You are a RocksDB Expert. " 24 | "You are being consulted by a company to help improve their RocksDB configuration " 25 | "by optimizing their options file based on their System information and benchmark output." 26 | f"Only provide option files for rocksdb version {rocksdb_version}. Also, Direct IO will always be used for both flush and compaction." 27 | "Additionally, compression type is set to none always." 28 | "First Explain the reasoning, only change 10 options, then show the option file in original format." 
29 | f"The Device information is: {device_information}" 30 | ) 31 | return content 32 | 33 | def generate_default_user_content(chunk_string, previous_option_files, average_cpu_used=-1.0, average_mem_used=-1.0, test_name="fillrandom"): 34 | user_contents = [] 35 | for _, benchmark_result, reasoning, _ in previous_option_files[1: -1]: 36 | benchmark_line = generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used) 37 | user_content = f"The option file changes were:\n```\n{reasoning}\n```\nThe benchmark results are: {benchmark_line}" 38 | user_contents.append(user_content) 39 | 40 | _, benchmark_result, _, _ = previous_option_files[-1] 41 | benchmark_line = generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used) 42 | user_content = f"Part of the current option file is:\n```\n{chunk_string}\n```\nThe benchmark results are: {benchmark_line}" 43 | user_contents.append(user_content) 44 | user_contents.append("Based on these information generate a new file in same format as the options_file to improve my database performance. Enclose the new options file in ```.") 45 | return user_contents 46 | 47 | def generate_user_content_with_difference(previous_option_files, average_cpu_used=-1.0, average_mem_used=-1.0, test_name="fillrandom"): 48 | result =" " 49 | user_content = [] 50 | 51 | if len(previous_option_files) == 1: 52 | m1_file, m1_benchmark_result, _, _ = previous_option_files[-1] 53 | benchmark_line = generate_benchmark_info(test_name, m1_benchmark_result, average_cpu_used, average_mem_used) 54 | user_content = f"The original file is:\n```\n{m1_file}\n```\nThe benchmark results for the original file are: {benchmark_line}" 55 | 56 | elif len(previous_option_files) > 1: 57 | previous_option_file1, _, _, _ = previous_option_files[-1] 58 | previous_option_file2, _, _, _ = previous_option_files[-2] 59 | 60 | pattern = re.compile(r'\s*([^=\s]+)\s*=\s*([^=\s]+)\s*') 61 | 62 | file1_lines = pattern.findall(previous_option_file1) 63 | file2_lines = pattern.findall(previous_option_file2) 64 | 65 | file1_lines = ["{} = {}".format(k, v) for k, v in file1_lines] 66 | file2_lines = ["{} = {}".format(k, v) for k, v in file2_lines] 67 | differ = Differ() 68 | diff = list(differ.compare(file1_lines, file2_lines)) 69 | lst= [] 70 | for line in diff: 71 | if line.startswith('+'): 72 | lst.append(line) 73 | result = '\n'.join(line[2:] for line in lst) 74 | m2_file, m2_benchmark_result, _, _ = previous_option_files[-2] 75 | benchmark_line = generate_benchmark_info(test_name, m2_benchmark_result, average_cpu_used, average_mem_used) 76 | user_content = ( 77 | f"The original file is:\n```\n{m2_file}\n```\n" 78 | f"The benchmark results for the original file are: {benchmark_line}\n" 79 | f"The previous file modifications are:\n```\n{result}\n```\n" 80 | ) 81 | 82 | else: 83 | _, benchmark_result, _, _ = previous_option_files[-1] 84 | benchmark_line = generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used) 85 | 86 | user_content = ("The previous file modifications are: " 87 | f"\n```\n{result}\n```\n" 88 | f"The benchmark results for the previous file are: {benchmark_line}") 89 | 90 | 91 | user_contents = [user_content, "Based on these information generate a new file in the same format as the options_file to improve my database performance. 
Enclose the new options file in ```."] 92 | return user_contents 93 | 94 | def generate_benchmark_info(test_name, benchmark_result, average_cpu_used, average_mem_used): 95 | """ 96 | Function to create a formatted string with benchmark information. 97 | 98 | Parameters: 99 | - test_name: Name of the test. 100 | - benchmark_result: Dictionary with benchmark results. 101 | - average_cpu_used: Average CPU usage. 102 | - average_mem_used: Average Memory usage. 103 | 104 | Returns: 105 | - A formatted string with all benchmark information. 106 | """ 107 | benchmark_line = (f"The use case for the database is perfectly simulated by the {test_name} test. " 108 | f"The db_bench benchmark results for {test_name} are: Write/Read speed: {benchmark_result['data_speed']} " 109 | f"{benchmark_result['data_speed_unit']}, Operations per second: {benchmark_result['ops_per_sec']}.") 110 | 111 | if average_cpu_used != -1 and average_mem_used != -1: 112 | benchmark_line += f" CPU used: {average_cpu_used}%, Memory used: {average_mem_used}% during test." 113 | 114 | return benchmark_line 115 | 116 | def midway_options_file_generation(options, avg_cpu_used, avg_mem_used, last_throughput, device_information, options_file): 117 | """ 118 | Function to generate a prompt for the midway options file generation. 119 | 120 | Returns: 121 | - tuple: The cleaned-up options file, the reasoning text, and an empty placeholder string. 122 | """ 123 | 124 | sys_content = ( 125 | "You are a RocksDB Expert being consulted by a company to help improve their RocksDB performance " 126 | "by optimizing the options configured for a particular scenario they face. " 127 | f"Only provide option files for rocksdb version {constants.VERSION}. Direct IO will always be used. " 128 | "Additionally, compression type is set to none always. " 129 | "Respond with the reasoning first, then show the option file in original format. " 130 | f"The Device information is: {device_information}" 131 | ) 132 | 133 | user_content = [] 134 | content = "Can you generate a new options file for RocksDB based on the following information?\n" 135 | content += "The previous options file is:\n" 136 | 137 | content += "```\n" 138 | content += options_file[-1][0] 139 | content += "```\n" 140 | 141 | content += ( 142 | f"The throughput results for the above options file are: {options_file[-1][1]['ops_per_sec']}. " 143 | ) 144 | 145 | user_content.append(content) 146 | content = "" 147 | 148 | content += "We then made the following changes to the options file:\n" 149 | 150 | pattern = re.compile(r'\s*([^=\s]+)\s*=\s*([^=\s]+)\s*') 151 | 152 | file1_lines = pattern.findall(options) 153 | file2_lines = pattern.findall(options_file[-1][0]) 154 | 155 | file1_lines = ["{} = {}".format(k, v) for k, v in file1_lines] 156 | file2_lines = ["{} = {}".format(k, v) for k, v in file2_lines] 157 | differ = Differ() 158 | diff = list(differ.compare(file1_lines, file2_lines)) 159 | lst = [] 160 | for line in diff: 161 | if line.startswith('+'): 162 | lst.append(line) 163 | result = '\n'.join(line[2:] for line in lst) 164 | 165 | content += "```\n" 166 | content += result 167 | content += "```\n" 168 | 169 | content += f"\nThe updated file has a throughput of: {last_throughput}. \n\n" 170 | user_content.append(content) 171 | content = "" 172 | content += "Based on this information generate a new file. Enclose the new options in ```. Feel free to use up to 100% of the CPU and Memory." 
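# At this point user_content holds the previous options file (with its throughput) and the diff of the latest changes (with the resulting throughput); the closing instruction built above is appended next and everything is sent through request_gpt at temperature 0.4.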
173 | user_content.append(content) 174 | 175 | log_update("[OG] Generating options file with differences") 176 | log_update("[OG] Prompt for midway options file generation") 177 | log_update(content) 178 | 179 | matches = request_gpt(sys_content, user_content, 0.4) 180 | 181 | if matches is not None: 182 | clean_options_file = cleanup_options_file(matches[1]) 183 | reasoning = matches[0] + matches[2] 184 | 185 | return clean_options_file, reasoning, "" 186 | 187 | def generate_option_file_with_gpt(case, previous_option_files, device_information, temperature=0.4, average_cpu_used=-1.0, average_mem_used=-1.0, test_name="fillrandom", version="8.8.1"): 188 | """ 189 | Function that generates an options file for RocksDB based on specified parameters and case scenarios. 190 | - This function selects one of three different approaches to generate a RocksDB configuration options file. 191 | 192 | Parameters: 193 | - case (int): Determines the approach to use for generating the options file. Valid values are 1, 2, or 3. 194 | - previous_option_files (list): A list of tuples containing past options file configurations and other relevant data. 195 | - device_information (str): Information about the device/system on which RocksDB is running. 196 | - temperature (float, optional): Controls the randomness/creativity of the generated output. Default is 0.4. 197 | - average_cpu_used (float, optional): Average CPU usage, used for tuning the configuration. Default is -1.0, indicating not specified. 198 | - average_mem_used (float, optional): Average memory usage, used for tuning the configuration. Default is -1.0, indicating not specified. 199 | - test_name (str, optional): Identifier for the type of test or configuration scenario. Default is "fillrandom". 200 | 201 | Returns: 202 | - tuple: A tuple containing the generated options file, reasoning behind the options, and an empty string as a placeholder. 203 | 204 | Raises: 205 | - ValueError: If the `case` parameter is not 1, 2, or 3. 206 | """ 207 | def case_1(previous_option_files, device_information, temperature,average_cpu_used, average_mem_used, test_name, version): 208 | log_update("[OG] Generating options file with long option changes") 209 | print("[OG] Generating options file with long option changes") 210 | system_content = generate_system_content(device_information, version) 211 | previous_option_file, _, _, _ = previous_option_files[-1] 212 | user_contents = generate_default_user_content(previous_option_file, previous_option_files, average_cpu_used, average_mem_used, test_name) 213 | matches = request_gpt(system_content, user_contents, temperature) 214 | # Process the GPT-generated response 215 | if matches is not None: 216 | clean_options_file = cleanup_options_file(matches[1]) 217 | reasoning = matches[0] + matches[2] 218 | 219 | return clean_options_file, reasoning, "" 220 | 221 | def case_2(previous_option_files, device_information, temperature,average_cpu_used, average_mem_used, test_name, version): 222 | log_update("[OG] Generating options file with short option changes") 223 | print("[OG] Generating options file with short option changes") 224 | system_content = ( 225 | "You are a RocksDB Expert. " 226 | "You are being consulted by a company to help improve their RocksDB configuration " 227 | "by optimizing their options file based on their System information and benchmark output." 228 | f"Only provide option files for rocksdb version {version}. Also, Direct IO will always be used for both flush and compaction." 
229 | "Additionally, compression type is set to none always." 230 | "First Explain the reasoning, only change the options I provided, then show the option file in original format." 231 | f"The Device information is: {device_information}") 232 | previous_option_file, _, _, _ = previous_option_files[-1] 233 | 234 | # Define a regular expression pattern to match key-value pairs 235 | pattern = re.compile(r'\s*([^=\s]+)\s*=\s*([^=\s]+)\s*') 236 | 237 | # Extract key-value pairs from the string 238 | key_value_pairs = {match.group(1): match.group( 239 | 2) for match in pattern.finditer(previous_option_file)} 240 | 241 | # Remove key-value pairs where the key is "xxx" (case-insensitive) 242 | key_value_pairs = {key: value for key, value in key_value_pairs.items( 243 | ) if key.lower() not in {'rocksdb_version', 'options_file_version'}} 244 | 245 | # Split key-value pairs into chunks of 5 pairs each 246 | pairs_per_chunk = 20 247 | chunks = [list(key_value_pairs.items())[i:i + pairs_per_chunk] 248 | for i in range(0, len(key_value_pairs), pairs_per_chunk)] 249 | 250 | # Create strings for each chunk 251 | chunk_strings = [ 252 | '\n'.join([f"{key}: {value}" for key, value in chunk]) for chunk in chunks] 253 | 254 | clean_options_file = "" 255 | reasoning = "" 256 | 257 | # Loop through each part and make API calls 258 | for chunk_string in chunk_strings: 259 | user_contents = generate_default_user_content(chunk_string, previous_option_files, average_cpu_used, average_mem_used, test_name) 260 | matches = request_gpt(system_content, user_contents, temperature) 261 | if matches is not None: 262 | clean_options_file = cleanup_options_file(matches[1]) 263 | reasoning += matches[0] + matches[2] 264 | 265 | return clean_options_file, reasoning, "" 266 | 267 | 268 | def case_3(previous_option_files, device_information, temperature,average_cpu_used, average_mem_used, test_name, version): 269 | 270 | log_update("[OG] Generating options file with differences") 271 | print("[OG] Generating options file with differences") 272 | system_content = generate_system_content(device_information, version) 273 | # Request GPT to generate new option 274 | user_contents = generate_user_content_with_difference(previous_option_files, average_cpu_used, average_mem_used, test_name) 275 | matches = request_gpt(system_content, user_contents, temperature) 276 | # Process the GPT response 277 | if matches is not None: 278 | clean_options_file = cleanup_options_file(matches[1]) 279 | reasoning = matches[0] + matches[2] 280 | 281 | return clean_options_file, reasoning, "" 282 | 283 | switch = { 284 | 1: case_1, 285 | 2: case_2, 286 | 3: case_3, 287 | } 288 | func = switch.get(case) 289 | if func: 290 | return func(previous_option_files, device_information, temperature,average_cpu_used, average_mem_used, test_name, version) 291 | else: 292 | raise ValueError(f"No function defined for case {case}") --------------------------------------------------------------------------------