├── config_example.ini ├── configs.py ├── draw.py ├── environment.py ├── generate_sysbench_insert_and_delete_query.py ├── get_res_data_from_file.py ├── get_sysbench_workload_from_slow_file.py ├── get_workload_from_file.py ├── knob-effect-test.py ├── main.py ├── model.py ├── pre-processing └── knob_rankiing.py ├── requirements.txt ├── run_job.py ├── run_workload.py ├── saved_model_weights-141threads ├── actor_weights.h5 └── critic_weights.h5 ├── saved_model_weights-2021-07-22-2 ├── actor_weights.h5 └── critic_weights.h5 ├── saved_model_weights-2021-07-22 ├── actor_weights.h5 └── critic_weights.h5 ├── saved_model_weights ├── actor_weights.h5 ├── critic_weights.h5 └── rewards.txt ├── saved_model_weights_2021-07-07 ├── actor_weights.h5 └── critic_weights.h5 ├── saved_model_weights_2021-07-08 ├── actor_weights.h5 └── critic_weights.h5 ├── saved_model_weights_2021-07-09 ├── actor_weights.h5 └── critic_weights.h5 ├── saved_model_weights_2021_07_02_23_39_12 ├── actor_weights.h5 └── critic_weights.h5 ├── saved_model_weights_2021_07_11 ├── actor_weights.h5 └── critic_weights.h5 ├── saved_model_weights_2021_07_11_2 ├── actor_weights.h5 └── critic_weights.h5 ├── saved_model_weights_tmp ├── actor_weights.h5 └── critic_weights.h5 ├── scripts ├── output.res └── run_job.sh ├── sql2resource.py ├── training-data └── trainData_sql.txt ├── workload ├── workload_read_10000.txt └── workload_sysbench_readonly_table20_tablesize8000000.txt ├── workload_file_example.txt └── 配置文档.md /config_example.ini: -------------------------------------------------------------------------------- 1 | [predictor] 2 | # SELECT count(*) FROM INFORMATION_SCHEMA.INNODB_METRICS where status='enabled'; 3 | predictor_output_dim = 65 4 | # predictor_epoch 5 | predictor_epoch = 100 6 | 7 | 8 | [database_tune] 9 | # Host IP Address 10 | host = XX.XXX.XX.9 11 | # Host Port Number 12 | port = 3306 13 | # Database User Name 14 | user = root 15 | # Database Name 16 | database = XXX 17 | # Database Password 18 | password = XXXXXX 19 | # Limit for the requests (queries) in a workload' 20 | num_event = 1000 21 | # Read percentage 22 | p_r_range = 0.6 23 | # Update percentage 24 | p_u_index = 0.2 25 | # Insert percentage 26 | p_i = 0.1 27 | # Delete percentage 28 | p_d = 0.1 29 | # Iteration Number 30 | num_trial = 500 31 | # Maximum sample number cached in RL 32 | maxlen_mem = 2000 33 | # maxlen_predict_mem 34 | maxlen_predict_mem = 2000 35 | # learning_rate 36 | learning_rate = 1e-3 37 | # Sample threshold to train RL 38 | train_min_size = 3 39 | # Training finish if the accumulated score is over the value 40 | stopping_score = 90 41 | # Training finish if the throughput improvement percentage is over the value 42 | stopping_throughput_improvement_percentage = 0.12 43 | # Training Performance Comparision 44 | linelist = ['res_predict-1619930491','res_random-1619930491'] 45 | # [Latency, Throughput] 46 | performance_metric = ['Latency'] 47 | # workload_file, like workload_file_example.txt 48 | workload_file_path = /XX/CXX/XXX/sysbench_workload.txt 49 | # thread_num 50 | thread_num = 500 51 | # thread_num dependent on the max connections 1/true 0/false 52 | thread_num_auto = 1 53 | 54 | [knob_config] 55 | # you should increase the value of the table_open_cache variable if the numer of opened tables is large 56 | table_open_cache = {'type':'infer','min_value':1,'max_value':2000,'length':1} 57 | # The maximum permitted number of simultaneous client connections 58 | max_connections = {'type':'infer','min_value': 20,'max_value': 100000,'length':1} 59 | 
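# Format note: every knob entry in this section must stay a one-line dict literal, because
# configs.parse_knob_config() rewrites the single quotes to double quotes and parses the value
# with json.loads. 'type' = 'infer' marks knobs the agent tunes directly; 'min_value' and
# 'max_value' bound the raw MySQL value; 'length' is the step size: environment.py divides
# min/max by it to build the action space, and model.py multiplies the chosen action back by it
# before the knob is applied with SET GLOBAL.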
# The minimum size of the buffer that is used for plain index scans, range index scans, and joins that do not use indexes and thus perform full table scans. 60 | join_buffer_size = {'type':'infer','min_value': 128,'max_value': 134217728,'length':32} 61 | # The size of the buffer that is allocated when preloading indexes. 62 | preload_buffer_size = {'type':'infer','min_value': 1024,'max_value': 1073741824,'length':32} 63 | # Each session that must perform a sort allocates a buffer of this size. 64 | sort_buffer_size = {'type':'infer','min_value': 32768,'max_value': 134217728,'length':32} 65 | # The size of the cache to hold changes to the binary log during a transaction. 66 | binlog_cache_size = {'type':'infer','min_value': 4096,'max_value': 134217728,'length':32} 67 | # The cutoff on the size of index values that determines which filesort algorithm to use. 68 | max_length_for_sort_data = {'type':'infer','min_value': 4,'max_value': 8388608,'length':1} 69 | # This variable limits the total number of prepared statements in the server. Setting the value to 0 disables prepared statements. 70 | max_prepared_stmt_count = {'type':'infer','min_value': 0,'max_value': 1048576,'length':1} 71 | # The number of times that any given stored procedure may be called recursively. The default value for this option is 0, which completely disables recursion in stored procedures. The maximum value is 255. 72 | max_sp_recursion_depth = {'type':'infer','min_value': 0,'max_value': 255,'length':1} 73 | # The maximum number of simultaneous connections permitted to any given MySQL user account. A value of 0 (the default) means “no limit.” 74 | max_user_connections = {'type':'infer','min_value': 0,'max_value': 4294967295,'length':1} 75 | # If this value is greater than 1, MyISAM table indexes are created in parallel. 76 | myisam_repair_threads = {'type':'infer','min_value': 1,'max_value': 4294967295,'length':1} 77 | #for test use ,innodb_buffer_pool_instances&innodb_buffer_pool_chunk_size is read only 78 | read_buffer_size = {'type':'infer','min_value': 1,'max_value': 8589934592,'length':32} 79 | # The size of buffer pool (bytes). A larger buffer pool requires less disk I/O to access the same table data more than once. 
80 | innodb_buffer_pool_size = {'type':'infer','min_value': 1048576,'max_value': 8589934592,'length':1048576} -------------------------------------------------------------------------------- /configs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import configparser 4 | import json 5 | 6 | 7 | class DictParser(configparser.ConfigParser): 8 | def read_dict(self): 9 | d = dict(self._sections) 10 | for k in d: 11 | d[k] = dict(d[k]) 12 | return d 13 | 14 | 15 | cf = DictParser() 16 | cf.read("config.ini", encoding="utf-8") 17 | config_dict = cf.read_dict() 18 | 19 | 20 | def parse_args(): 21 | return config_dict["database_tune"] 22 | 23 | 24 | def parse_knob_config(): 25 | _knob_config = config_dict["knob_config"] 26 | for key in _knob_config: 27 | _knob_config[key] = json.loads(str(_knob_config[key]).replace("\'", "\"")) 28 | return _knob_config 29 | 30 | 31 | knob_config = parse_knob_config() 32 | # sync with main.py 33 | 34 | predictor_output_dim = int(config_dict["predictor"]["predictor_output_dim"]) 35 | 36 | predictor_epoch = int(config_dict["predictor"]["predictor_epoch"]) 37 | -------------------------------------------------------------------------------- /draw.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib as mpl 4 | import matplotlib.pyplot as plt 5 | from matplotlib import rcParams 6 | import sys 7 | 8 | 9 | def draw_lines(filelist, metric_name): 10 | ''' 11 | multiple lines on the same metric (y) with increasing iterations (x) 12 | :param filelist: 13 | :param metric_name: 14 | :return: 1 (succeed)/0 (fail) 15 | ''' 16 | 17 | ''' Load Data: [QTune] ''' 18 | col_list = ["iteration", metric_name] 19 | df = pd.read_csv("training-results/" + filelist[0], usecols=col_list, sep="\t") 20 | 21 | x_qtune = list(df[col_list[0]]) 22 | x_qtune = [int(x) for x in x_qtune] 23 | y_qtune = list(df[col_list[1]]) 24 | y_qtune = [float(y) for y in y_qtune] 25 | 26 | ''' Load Data: [Random] ''' 27 | col_list = ["iteration", metric_name] 28 | df = pd.read_csv("training-results/" + filelist[1], usecols=col_list, sep="\t") 29 | x_random = list(df[col_list[0]]) 30 | x_random = [int(x) for x in x_random] 31 | y_random = list(df[col_list[1]]) 32 | y_random = [float(y) for y in y_random] 33 | 34 | ''' figure drawing ''' 35 | mpl.rcdefaults() 36 | rcParams.update({ 37 | 'xtick.labelsize': 12, 38 | 'ytick.labelsize': 12, 39 | 'axes.labelsize': 15, 40 | # 'figure.autolayout': True, 41 | 'figure.subplot.hspace': 0.45, 42 | 'figure.subplot.wspace': 0.22, 43 | # 'mathtext.fontset': 'cm', 44 | }) 45 | 46 | fig = plt.figure() 47 | 48 | qid = 1 49 | ax = fig.add_subplot(1, 1, qid) 50 | 51 | rf = np.array(y_qtune) 52 | dt = np.array(y_random) 53 | 54 | x = np.arange(1, max(len(x_random), len(x_qtune)) + 5) 55 | # y = np.arange(0.0, 1.0) 56 | 57 | l1, = plt.plot(x_qtune, rf[:len(x_qtune)], marker='D', ms=3, linewidth=1) 58 | l2, = plt.plot(x_random, dt[:len(x_random)], marker='X', ms=3, linewidth=1) 59 | 60 | ax.text(0.5, -0.36, 61 | f"({chr(ord('a') + qid - 1)}) $D_{{ {qid} }}$", 62 | horizontalalignment='center', transform=ax.transAxes, fontsize=15, family='serif', 63 | ) 64 | ax.set_xticks(np.arange(0, len(x), len(x) / 10)) 65 | if metric_name == 'latency': 66 | y_range = max(max(y_qtune), max(y_random)) + 5 67 | elif metric_name == 'throughput': 68 | y_range = max(max(y_qtune), max(y_random)) + 100 69 | 70 | ax.set_yticks(np.arange(0, 
y_range, y_range / 10)) 71 | ax.set_ylim(0, y_range) 72 | ax.set_xlim(0, len(x)) 73 | ax.set_xlabel('#-Iterations') 74 | ax.set_ylabel('Performance') 75 | 76 | fig.legend([l1, l2], ['QTune', 'Random'], 77 | loc='upper center', ncol=4, 78 | handlelength=3, 79 | columnspacing=6., 80 | bbox_to_anchor=(0., 0.98, 1., .0), 81 | bbox_transform=plt.gcf().transFigure, 82 | fontsize=10, 83 | ) 84 | 85 | plt.savefig('training-results/training.png') 86 | 87 | return 1 88 | 89 | 90 | if __name__ == '__main__': 91 | argv = sys.argv 92 | linelist = argv[1].split(',') 93 | metric_name = argv[2] 94 | mark = draw_lines(linelist, metric_name) 95 | if mark: 96 | print('Successfully update figure!') 97 | else: 98 | print('Fail to update figure!') 99 | -------------------------------------------------------------------------------- /environment.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import subprocess 3 | from collections import deque 4 | import numpy as np 5 | 6 | import pymysql 7 | import pymysql.cursors as pycursor 8 | 9 | import gym 10 | from gym import spaces 11 | from gym.utils import seeding 12 | 13 | from sql2resource import SqlParser 14 | 15 | from configs import knob_config 16 | import time 17 | from run_job import run_job 18 | 19 | 20 | # fetch all the knobs from the prepared configuration info 21 | 22 | 23 | class Database: 24 | def __init__(self, argus): 25 | self.argus = argus 26 | # self.internal_metric_num = 13 # 13(state) + cumulative() 27 | self.external_metric_num = 2 # [throughput, latency] # num_event / t 28 | self.max_connections_num = None 29 | self.knob_names = [knob for knob in knob_config] 30 | print("knob_names:", self.knob_names) 31 | self.knob_num = len(knob_config) 32 | self.internal_metric_num = 65 # default system metrics enabled in metric_innodb 33 | self.max_connections() 34 | try: 35 | conn = self._get_conn() 36 | cursor = conn.cursor() 37 | sql = "SELECT count FROM INFORMATION_SCHEMA.INNODB_METRICS where status='enabled'" 38 | cursor.execute(sql) 39 | result = cursor.fetchall() 40 | self.internal_metric_num = len(result) 41 | cursor.close() 42 | conn.close() 43 | except Exception as err: 44 | print("execute sql error:", err) 45 | 46 | def _get_conn(self): 47 | conn = pymysql.connect(host=self.argus['host'], 48 | port=int(self.argus['port']), 49 | user=self.argus['user'], 50 | password=self.argus['password'], 51 | db='INFORMATION_SCHEMA', 52 | connect_timeout=36000, 53 | cursorclass=pycursor.DictCursor) 54 | return conn 55 | 56 | def fetch_internal_metrics(self): 57 | ######### observation_space 58 | # State_status 59 | # [lock_row_lock_time_max, lock_row_lock_time_avg, buffer_pool_size, 60 | # buffer_pool_pages_total, buffer_pool_pages_misc, buffer_pool_pages_data, buffer_pool_bytes_data, 61 | # buffer_pool_pages_dirty, buffer_pool_bytes_dirty, buffer_pool_pages_free, trx_rseg_history_len, 62 | # file_num_open_files, innodb_page_size] 63 | # Cumulative_status 64 | # [lock_row_lock_current_waits, ] 65 | ''' 66 | sql = "select count from INNODB_METRICS where name='lock_row_lock_time_max' or name='lock_row_lock_time_avg'\ 67 | or name='buffer_pool_size' or name='buffer_pool_pages_total' or name='buffer_pool_pages_misc' or name='buffer_pool_pages_data'\ 68 | or name='buffer_pool_bytes_data' or name='buffer_pool_pages_dirty' or name='buffer_pool_bytes_dirty' or name='buffer_pool_pages_free'\ 69 | or name='trx_rseg_history_len' or name='file_num_open_files' or name='innodb_page_size'" 70 | ''' 71 | state_list = 
np.array([]) 72 | try: 73 | conn = self._get_conn() 74 | cursor = conn.cursor() 75 | sql = "SELECT count FROM INFORMATION_SCHEMA.INNODB_METRICS where status='enabled'" 76 | cursor.execute(sql) 77 | result = cursor.fetchall() 78 | for s in result: 79 | state_list = np.append(state_list, [s['count']]) 80 | cursor.close() 81 | conn.close() 82 | except Exception as error: 83 | print(error) 84 | 85 | return state_list 86 | 87 | def fetch_knob(self): 88 | state_list = np.append([], []) 89 | try: 90 | conn = self._get_conn() 91 | cursor = conn.cursor() 92 | sql = "select" 93 | for i, knob in enumerate(self.knob_names): 94 | sql = sql + ' @@' + knob 95 | if i < self.knob_num - 1: 96 | sql = sql + ', ' 97 | # print("fetch_knob:", sql) 98 | cursor.execute(sql) 99 | result = cursor.fetchall() 100 | for i in range(self.knob_num): 101 | state_list = np.append(state_list, result[0]["@@%s" % self.knob_names[i]]) 102 | cursor.close() 103 | conn.close() 104 | except Exception as error: 105 | print("fetch_knob Error:", error) 106 | return state_list 107 | 108 | def max_connections(self): 109 | # if not self.max_connections_num: 110 | if 1: 111 | try: 112 | conn = self._get_conn() 113 | cursor = conn.cursor() 114 | sql = "show global variables like 'max_connections';" 115 | cursor.execute(sql) 116 | self.max_connections_num = int(cursor.fetchone()["Value"]) 117 | cursor.close() 118 | conn.close() 119 | except Exception as error: 120 | print(error) 121 | return self.max_connections_num 122 | 123 | def change_knob_nonrestart(self, actions): 124 | try: 125 | conn = self._get_conn() 126 | for i in range(self.knob_num): 127 | cursor = conn.cursor() 128 | if self.knob_names[i] == 'max_connections': 129 | self.max_connections_num = actions[i] 130 | sql = 'set global %s=%d' % (self.knob_names[i], actions[i]) 131 | cursor.execute(sql) 132 | # print(f"修改参数-{self.knob_names[i]}:{actions[i]}") 133 | conn.commit() 134 | conn.close() 135 | return 1 136 | except Exception as error: 137 | conn.close() 138 | print("change_knob_nonrestart error:", error) 139 | return 0 140 | 141 | 142 | # Define the environment 143 | class Environment(gym.Env): 144 | 145 | def __init__(self, db, argus): 146 | 147 | self.db = db 148 | 149 | self.parser = SqlParser(argus) 150 | 151 | self.state_num = db.internal_metric_num 152 | self.action_num = db.knob_num 153 | self.timestamp = int(time.time()) 154 | 155 | # pfs = open('training-results/res_predict-' + str(self.timestamp), 'a') 156 | # pfs.write("%s\t%s\t%s\n" % ('iteration', 'throughput', 'latency')) 157 | # pfs.close() 158 | # 159 | # rfs = open('training-results/res_random-' + str(self.timestamp), 'a') 160 | # rfs.write("%s\t%s\t%s\n" % ('iteration', 'throughput', 'latency')) 161 | # rfs.close() 162 | 163 | ''' observation dim = system metric dim + query vector dim ''' 164 | self.score = 0 # accumulate rewards 165 | 166 | self.o_dim = db.internal_metric_num + len(self.db.fetch_internal_metrics()) 167 | self.o_low = np.array([-1e+10] * self.o_dim) 168 | self.o_high = np.array([1e+10] * self.o_dim) 169 | 170 | self.observation_space = spaces.Box(low=self.o_low, high=self.o_high, dtype=np.float32) 171 | # part 1: current system metric 172 | self.state = db.fetch_internal_metrics() 173 | # print("Concatenated state:") 174 | # part 2: predicted system metric after executing the workload 175 | self.workload = argus["workload"] 176 | 177 | # TODO: 打开训练predict的方法后,此方法注释 178 | ################################################################################ 179 | state0 = 
self.db.fetch_internal_metrics() 180 | self.preheat() 181 | state1 = self.db.fetch_internal_metrics() 182 | try: 183 | if self.parser.predict_sql_resource_value is None: 184 | self.parser.predict_sql_resource_value = state1 - state0 185 | except Exception as error: 186 | print("get predict_sql_resource_value error:", error) 187 | ################################################################################ 188 | 189 | self.state = np.append(self.parser.predict_sql_resource(self.workload), self.state) 190 | 191 | ''' action space ''' 192 | # Offline 193 | # table_open_cache(1), max_connections(2), innodb_buffer_pool_instances(4), 194 | # innodb_log_files_in_group(5), innodb_log_file_size(6), innodb_purge_threads(7), innodb_read_io_threads(8) 195 | # innodb_write_io_threads(9), 196 | # Online 197 | # innodb_buffer_pool_size(3), max_binlog_cache_size(10), binlog_cache_size(11) 198 | # 1 2 3 11 199 | # exclude 200 | # innodb_file_per_table, skip_name_resolve, binlog_checksum, 201 | # binlog_format(dynamic, [ROW, STATEMENT, MIXED]), 202 | 203 | calculate_knobs = [] 204 | infer_knobs = [] 205 | for k in knob_config.items(): 206 | if k[1]['type'] == 'infer': 207 | infer_knobs.append(k) 208 | else: 209 | calculate_knobs.append(k) 210 | self.knob_num = len(knob_config) 211 | # self.a_low = np.array([knob[1]['min_value']/knob[1]['length'] for knob in list(knob_config.items())[:db.knob_num]]) 212 | self.a_low = np.array([knob[1]['min_value'] / knob[1]['length'] for knob in infer_knobs]) 213 | # self.a_high = np.array([knob[1]['max_value']/knob[1]['length'] for knob in list(knob_config.items())[:db.knob_num]]) 214 | self.a_high = np.array([knob[1]['max_value'] / knob[1]['length'] for knob in infer_knobs]) 215 | # self.length = np.array([knob[1]['length'] for knob in list(knob_config.items())[:db.knob_num]]) 216 | self.length = np.array([knob[1]['length'] * 1.0 for knob in infer_knobs]) 217 | self.action_space = spaces.Box(low=self.a_low, high=self.a_high, dtype=np.float32) 218 | self.default_action = self.a_low 219 | self.mem = deque(maxlen=int(argus['maxlen_mem'])) # [throughput, latency] 220 | self.predicted_mem = deque(maxlen=int(argus['maxlen_predict_mem'])) 221 | self.knob2pos = {knob: i for i, knob in enumerate(knob_config)} 222 | self.seed() 223 | self.start_time = datetime.datetime.now() 224 | 225 | def seed(self, seed=None): 226 | self.np_random, seed = seeding.np_random(seed) 227 | return [seed] 228 | 229 | def execute_command(self): 230 | self.db.max_connections() 231 | # print(self.db.max_connections_num) 232 | if self.parser.argus['thread_num_auto'] == '0': 233 | thread_num = int(self.parser.argus['thread_num']) 234 | else: 235 | thread_num = int(self.db.max_connections_num) - 1 236 | run_job(thread_num, self.workload, self.parser.resfile) 237 | 238 | def preheat(self): 239 | self.execute_command() 240 | 241 | def fetch_action(self): 242 | while True: 243 | state_list = self.db.fetch_knob() 244 | if list(state_list): 245 | break 246 | time.sleep(5) 247 | return state_list 248 | 249 | # new_state, reward, done, 250 | def step(self, u, isPredicted, iteration, action_tmp=None): 251 | flag = self.db.change_knob_nonrestart(u) 252 | 253 | # if failing to tune knobs, give a high panlty 254 | if not flag: 255 | return self.state, -10e+4, self.score, 1 256 | 257 | self.execute_command() 258 | throughput, latency = self._get_throughput_latency() 259 | # ifs = open('fl1', 'r') 260 | # print(str(len(self.mem)+1)+"\t"+str(throughput)+"\t"+str(latency)) 261 | cur_time = datetime.datetime.now() 262 | 
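# The reward computed a few lines below via _calculate_reward() compares the new throughput and
# latency against both the first recorded measurement (mem[0], long-term trend) and the most
# recent one (mem[-1], short-term trend); the throughput term is weighted 9x the latency term.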
interval = (cur_time - self.start_time).seconds 263 | self.mem.append([throughput, latency]) 264 | # 2 refetch state 265 | self._get_obs() 266 | # 3 cul reward(T, L) 267 | reward = self._calculate_reward(throughput, latency) 268 | ''' 269 | reward = 0 270 | for i in range(u.shape[0]): 271 | tmp = u[i] / self.a_high[i] 272 | reward+=tmp 273 | # print("Performance: %d\t%f\t%f\t%f\t%ds" % (len(self.mem) + 1, throughput, latency, reward, interval)) 274 | if isPredicted: 275 | self.predicted_mem.append([len(self.predicted_mem), throughput, latency, reward]) 276 | if len(self.predicted_mem)%10 == 0: 277 | print("Predict List") 278 | print(self.predicted_mem) 279 | ''' 280 | 281 | action = self.fetch_action() 282 | 283 | if isPredicted: 284 | self.predicted_mem.append([len(self.predicted_mem), throughput, latency, reward]) 285 | 286 | # print("Predict %d\t%f\t%f\t%f\t%ds" % (len(self.mem) + 1, throughput, latency, reward, interval)) 287 | 288 | pfs = open('training-results/res_predict-' + str(self.timestamp), 'a') 289 | pfs.write("%d\t%f\t%f\n" % (iteration, throughput, latency)) 290 | pfs.close() 291 | 292 | fetch_knob = open('training-results/fetch_knob_predict-' + str(self.timestamp), 'a') 293 | fetch_knob.write(f"{str(iteration)}\t{str(list(action))}\n") 294 | fetch_knob.close() 295 | 296 | action_write = open('training-results/action_test_predict-' + str(self.timestamp), 'a') 297 | action_write.write(f"{str(iteration)}\t{str(list(u))}\n") 298 | action_write.write(f"{str(iteration)}\t{str(list(action_tmp))}\n") 299 | action_write.close() 300 | 301 | self.score = self.score + reward 302 | 303 | else: 304 | # print("Random %d\t%f\t%f\t%f\t%ds" % (len(self.mem) + 1, throughput, latency, reward, interval)) 305 | 306 | rfs = open('training-results/res_random-' + str(self.timestamp), 'a') 307 | rfs.write("%d\t%f\t%f\n" % (iteration, throughput, latency)) 308 | rfs.close() 309 | 310 | action_write = open('training-results/action_random-' + str(self.timestamp), 'a') 311 | action_write.write(f"{str(iteration)}\t{str(list(u))}\n") 312 | action_write.close() 313 | 314 | fetch_knob = open('training-results/fetch_knob_random-' + str(self.timestamp), 'a') 315 | fetch_knob.write(f"{str(iteration)}\t{str(list(action))}\n") 316 | fetch_knob.close() 317 | 318 | return self.state, reward, self.score, throughput 319 | 320 | def _get_throughput_latency(self): 321 | with open(self.parser.resfile, 'r') as f: 322 | try: 323 | for line in f.readlines(): 324 | a = line.split() 325 | if len(a) > 1 and 'avg_qps(queries/s):' == a[0]: 326 | throughput = float(a[1]) 327 | if len(a) > 1 and 'avg_lat(s):' == a[0]: 328 | latency = float(a[1]) 329 | finally: 330 | f.close() 331 | # print("throughput:{} \n latency:{}".format(throughput, latency)) 332 | return throughput, latency 333 | 334 | def _calculate_reward(self, throughput, latency): 335 | if len(self.mem) != 0: 336 | dt0 = (throughput - self.mem[0][0]) / self.mem[0][0] 337 | dt1 = (throughput - self.mem[len(self.mem) - 1][0]) / self.mem[len(self.mem) - 1][0] 338 | if dt0 >= 0: 339 | rt = ((1 + dt0) ** 2 - 1) * abs(1 + dt1) 340 | else: 341 | rt = -((1 - dt0) ** 2 - 1) * abs(1 - dt1) 342 | 343 | dl0 = -(latency - self.mem[0][1]) / self.mem[0][1] 344 | 345 | dl1 = -(latency - self.mem[len(self.mem) - 1][1]) / self.mem[len(self.mem) - 1][1] 346 | 347 | if dl0 >= 0: 348 | rl = ((1 + dl0) ** 2 - 1) * abs(1 + dl1) 349 | else: 350 | rl = -((1 - dl0) ** 2 - 1) * abs(1 - dl1) 351 | 352 | else: # initial action 353 | rt = 0 354 | rl = 0 355 | reward = 1 * rl + 9 * rt 356 | return 
reward 357 | 358 | def _get_obs(self): 359 | self.state = self.db.fetch_internal_metrics() 360 | self.state = np.append(self.parser.predict_sql_resource(self.workload), self.state) 361 | return self.state 362 | -------------------------------------------------------------------------------- /generate_sysbench_insert_and_delete_query.py: -------------------------------------------------------------------------------- 1 | table_count = 20 2 | rows_count = 10000 3 | 4 | 5 | def write_insert_to_file(file_path): 6 | with open(file_path, 'a+') as f: 7 | for i in range(0, int(rows_count / table_count)): 8 | 9 | for j in range(1, table_count + 1): 10 | insert = "insert into sbtest{} (k, c, pad) values ((select floor(2+rand()*100000)), '{}', '94657455071-01886877449-66853068383-97480802197-06448926027');".format( 11 | j, i + 1) 12 | f.write(insert) 13 | f.write('\n') 14 | 15 | f.close() 16 | print("写入成功") 17 | 18 | 19 | def write_delete_to_file(file_path): 20 | with open(file_path, 'a+') as f: 21 | for i in range(0, int(rows_count / table_count)): 22 | for j in range(1, table_count + 1): 23 | delete = "delete from sbtest{} order by id desc limit 1;".format(j) 24 | f.write(delete) 25 | f.write('\n') 26 | f.close() 27 | print("写入成功") 28 | 29 | 30 | # Press the green button in the gutter to run the script. 31 | if __name__ == '__main__': 32 | write_insert_to_file("/Users/Four/Desktop/insert.txt") 33 | write_delete_to_file("/Users/Four/Desktop/delete.txt") 34 | -------------------------------------------------------------------------------- /get_res_data_from_file.py: -------------------------------------------------------------------------------- 1 | def data_from_file(file_path): 2 | indexs = [] 3 | datas = [] 4 | delaies = [] 5 | try: 6 | with open(file_path, 'r') as f: 7 | result = f.read().splitlines() 8 | for i in result: 9 | a = i.split() 10 | indexs.append(a[0]) 11 | datas.append(a[1]) 12 | delaies.append(a[2]) 13 | f.close() 14 | except Exception as error: 15 | print(str(error)) 16 | 17 | print(indexs) 18 | print(datas) 19 | print(delaies) 20 | 21 | return indexs, datas, delaies 22 | 23 | 24 | def knob_data_from_file(file_path): 25 | indexs = [] 26 | datas = [] 27 | try: 28 | with open(file_path, 'r') as f: 29 | result = f.read().splitlines() 30 | for i in result: 31 | a = i.split("\t") 32 | print(a) 33 | indexs.append(a[0]) 34 | datas.append(a[1]) 35 | f.close() 36 | except Exception as error: 37 | print(str(error)) 38 | 39 | print(indexs) 40 | print(datas) 41 | 42 | return indexs, datas 43 | 44 | 45 | if __name__ == '__main__': 46 | data_from_file("/Users/Four/Desktop/qtune_results/res_predict-1625452896") 47 | print("\n\n") 48 | data_from_file("/Users/Four/Desktop/qtune_results/res_random-1625452896") 49 | print("\n\n") 50 | -------------------------------------------------------------------------------- /get_sysbench_workload_from_slow_file.py: -------------------------------------------------------------------------------- 1 | def get_workload(file_path): 2 | workload = [] 3 | try: 4 | with open(file_path, 'r') as f: 5 | result = f.read().splitlines() 6 | for i in result: 7 | if ("#" not in i) and ("SET timestamp" not in i) and ("COMMIT" not in i) and ("BEGIN" not in i) and i: 8 | workload.append(i) 9 | f.close() 10 | print(f"共有{len(workload)}条数据") 11 | with open("/Users/Four/Desktop/workload123.txt", "w+") as sql_file: 12 | for query in workload: 13 | if query: 14 | sql_file.write(query + "\n") 15 | sql_file.close() 16 | except Exception as error: 17 | print(str(error)) 18 | 19 | 20 | if 
__name__ == '__main__': 21 | 22 | # get_workload("/Users/Four/Desktop/slow.log") 23 | 24 | for i in range(0, 1): 25 | 26 | for j in range(1, 21): 27 | insert = "insert into sbtest{} (k, c, pad) values ((select floor(2+rand()*100000)), '{}', '94657455071-01886877449-66853068383-97480802197-06448926027');".format( 28 | j, i + 1) 29 | print(insert) 30 | -------------------------------------------------------------------------------- /get_workload_from_file.py: -------------------------------------------------------------------------------- 1 | def get_workload_from_file(file_path): 2 | workload = [] 3 | try: 4 | with open(file_path, 'r') as f: 5 | result = f.read().splitlines() 6 | for i in result: 7 | if i: 8 | workload.append(i) 9 | f.close() 10 | except Exception as error: 11 | print(str(error)) 12 | return workload 13 | 14 | -------------------------------------------------------------------------------- /knob-effect-test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import numpy as np 4 | import tensorflow as tf 5 | import keras.backend as K 6 | from environment import Database, Environment 7 | from model import ActorCritic 8 | from configs import parse_args 9 | from get_workload_from_file import get_workload_from_file 10 | 11 | if __name__ == "__main__": 12 | 13 | argus = parse_args() 14 | 15 | # prepare_training_workloads 16 | training_workloads = [] 17 | workload = get_workload_from_file(argus["workload_file_path"]) 18 | argus["workload"] = workload 19 | sess = tf.Session() 20 | K.set_session(sess) 21 | db = Database(argus) # connector knobs metric 22 | env = Environment(db, argus) 23 | 24 | # TODO: 训练predict 25 | # sample_times = 2 26 | # for i in range(sample_times): 27 | # training_workloads.append(np.random.choice(workload, np.random.randint(len(workload)), replace=False, p=None)) 28 | # X = [] 29 | # Y = [] 30 | # for w in training_workloads: 31 | # vec = env.parser.get_workload_encoding(w) 32 | # X.append(vec.flatten()) 33 | # state0 = env.db.fetch_internal_metrics() 34 | # env.preheat() 35 | # state1 = env.db.fetch_internal_metrics() 36 | # Y.append(state1 - state0) 37 | # X = np.array(X) 38 | # Y = np.array(Y) 39 | # env.parser.estimator.fit(X, Y, batch_size=50, epochs=predictor_epoch) 40 | 41 | # TODO save&load model e.g. env.parser.estimator.save_weights(path) 42 | # env.parser.estimator.save_weights(filepath=path) 43 | # env.parser.estimator.load_weights(filepath=path) 44 | 45 | actor_critic = ActorCritic(env, sess, learning_rate=float(argus['learning_rate']), 46 | train_min_size=int(argus['train_min_size']), 47 | size_mem=int(argus['maxlen_mem']), size_predict_mem=int(argus['maxlen_predict_mem'])) 48 | 49 | num_trials = int(argus['num_trial']) # ? 50 | # trial_len = 500 # ? 
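# Unlike main.py, this script never reaches the RL training loop: the sweep below sets
# max_connections to each value in range(3, 12), replays the workload once per value via
# env.execute_command(), and then exits, so the isolated effect of that single knob on
# throughput/latency can be read from the parser's resfile.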
51 | # ntp 52 | 53 | interval = 1 54 | time.sleep(interval) 55 | for value in range(3,12,1): 56 | sql = 'set global %s=%d' % ('max_connections', value) 57 | conn = db._get_conn() 58 | cursor = conn.cursor() 59 | cursor.execute(sql) 60 | print(f"修改参数-max_connections:{value}") 61 | conn.commit() 62 | conn.close() 63 | 64 | env.execute_command() 65 | time.sleep(interval) 66 | 67 | 68 | exit() 69 | 70 | # First iteration 71 | cur_state = env._get_obs() # np.array (inner_metric + sql) 72 | cur_state = cur_state.reshape((1, env.state.shape[0])) 73 | # action = env.action_space.sample() 74 | action = env.fetch_action() # np.array 75 | action_2 = action.reshape((1, env.knob_num)) # for memory 76 | action_2 = action_2[:, :env.action_space.shape[0]] 77 | new_state, reward, socre, cur_throughput = env.step(action, 0, 78 | 1) # apply the action -> to steady state -> return the reward 79 | new_state = new_state.reshape((1, env.state.shape[0])) 80 | reward_np = np.array([reward]) 81 | print(reward_np) 82 | actor_critic.remember(cur_state, action_2, reward_np, new_state, False) 83 | actor_critic.train(1) # len<32, useless 84 | 85 | cur_state = new_state 86 | predicted_rewardList = [] 87 | for epoch in range(num_trials): 88 | # env.render() 89 | cur_state = cur_state.reshape((1, env.state.shape[0])) 90 | action, isPredicted, action_tmp = actor_critic.act(cur_state) 91 | # action.tolist() # to execute 92 | new_state, reward, score, throughput = env.step(action, isPredicted, epoch + 1, action_tmp) 93 | new_state = new_state.reshape((1, env.state.shape[0])) 94 | 95 | action = env.fetch_action() 96 | action_2 = action.reshape((1, env.knob_num)) # for memory 97 | action_2 = action_2[:, :env.action_space.shape[0]] 98 | 99 | if isPredicted == 1: 100 | predicted_rewardList.append([epoch, reward]) 101 | 102 | reward_np = np.array([reward]) 103 | 104 | actor_critic.remember(cur_state, action_2, reward_np, new_state, False) 105 | actor_critic.train(epoch) 106 | 107 | print('============train running==========') 108 | 109 | if epoch % 5 == 0: 110 | print('============save_weights==========') 111 | actor_critic.actor_model.save_weights('saved_model_weights/actor_weights.h5') 112 | actor_critic.critic_model.save_weights('saved_model_weights/critic_weights.h5') 113 | 114 | if (throughput - cur_throughput) / cur_throughput > float(argus['stopping_throughput_improvement_percentage']): 115 | print("training end!!") 116 | env.parser.close_mysql_conn() 117 | break 118 | 119 | cur_state = new_state 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import numpy as np 4 | import tensorflow as tf 5 | import keras.backend as K 6 | from environment import Database, Environment 7 | from model import ActorCritic 8 | from configs import parse_args 9 | from get_workload_from_file import get_workload_from_file 10 | 11 | if __name__ == "__main__": 12 | 13 | argus = parse_args() 14 | 15 | # prepare_training_workloads 16 | training_workloads = [] 17 | workload = get_workload_from_file(argus["workload_file_path"]) 18 | argus["workload"] = workload 19 | sess = tf.Session() 20 | K.set_session(sess) 21 | db = Database(argus) # connector knobs metric 22 | env = Environment(db, argus) 23 | 24 | # TODO: 训练predict 25 | # sample_times = 2 26 | # for i in range(sample_times): 27 | # training_workloads.append(np.random.choice(workload, 
np.random.randint(len(workload)), replace=False, p=None)) 28 | # X = [] 29 | # Y = [] 30 | # for w in training_workloads: 31 | # vec = env.parser.get_workload_encoding(w) 32 | # X.append(vec.flatten()) 33 | # state0 = env.db.fetch_internal_metrics() 34 | # env.preheat() 35 | # state1 = env.db.fetch_internal_metrics() 36 | # Y.append(state1 - state0) 37 | # X = np.array(X) 38 | # Y = np.array(Y) 39 | # env.parser.estimator.fit(X, Y, batch_size=50, epochs=predictor_epoch) 40 | 41 | # TODO save&load model e.g. env.parser.estimator.save_weights(path) 42 | # env.parser.estimator.save_weights(filepath=path) 43 | # env.parser.estimator.load_weights(filepath=path) 44 | 45 | actor_critic = ActorCritic(env, sess, learning_rate=float(argus['learning_rate']), 46 | train_min_size=int(argus['train_min_size']), 47 | size_mem=int(argus['maxlen_mem']), size_predict_mem=int(argus['maxlen_predict_mem'])) 48 | 49 | num_trials = int(argus['num_trial']) # ? 50 | # trial_len = 500 # ? 51 | # ntp 52 | 53 | 54 | # First iteration 55 | cur_state = env._get_obs() # np.array (inner_metric + sql) 56 | cur_state = cur_state.reshape((1, env.state.shape[0])) 57 | # action = env.action_space.sample() 58 | action = env.fetch_action() # np.array 59 | action_2 = action.reshape((1, env.knob_num)) # for memory 60 | action_2 = action_2[:, :env.action_space.shape[0]] 61 | new_state, reward, socre, cur_throughput = env.step(action, 0, 62 | 1) # apply the action -> to steady state -> return the reward 63 | new_state = new_state.reshape((1, env.state.shape[0])) 64 | reward_np = np.array([reward]) 65 | print(reward_np) 66 | actor_critic.remember(cur_state, action_2, reward_np, new_state, False) 67 | actor_critic.train(1) # len<[train_min_size], useless 68 | 69 | cur_state = new_state 70 | predicted_rewardList = [] 71 | for epoch in range(num_trials): 72 | # env.render() 73 | cur_state = cur_state.reshape((1, env.state.shape[0])) 74 | action, isPredicted, action_tmp = actor_critic.act(cur_state) 75 | # action.tolist() # to execute 76 | new_state, reward, score, throughput = env.step(action, isPredicted, epoch + 1, action_tmp) 77 | new_state = new_state.reshape((1, env.state.shape[0])) 78 | 79 | action = env.fetch_action() 80 | action_2 = action.reshape((1, env.knob_num)) # for memory 81 | action_2 = action_2[:, :env.action_space.shape[0]] 82 | 83 | if isPredicted == 1: 84 | predicted_rewardList.append([epoch, reward]) 85 | print("[predicted]", action_2, reward, throughput) 86 | else: 87 | print("[random]", action_2, reward, throughput) 88 | 89 | reward_np = np.array([reward]) 90 | 91 | actor_critic.remember(cur_state, action_2, reward_np, new_state, False) 92 | actor_critic.train(epoch) 93 | 94 | # print('============train running==========') 95 | 96 | if epoch % 5 == 0: 97 | # print('============save_weights==========') 98 | actor_critic.actor_model.save_weights('saved_model_weights/actor_weights.h5') 99 | actor_critic.critic_model.save_weights('saved_model_weights/critic_weights.h5') 100 | ''' 101 | if (throughput - cur_throughput) / cur_throughput > float(argus['stopping_throughput_improvement_percentage']): 102 | print("training end!!") 103 | env.parser.close_mysql_conn() 104 | break 105 | ''' 106 | 107 | cur_state = new_state 108 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import datetime 3 | from os import path 4 | import subprocess 5 | import time 6 | from collections import 
deque 7 | import numpy as np 8 | import random 9 | import tensorflow as tf 10 | import pandas 11 | import heapq 12 | import pymysql 13 | import pymysql.cursors as pycursor 14 | 15 | import gym 16 | from gym import spaces 17 | from gym.utils import seeding 18 | 19 | import os 20 | from keras.models import Sequential, Model 21 | from keras.layers import Dense, Dropout, Input 22 | from keras.layers.merge import Add, Multiply 23 | from keras.optimizers import Adam 24 | import keras.backend as K 25 | from keras.layers.normalization import BatchNormalization 26 | from keras.wrappers.scikit_learn import KerasRegressor 27 | from sklearn.preprocessing import StandardScaler 28 | from configs import knob_config 29 | from keras.initializers import random_uniform,ones,constant 30 | 31 | 32 | # determines how to assign values to each state, i.e. takes the state 33 | # and action (two-input model) and determines the corresponding value 34 | # Tunable parameters 35 | # learning_rate = 0.001 36 | # epsilon = 1.0 37 | # epsilon_decay = .995 38 | # gamma = .95 39 | # tau = .125 40 | # 4*relu 41 | class ActorCritic: 42 | def __init__(self, env, sess, learning_rate=0.001, train_min_size=32, size_mem=2000, size_predict_mem=2000): 43 | self.env = env 44 | 45 | self.sess = sess 46 | self.learning_rate = learning_rate # 0.001 47 | self.train_min_size = train_min_size 48 | self.epsilon = .9 49 | self.epsilon_decay = .999 50 | self.gamma = .095 51 | self.tau = .125 52 | self.timestamp = int(time.time()) 53 | # ===================================================================== # 54 | # Actor Model # 55 | # Chain rule: find the gradient of chaging the actor network params in # 56 | # getting closest to the final value network predictions, i.e. de/dA # 57 | # Calculate de/dA as = de/dC * dC/dA, where e is error, C critic, A act # 58 | # ===================================================================== # 59 | self.memory = deque(maxlen=size_mem) 60 | self.mem_predicted = deque(maxlen=size_predict_mem) 61 | self.actor_state_input, self.actor_model = self.create_actor_model() 62 | _, self.target_actor_model = self.create_actor_model() 63 | 64 | self.actor_critic_grad = tf.placeholder(tf.float32, 65 | [None, self.env.action_space.shape[ 66 | 0]]) # where we will feed de/dC (from critic) 67 | 68 | # load pre-trained models 69 | # if os.path.exists('saved_model_weights/actor_weights.h5'): 70 | # self.actor_model.load_weights('saved_model_weights/actor_weights.h5') 71 | # self.target_actor_model.load_weights('saved_model_weights/actor_weights.h5') 72 | 73 | actor_model_weights = self.actor_model.trainable_weights 74 | self.actor_grads = tf.gradients(self.actor_model.output, 75 | actor_model_weights, -self.actor_critic_grad) # dC/dA (from actor) 76 | grads = zip(self.actor_grads, actor_model_weights) 77 | self.optimize = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(grads) 78 | 79 | # ===================================================================== # 80 | # Critic Model # 81 | # ===================================================================== # 82 | 83 | self.critic_state_input, self.critic_action_input, \ 84 | self.critic_model = self.create_critic_model() 85 | _, _, self.target_critic_model = self.create_critic_model() 86 | 87 | #if os.path.exists('saved_model_weights/critic_weights.h5'): 88 | # self.critic_model.load_weights('saved_model_weights/critic_weights.h5') 89 | # self.target_critic_model.load_weights('saved_model_weights/critic_weights.h5') 90 | 91 | # print('de:', 
self.critic_model.output) 92 | # print('dC:', self.critic_action_input) 93 | 94 | self.critic_grads = tf.gradients(self.critic_model.output, 95 | self.critic_action_input) # where we calcaulte de/dC for feeding above 96 | 97 | # Initialize for later gradient calculations 98 | self.sess.run(tf.initialize_all_variables()) 99 | 100 | # ========================================================================= # 101 | # Model Definitions # 102 | # ========================================================================= # 103 | 104 | def create_actor_model(self): 105 | def target_range(x, target_min=self.env.a_low, target_max=self.env.a_high): 106 | x02 = K.tanh(x) + 1 # x in range(0,2) 107 | scale = (target_max - target_min) / 2. 108 | return x02 * scale + target_min 109 | 110 | #def target_range(x, target_min=self.env.a_low, target_max=self.env.a_high): 111 | # scale = (target_max - target_min) 112 | # return scale * K.sigmoid(x) + target_min 113 | 114 | state_input = Input(shape=self.env.observation_space.shape) 115 | 116 | h1 = Dense(128,name = 'h1', activation='relu')(state_input) 117 | n1 =BatchNormalization(axis=1,center=False,scale=False,name='n1')(h1) 118 | h2 = Dense(64, name = 'h2',activation='tanh')(n1) 119 | d1 = Dropout(0.3)(h2) 120 | # add a dense-tanh expend the space!! 121 | #n1 = BatchNormalization(name='n1',center=False,scale=False)(d1) 122 | output = Dense(self.env.action_space.shape[0],activation=target_range)(d1) 123 | 124 | model = Model(input=state_input, output=output) 125 | adam = Adam(lr=0.001) 126 | model.compile(loss="mse", optimizer=adam) 127 | 128 | return state_input, model 129 | 130 | def create_critic_model(self): 131 | # (dense dense)->dense->dense->BN->dense 132 | 133 | state_input = Input(shape=self.env.observation_space.shape) 134 | state_h1 = Dense(128)(state_input) 135 | # state_h2 = Dense(13)(state_h1) 136 | 137 | action_input = Input(shape=self.env.action_space.shape) 138 | action_h1 = Dense(128)(action_input) # 139 | 140 | merged = Add()([state_h1, action_h1]) 141 | merged_h1 = Dense(int(256))(merged) 142 | h2 = Dense(256)(merged_h1) 143 | n1 = BatchNormalization()(h2) 144 | h3 = Dense(64,activation='tanh')(n1) 145 | d1 = Dropout(0.3)(h3) 146 | n1 = BatchNormalization()(d1) 147 | output = Dense(1)(n1) 148 | 149 | model = Model(input=[state_input, action_input], output=output) 150 | 151 | adam = Adam(lr=0.001) 152 | model.compile(loss="mse", optimizer=adam,metrics=['mse']) 153 | return state_input, action_input, model 154 | 155 | # ========================================================================= # 156 | # Model Training # 157 | # ========================================================================= # 158 | 159 | def remember(self, cur_state, action, reward, new_state, done): 160 | self.memory.append([cur_state, action, reward, new_state, done]) 161 | # print("Mem: Q-%f"%reward) 162 | 163 | def _train_actor(self, samples, i): 164 | for sample in samples: 165 | cur_state, action, reward, new_state, _ = sample 166 | cur_state = (cur_state - min(cur_state[0]))/(max(cur_state[0])-min(cur_state[0])) 167 | predicted_action = self.actor_model.predict(cur_state) 168 | h1 = Model(self.actor_model.input,self.actor_model.get_layer('h1').output) 169 | h2 = Model(self.actor_model.input, self.actor_model.get_layer('h2').output) 170 | n1 = Model(self.actor_model.input,self.actor_model.get_layer('n1').output) 171 | print('predicted_action'*5) 172 | print(predicted_action) 173 | print(h1.predict(cur_state)) 174 | print(h2.predict(cur_state)) 175 | res_n1 = 
n1.predict(cur_state)[0] 176 | print(res_n1) 177 | print(np.mean(res_n1)) 178 | print(np.std(res_n1)) 179 | # print("predicted action", predicted_action) 180 | grads = self.sess.run(self.critic_grads, feed_dict={ 181 | self.critic_state_input: cur_state, 182 | self.critic_action_input: predicted_action 183 | })[0] 184 | # print("first gradient",grads) 185 | self.sess.run(self.optimize, feed_dict={ 186 | self.actor_state_input: cur_state, 187 | self.actor_critic_grad: grads 188 | }) 189 | writer = open('training-results/training-' + str(self.timestamp), 'a') 190 | # writer.write(f"{str(i)}\t{str(list(self.actor_critic_grad))}\n") 191 | writer.write('grads') 192 | writer.write(f"{str(i)}\t{str(list(grads))}\n") 193 | writer.write('cur_state\n') 194 | writer.write(str(cur_state)+'\n') 195 | writer.write('predicted_action\n') 196 | writer.write(str(predicted_action)+'\n') 197 | writer.close() 198 | 199 | def _train_critic(self, samples,i): 200 | for sample in samples: 201 | cur_state, action, t_reward, new_state, done = sample 202 | reward = np.array([]) 203 | reward = np.append(reward, t_reward[0]) 204 | cur_state = (cur_state - min(cur_state[0]))/(max(cur_state[0])-min(cur_state[0])) 205 | # print("<>Q-value:") 206 | # print(reward) 207 | # if not done: 208 | target_action = self.target_actor_model.predict(new_state) 209 | future_reward = self.target_critic_model.predict( 210 | [new_state, target_action])[0][0] 211 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 212 | print("future_reward:", future_reward) 213 | reward += self.gamma * future_reward 214 | print("reward:", reward) 215 | print("target_action:",target_action) 216 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 217 | # There comes the convert 218 | # print("Look:") 219 | # print(cur_state.shape) 220 | # print(action.shape) 221 | # print(reward.shape) 222 | # print(reward) 223 | loss = self.critic_model.fit([cur_state, action], reward, verbose=1) # update the Q-value 224 | writer = open('training-results/critic_training-' + str(self.timestamp), 'a') 225 | writer.write('epoch:\t'+str(i)+'\n') 226 | writer.write('critic_loss\t') 227 | writer.write(f"{str(loss.history['loss'])}\n") 228 | writer.write('reward:\t') 229 | writer.write(f"{str(reward)}\n") 230 | writer.close() 231 | def train(self, i): 232 | self.batch_size = self.train_min_size # 32 233 | if len(self.memory) < self.batch_size: 234 | return 235 | mem = list(self.memory) 236 | rewards = [i[2][0] for i in mem] 237 | indexs = heapq.nlargest(self.batch_size, range(len(rewards)), rewards.__getitem__) 238 | samples = [] 239 | for i in indexs: 240 | samples.append(mem[i]) 241 | samples = random.sample(list(self.memory), self.batch_size - 2) 242 | writer = open('training-results/training-' + str(self.timestamp), 'a') 243 | writer.write('samples\n') 244 | writer.write(f"{str(i)}\t{str(np.array(samples)[:,2])}\n") 245 | writer.close() 246 | # print(samples) 247 | self._train_critic(samples,i) 248 | self._train_actor(samples, i) 249 | self.update_target() 250 | 251 | # ========================================================================= # 252 | # Target Model Updating # 253 | # ========================================================================= # 254 | 255 | def _update_actor_target(self): 256 | actor_model_weights = self.actor_model.get_weights() 257 | actor_target_weights = self.target_actor_model.get_weights() 258 | 259 | for i in range(len(actor_target_weights)): 260 | 
actor_target_weights[i] = actor_model_weights[i] 261 | self.target_actor_model.set_weights(actor_target_weights) 262 | 263 | def _update_critic_target(self): 264 | critic_model_weights = self.critic_model.get_weights() 265 | critic_target_weights = self.target_critic_model.get_weights() 266 | 267 | for i in range(len(critic_target_weights)): 268 | critic_target_weights[i] = critic_model_weights[i] 269 | self.target_critic_model.set_weights(critic_target_weights) 270 | 271 | def update_target(self): 272 | self._update_actor_target() 273 | self._update_critic_target() 274 | 275 | # ========================================================================= # 276 | # Model Predictions # 277 | # ========================================================================= # 278 | 279 | def get_calculate_knobs(self, action): 280 | caculate_knobs = list(knob_config)[len(action):] 281 | for k in caculate_knobs: 282 | 283 | if knob_config[k]['operator'] == 'multiply': 284 | pos_x = self.env.knob2pos[knob_config[k]['x']] 285 | pos_y = self.env.knob2pos[knob_config[k]['y']] 286 | tmp = action[pos_x] * action[pos_y] 287 | action = np.append(action, tmp) 288 | return action 289 | 290 | def act(self, cur_state): 291 | self.epsilon *= self.epsilon_decay 292 | action_tmp = None 293 | if np.random.random(1) < self.epsilon or len(self.memory) < self.batch_size: 294 | print("[Random Tuning]") 295 | action = np.round(self.env.action_space.sample()) 296 | action = action.astype(np.float64) 297 | flag = 0 298 | else: 299 | print("[Model Tuning]") 300 | # action = np.round(self.actor_model.predict(cur_state)[0]) 301 | cur_state = (cur_state - min(cur_state[0]))/(max(cur_state[0])-min(cur_state[0])) 302 | action = self.actor_model.predict(cur_state)[0] 303 | print(action) 304 | # TODO: 临时参数,查看状态使用 305 | action_tmp = action 306 | action = np.round(action) 307 | action = action.astype(np.float64) 308 | flag = 1 309 | 310 | for i in range(action.shape[0]): 311 | if action[i] <= self.env.default_action[i]: 312 | print("[Action %d] Lower than DEFAULT: %f" % (i, action[i])) 313 | action[i] = int(self.env.default_action[i]) * int(self.env.length[i]) 314 | elif action[i] > self.env.a_high[i]: 315 | print("[Action %d] Higher than MAX: %f" % (i, action[i])) 316 | action[i] = int(self.env.a_high[i]) * int(self.env.length[i]) 317 | else: 318 | action[i] = action[i] * self.env.length[i] 319 | 320 | action = self.get_calculate_knobs(action) 321 | 322 | return action, flag, action_tmp 323 | 324 | -------------------------------------------------------------------------------- /pre-processing/knob_rankiing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import lasso_path 3 | 4 | ''' 5 | from OtterTune - lasso.py 6 | https://github.com/cmu-db/ottertune/edit/master/server/analysis/lasso.py 7 | ''' 8 | 9 | class LassoPath(): 10 | """Lasso: 11 | 12 | Computes the Lasso path using Sklearn's lasso_path method. 13 | 14 | 15 | See also 16 | -------- 17 | http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.lasso_path.html 18 | 19 | 20 | Attributes 21 | ---------- 22 | feature_labels_ : array, [n_features] 23 | Labels for each of the features in X. 24 | 25 | alphas_ : array, [n_alphas] 26 | The alphas along the path where models are computed. (These are 27 | the decreasing values of the penalty along the path). 28 | 29 | coefs_ : array, [n_outputs, n_features, n_alphas] 30 | Coefficients along the path. 
31 | 32 | rankings_ : array, [n_features] 33 | The average ranking of each feature across all target values. 34 | """ 35 | def __init__(self): 36 | self.feature_labels_ = None 37 | self.alphas_ = None 38 | self.coefs_ = None 39 | self.rankings_ = None 40 | 41 | def _reset(self): 42 | """Resets all attributes (erases the model)""" 43 | self.feature_labels_ = None 44 | self.alphas_ = None 45 | self.coefs_ = None 46 | self.rankings_ = None 47 | 48 | def fit(self, X, y, feature_labels, estimator_params=None): 49 | """Computes the Lasso path using Sklearn's lasso_path method. 50 | 51 | Parameters 52 | ---------- 53 | X : array-like, shape (n_samples, n_features) 54 | Training data (the independent variables). 55 | 56 | y : array-like, shape (n_samples, n_outputs) 57 | Training data (the output/target values). 58 | 59 | feature_labels : array-like, shape (n_features) 60 | Labels for each of the features in X. 61 | 62 | estimator_params : dict, optional 63 | The parameters to pass to Sklearn's Lasso estimator. 64 | 65 | Returns 66 | ------- 67 | self 68 | """ 69 | 70 | self._reset() 71 | if estimator_params is None: 72 | estimator_params = {} 73 | self.feature_labels_ = feature_labels 74 | 75 | alphas, coefs, _ = lasso_path(X, y, **estimator_params) 76 | self.alphas_ = alphas.copy() 77 | self.coefs_ = coefs.copy() 78 | 79 | # Rank the features in X by order of importance. This ranking is based 80 | # on how early a given features enter the regression (the earlier a 81 | # feature enters the regression, the MORE important it is). 82 | feature_rankings = [[] for _ in range(X.shape[1])] 83 | for target_coef_paths in self.coefs_: 84 | for i, feature_path in enumerate(target_coef_paths): 85 | entrance_step = 1 86 | for val_at_step in feature_path: 87 | if val_at_step == 0: 88 | entrance_step += 1 89 | else: 90 | break 91 | feature_rankings[i].append(entrance_step) 92 | self.rankings_ = np.array([np.mean(ranks) for ranks in feature_rankings]) 93 | return self 94 | 95 | def get_ranked_features(self): 96 | if self.rankings_ is None: 97 | raise Exception("No lasso path has been fit yet!") 98 | 99 | rank_idxs = np.argsort(self.rankings_) 100 | return [self.feature_labels_[i] for i in rank_idxs] 101 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.18.0 2 | 3 | Keras==2.3.0 4 | 5 | matplotlib==3.1.1 6 | 7 | numpy==1.17.2 8 | 9 | pandas==0.25.2 10 | 11 | paramiko==2.7.2 12 | 13 | PyMySQL==0.10.1 14 | 15 | pyparsing==2.4.2 16 | 17 | python-dateutil==2.8.0 18 | 19 | tensorflow==1.15.0 20 | 21 | sklearn==0.0 22 | 23 | DBUtils==2.0.1 24 | -------------------------------------------------------------------------------- /run_job.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | import queue 4 | import pymysql 5 | from configs import parse_args 6 | from dbutils.pooled_db import PooledDB 7 | 8 | args = parse_args() 9 | lock = threading.Lock() 10 | total_lat = 0 11 | error_query_num = 0 12 | 13 | POOL = None 14 | 15 | 16 | # 把任务放入队列中 17 | class Producer(threading.Thread): 18 | def __init__(self, name, queue, workload): 19 | self.__name = name 20 | self.__queue = queue 21 | self.workload = workload 22 | super(Producer, self).__init__() 23 | 24 | def run(self): 25 | for index, query in enumerate(self.workload): 26 | self.__queue.put(str(index) + "~#~" + query) 27 | 28 | 29 | # 线程处理任务 30 | class 
Consumer(threading.Thread): 31 | def __init__(self, name, queue): 32 | self.__name = name 33 | self.__queue = queue 34 | super(Consumer, self).__init__() 35 | 36 | def run(self): 37 | while not self.__queue.empty(): 38 | query = self.__queue.get() 39 | try: 40 | consumer_process(query) 41 | finally: 42 | self.__queue.task_done() 43 | 44 | 45 | def consumer_process(task_key): 46 | query = task_key.split('~#~')[1] 47 | if query: 48 | 49 | start = time.time() 50 | result = mysql_query(query) 51 | end = time.time() 52 | interval = end - start 53 | 54 | if result: 55 | lock.acquire() 56 | global total_lat 57 | total_lat += interval 58 | lock.release() 59 | 60 | else: 61 | global error_query_num 62 | lock.acquire() 63 | error_query_num += 1 64 | lock.release() 65 | 66 | 67 | def startConsumer(thread_num, queue): 68 | t_consumer = [] 69 | for i in range(thread_num): 70 | c = Consumer(i, queue) 71 | c.setDaemon(True) 72 | c.start() 73 | t_consumer.append(c) 74 | return t_consumer 75 | 76 | 77 | def run_job(thread_num=1, workload=[], resfile="../output.res"): 78 | global total_lat 79 | total_lat = 0 80 | global error_query_num 81 | error_query_num = 0 82 | workload_len = len(workload) 83 | 84 | global POOL 85 | POOL = PooledDB( 86 | creator=pymysql, # 使用链接数据库的模块 87 | maxconnections=thread_num, # 连接池允许的最大连接数,0和None表示不限制连接数 88 | mincached=0, # 初始化时,链接池中至少创建的空闲的链接,0表示不创建 89 | maxcached=0, # 链接池中最多闲置的链接,0和None不限制 90 | maxshared=0, 91 | blocking=True, # 连接池中如果没有可用连接后,是否阻塞等待。True,等待;False,不等待然后报错 92 | maxusage=None, # 一个链接最多被重复使用的次数,None表示无限制 93 | setsession=[], # 开始会话前执行的命令列表。 94 | ping=0, 95 | # ping MySQL服务端,检查是否服务可用。 96 | host=args["host"], 97 | port=int(args["port"]), 98 | user=args["user"], 99 | password=args["password"], 100 | database=args["database"], 101 | charset='utf8' 102 | ) 103 | 104 | main_queue = queue.Queue(maxsize=0) 105 | p = Producer("Producer Query", main_queue, workload) 106 | p.setDaemon(True) 107 | p.start() 108 | startConsumer(thread_num, main_queue) 109 | # 确保所有的任务都生成 110 | p.join() 111 | start = time.time() 112 | print("run_job开始运行,线程数:", thread_num) 113 | # 等待处理完所有任务 114 | main_queue.join() 115 | POOL.close() 116 | run_time = round(time.time() - start, 1) 117 | run_query_num = workload_len - error_query_num 118 | if run_query_num == 0: 119 | avg_lat = 0 120 | avg_qps = 0 121 | else: 122 | avg_lat = total_lat / run_query_num 123 | avg_qps = run_query_num / run_time 124 | text = "\navg_qps(queries/s): \t{}\navg_lat(s): \t{}\n".format(round(avg_qps, 4), round(avg_lat, 4)) 125 | with open(resfile, "w+") as f: 126 | f.write(text) 127 | f.close() 128 | print("run_job运行结束\n脚本总耗时:{}秒,sql执行总耗时:{}秒\n共有{}条数据,执行成功{}条\n{}".format(str(run_time), str(total_lat), 129 | str(workload_len), 130 | str(run_query_num), 131 | text)) 132 | 133 | return round(avg_qps, 4), round(avg_lat, 4) 134 | 135 | 136 | def mysql_query(sql: str) -> bool: 137 | try: 138 | global POOL 139 | conn = POOL.connection() 140 | cursor = conn.cursor() 141 | cursor.execute(sql) 142 | cursor.close() 143 | conn.commit() 144 | return True 145 | except Exception as error: 146 | print("mysql execute: " + str(error)) 147 | return False 148 | -------------------------------------------------------------------------------- /run_workload.py: -------------------------------------------------------------------------------- 1 | from configs import parse_args 2 | from get_workload_from_file import get_workload_from_file 3 | from run_job import run_job 4 | import time 5 | 6 | if __name__ == "__main__": 7 | 8 | argus = parse_args() 9 | 10 
| # prepare_training_workloads 11 | workload = get_workload_from_file(argus["workload_file_path"]) 12 | 13 | file_path = 'training-results/res_no_change-' + str(int(time.time())) 14 | for i in range(0, 3): 15 | pfs = open(file_path, 'a') 16 | avg_qps, avg_lat = run_job(int(argus["thread_num"]), workload) 17 | pfs.write("%d\t%s\t%s\n" % (i, avg_qps, avg_lat)) 18 | pfs.close() 19 | 20 | -------------------------------------------------------------------------------- /saved_model_weights-141threads/actor_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights-141threads/actor_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights-141threads/critic_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights-141threads/critic_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights-2021-07-22-2/actor_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights-2021-07-22-2/actor_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights-2021-07-22-2/critic_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights-2021-07-22-2/critic_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights-2021-07-22/actor_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights-2021-07-22/actor_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights-2021-07-22/critic_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights-2021-07-22/critic_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights/actor_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights/actor_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights/critic_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights/critic_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights/rewards.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights/rewards.txt 
-------------------------------------------------------------------------------- /saved_model_weights_2021-07-07/actor_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021-07-07/actor_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights_2021-07-07/critic_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021-07-07/critic_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights_2021-07-08/actor_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021-07-08/actor_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights_2021-07-08/critic_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021-07-08/critic_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights_2021-07-09/actor_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021-07-09/actor_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights_2021-07-09/critic_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021-07-09/critic_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights_2021_07_02_23_39_12/actor_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021_07_02_23_39_12/actor_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights_2021_07_02_23_39_12/critic_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021_07_02_23_39_12/critic_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights_2021_07_11/actor_weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021_07_11/actor_weights.h5 -------------------------------------------------------------------------------- /saved_model_weights_2021_07_11/critic_weights.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021_07_11/critic_weights.h5
--------------------------------------------------------------------------------
/saved_model_weights_2021_07_11_2/actor_weights.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021_07_11_2/actor_weights.h5
--------------------------------------------------------------------------------
/saved_model_weights_2021_07_11_2/critic_weights.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_2021_07_11_2/critic_weights.h5
--------------------------------------------------------------------------------
/saved_model_weights_tmp/actor_weights.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_tmp/actor_weights.h5
--------------------------------------------------------------------------------
/saved_model_weights_tmp/critic_weights.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XuanheZhou/qtune-mysql/741a42b1c4582ecce8c1cb0ee2aa6db052125af0/saved_model_weights_tmp/critic_weights.h5
--------------------------------------------------------------------------------
/scripts/output.res:
--------------------------------------------------------------------------------
 1 | 
 2 | avg_qps(queries/s): 	753.9394
 3 | avg_lat(s): 	0.1227
 4 | 
--------------------------------------------------------------------------------
/scripts/run_job.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # usage: run_job.sh host port user password database queries_dir output_file thread_num
 3 | 
 4 | avg_lat=0
 5 | avg_tps=0
 6 | count=0
 7 | 
 8 | printf "query\tlat(ms)\n" > $7
 9 | 
10 | tmp_fifofile="/tmp/$$.fifo"
11 | mkfifo $tmp_fifofile
12 | exec 6<>$tmp_fifofile
13 | rm $tmp_fifofile
14 | 
15 | thread_num=$8
16 | 
17 | rm -f "/tmp/avg_lat_pipef.txt"
18 | rm -f "/tmp/avg_tps_pipef.txt"
19 | rm -f "/tmp/count_pipef.txt"
20 | 
21 | for ((i=0;i<${thread_num};i++));do
22 |     echo
23 | done >&6
24 | 
25 | for file in $6/*;
26 | do
27 |     read -u6
28 |     {
29 |         tmp=$(mysql -h$1 -P$2 -u$3 -p$4 $5 < $file | tail -n 1 )
30 |         query=`echo $tmp | awk '{print $1}'`
31 |         lat=`echo $tmp | awk '{print $2}'`
32 |         mysql -h$1 -P$2 -u$3 -p$4 $5 -e"\q"
33 |         tps=$(echo "scale=4; 60000 / $lat" | bc)
34 | 
35 |         echo "scale=4; $lat / 1000" | bc >> "/tmp/avg_lat_pipef.txt"
36 |         echo "scale=4; $tps" | bc >> "/tmp/avg_tps_pipef.txt"
37 |         echo "1" | bc >> "/tmp/count_pipef.txt"
38 |         printf "$query\t$lat\n" >> $7
39 |         echo >&6
40 |     } &
41 | done
42 | wait
43 | exec 6>&-
44 | 
45 | avg_lat=$(echo $(echo -n `cat /tmp/avg_lat_pipef.txt | awk '{print $1}'`| tr ' ' '+')|bc)
46 | avg_tps=$(echo $(echo -n `cat /tmp/avg_tps_pipef.txt | awk '{print $1}'`| tr ' ' '+')|bc)
47 | count=$(echo $(echo -n `cat /tmp/count_pipef.txt | awk '{print $1}'`| tr ' ' '+')|bc)
48 | 
49 | rm -f "/tmp/avg_lat_pipef.txt"
50 | rm -f "/tmp/avg_tps_pipef.txt"
51 | rm -f "/tmp/count_pipef.txt"
52 | 
53 | avg_lat=`echo "scale=4; $avg_lat/$count" | bc`
54 | avg_tps=`echo "scale=4; $avg_tps/$count" | bc`
55 | 
56 | printf "\navg_tps(txn/min): 
\t%5.4f\navg_lat(ms): \t%5.4f\n" $avg_tps $avg_lat >> $7 57 | -------------------------------------------------------------------------------- /sql2resource.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas 3 | import json 4 | import os 5 | import pymysql 6 | from keras.models import Sequential, Model 7 | from keras.layers import Dense, Dropout, Input 8 | from configs import predictor_output_dim 9 | 10 | query_types = ["insert", "delete", "update", "select"] 11 | 12 | 13 | # base prediction model 14 | def baseline_model(num_feature=len(query_types)): 15 | # create model 16 | model = Sequential() 17 | model.add(Dense(10, input_dim=num_feature, kernel_initializer='normal', activation='relu')) 18 | model.add(Dense(10, kernel_initializer='normal', activation='relu')) 19 | model.add(Dense(predictor_output_dim, kernel_initializer='normal')) 20 | # Compile model 21 | model.compile(loss='mean_squared_error', optimizer='adam') 22 | 23 | return model 24 | 25 | 26 | class SqlParser: 27 | ########################################################################### 28 | # DML: select delete insert update 0 1 2 3 29 | # select {select_expr} 30 | # (The first is default) 31 | # [ALL | DISTINCT | DISTINCTROW] 32 | # [0 | HIGH_PRIORITY], faster than update, with table-level lock 33 | # [0 | STRAIGHT_JOIN], 34 | # [0 | SQL_SMALL_RESULT | SQL_BIG_RESULT] 35 | # [0 | SQL_BUFFER_RESULT] 36 | # [SQL_CACHE | SQL_NO_CACHE] 37 | # [SQL_CALC_FOUND_ROWS] 38 | 39 | # OLTP (workload2vector) 40 | # select*w1 + sum(modifiers)*w2 + num({select_expr})*wl3 # 0.7 0.1 0.2 41 | # from [table] 42 | # [WHERE where_condition] range join 43 | 44 | # OLTP (sql2vector) 45 | # cost-vector: [Aggregate, Nested Loop, Index Scan, Hash_Join] 46 | 47 | # Keywords 48 | # [GROUP BY {col_name | expr | position}] 49 | # [ASC | DESC], ...[WITH ROLLUP] 50 | # [HAVING where_condition] 51 | # [ORDER BY {col_name | expr | position}] 52 | # [ASC | DESC], ... 53 | 54 | # sum(group_table_scale(having)*wi) + order_cost*wi 55 | ########################################################################### 56 | 57 | def __init__(self, argus): 58 | 59 | self.resfile = os.path.join("scripts/") + "output.res" 60 | self.argus = argus 61 | self.conn = self.mysql_conn() 62 | self.tables = self.get_database_tables() 63 | self.query_encoding_map = {} 64 | ########### Convert from the sql statement to the sql vector 65 | # directly read vector from a file (so a python2 script needs to run first!) 
66 | # sql_type * (num_events, C, aggregation, in-mem) 67 | ############################################################################################################################# 68 | 69 | # query encoding features 70 | self.op_weight = {'oltp_point_select': 1, 'select_random_ranges': 2, 'oltp_delete': 3, 71 | 'oltp_insert': 4, 'bulk_insert': 5, 'oltp_update_index': 6, 72 | 'oltp_update_non_index': 7, } 73 | self.num_event = int(argus['num_event']) 74 | self.C = [10000] 75 | self.group_cost = 0 76 | self.in_mem = 0 77 | self.predict_sql_resource_value = None 78 | self.estimator = baseline_model(len(query_types) + len(self.tables)) 79 | # Prepare Data 80 | fs = open("training-data/trainData_sql.txt", 'r') 81 | df = pandas.read_csv(fs, sep=' ', header=None) 82 | lt_sql = df.values 83 | # seperate into input X and output Y 84 | sql_op = lt_sql[:, 0] 85 | sql_X = lt_sql[:, 1:5] # op_type events table_size 86 | sql_Y = lt_sql[:, 5:] 87 | 88 | def query_encoding(self, query): 89 | 90 | if not query: 91 | print("query is empty") 92 | return [] 93 | 94 | if self.query_encoding_map.get(str(query), None): 95 | return self.query_encoding_map[str(query)] 96 | 97 | result = [0 for i in range(len(self.tables) + len(query_types))] 98 | # [0, 0, 0, 0, X, X, X..........] 99 | query_split_list = query.lower().split(" ") 100 | 101 | for index, query_type in enumerate(query_types): 102 | if query_type in query_split_list: 103 | result[index] = 1 104 | 105 | query_split_list = query.replace(",", "").split(" ") 106 | 107 | explain_format_fetchall = self.mysql_query("EXPLAIN FORMAT=JSON {};".format(query)) 108 | if not explain_format_fetchall: 109 | print("explain_format_fetchall is empty, query: {}".format(query)) 110 | return [] 111 | explain_format = json.loads(explain_format_fetchall[0][0]) 112 | explain_format_tables_list = self.get_explain_format_tables_list([], explain_format.get("query_block"), "table") 113 | for explain_format_table in explain_format_tables_list: 114 | explain_format_table_name = explain_format_table["table_name"] 115 | index = query_split_list.index(explain_format_table_name) 116 | if query_split_list[index - 1].lower() == "as": 117 | explain_format_table_name = query_split_list[index - 2] 118 | else: 119 | explain_format_table_name = query_split_list[index - 1] 120 | 121 | for index, table_name in enumerate(self.tables): 122 | if explain_format_table_name == table_name: 123 | result[index + len(query_types)] = float(explain_format_table["cost_info"]["prefix_cost"]) 124 | continue 125 | self.query_encoding_map[str(query)] = result 126 | return result 127 | 128 | def predict_sql_resource(self, workload=[]): 129 | # Predict sql convert 130 | # inner_metric_change np.array 131 | if self.predict_sql_resource_value is None: 132 | print("predict_sql_resource_value is None") 133 | exit() 134 | return self.predict_sql_resource_value 135 | # return self.estimator.predict(self.get_workload_encoding( 136 | # workload)) # input : np.array([[...]]) (sq_type, num_events, C, aggregation, in-mem) 137 | # # output : np.array([[...]]) 138 | 139 | def update(self): 140 | pass 141 | 142 | def mysql_conn(self): 143 | conn = pymysql.connect( 144 | host=self.argus["host"], 145 | user=self.argus["user"], 146 | passwd=self.argus["password"], 147 | port=int(self.argus["port"]), 148 | connect_timeout=30, 149 | charset='utf8') 150 | conn.select_db(self.argus["database"]) 151 | return conn 152 | 153 | def close_mysql_conn(self): 154 | try: 155 | self.conn.close() 156 | except Exception as error: 157 | 
print("close mysqlconn: " + str(error)) 158 | 159 | def mysql_query(self, sql): 160 | try: 161 | cursor = self.conn.cursor() 162 | count = cursor.execute(sql) 163 | if count == 0: 164 | result = 0 165 | else: 166 | result = cursor.fetchall() 167 | cursor.close() 168 | return result 169 | except Exception as error: 170 | print("mysql execute: " + str(error)) 171 | return None 172 | 173 | def get_database_tables(self): 174 | # get all tables 175 | tables_fetchall = self.mysql_query( 176 | "select table_name from information_schema.tables where table_schema='{}';".format(self.argus["database"])) 177 | tables = [] 178 | if not tables_fetchall: 179 | print("tables was not found") 180 | return 181 | for table in tables_fetchall: 182 | if table and table[0]: 183 | tables.append(table[0]) 184 | print("get all tables success") 185 | return tables 186 | 187 | def get_explain_format_tables_list(self, values_list, json, key): 188 | if isinstance(json, dict): 189 | for item, values in json.items(): 190 | if str(item) == str(key): 191 | values_list.append(json.get(item)) 192 | if isinstance(values, dict): 193 | self.get_explain_format_tables_list(values_list, values, key=key) 194 | if isinstance(values, list): 195 | self.get_explain_format_tables_list(values_list, values, key=key) 196 | else: 197 | pass 198 | elif isinstance(json, list): 199 | for data in json: 200 | if isinstance(data, dict): 201 | self.get_explain_format_tables_list(values_list, data, key) 202 | else: 203 | return values_list 204 | return values_list 205 | 206 | def get_workload_encoding(self, workload): 207 | queries_encoding = [] 208 | for query in workload: 209 | queries_encoding.append(self.query_encoding(query)) 210 | 211 | # [0, 0, 0, 0, X, X, X..........] 212 | workload_encoding = np.array([0 for i in range(len(self.tables) + len(query_types))]) 213 | for query_encoding in queries_encoding: 214 | workload_encoding = workload_encoding + np.array(query_encoding) 215 | 216 | for i in range(len(query_types)): 217 | if workload_encoding[i] > 0: 218 | workload_encoding[i] = 1 219 | 220 | return workload_encoding.reshape(1, len(workload_encoding)) 221 | -------------------------------------------------------------------------------- /training-data/trainData_sql.txt: -------------------------------------------------------------------------------- 1 | 0 10 10000 0 0 0 0 0 0 0 0 0 0 0 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 | -------------------------------------------------------------------------------- /workload_file_example.txt: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '9.0' AND t.production_year > 2010 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | 3 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '5.0' AND t.production_year > 2005 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 4 | 5 | SELECT MIN(mi_idx.info) AS rating, 
MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '2.0' AND t.production_year > 1990 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id;
 6 | 
 7 | 
--------------------------------------------------------------------------------
/配置文档.md:
--------------------------------------------------------------------------------
 1 | ### Features
 2 | 
 3 | [✓] Automatic knob tuning based on workload + state features (environment.py)
 4 | 
 5 | [✓] No more hard-coded settings (database, benchmark, knob category/range/type, system state metrics, prediction model, model size, etc.)
 6 | 
 7 | [✓] Pre-training data and models provided (model.py)
 8 | 
 9 | [✓] OLAP/OLTP benchmarks added (JOB, Sysbench, TPC-H)
10 | 
11 | [✓] Training curves plotted (draw.py), covering the performance (latency/throughput) of both random exploration and model prediction
12 | 
13 | [✓] Stopping criterion
14 | 
15 | Set the stopping_throughput_improvement_percentage parameter in config.ini.
16 | 
17 | [todo] Support both restart and non-restart knobs at the same time
18 | 
19 | [todo] Template-based online knob tuning; end-to-end training of the tuning model and the workload-encoding model
20 | 
21 | [todo] Combine existing tuning rules for pre-tuning
22 | 
23 | 
24 | ### Setup steps
25 | 
26 | 1. Install MySQL 5.7
27 | 
28 |    * Grant the MySQL root user remote access, see: [granting root remote access in MySQL](https://www.cnblogs.com/goxcheer/p/8797377.html)
29 | 
30 |    * add a new line `sql_mode=NO_ENGINE_SUBSTITUTION` to `my.cnf` or `my.ini`, and restart mysqld
31 | 
32 | 2. On the database server, upload and install the standard benchmark suites:
33 |    * Sysbench: https://blog.csdn.net/cxin917/article/details/81557453 (install on the database server)
34 |    ```
35 |    sysbench oltp_read_only --db-driver=mysql --tables=4 --table-size=4000000 --threads=141 --events=0 --mysql-host=172.27.58.68 --mysql-user=root --mysql-password=dbmind2020 --mysql-port=3306 --mysql-db='sysbench' --time=150 --range-size=10 --mysql-storage-engine=innodb --rand-type=uniform --report-interval=10
36 |    ```
37 |    * JOB: https://blog.csdn.net/cxin917/article/details/81557453 (if the imdb dataset downloads too slowly, it can be shared via a cloud drive)
38 |    * Enable the MySQL log and capture the workload (a sketch is given after this document).
39 |    * Disable the MySQL log
40 | 3. Put the tuning code on a server that can connect to the database, set up a Python 3.6 environment, and install the dependencies:
41 |    ```
42 |    pip3 install -r requirements.txt
43 |    ```
44 | 4. Edit the parameters:
45 |    Copy config_example.ini to a new file named config.ini
46 |    `
47 |    cp config_example.ini config.ini
48 |    `
49 |    Set each parameter in config.ini.
50 |    The workload is read from the file given by workload_file_path; see workload_file_example.txt for the expected format.
51 | 
52 | 5. Run the code:
53 | 
54 |    ```
55 |    python3 main.py
56 |    ```
57 | 
58 | 6. Plot training.png manually to inspect the training progress:
59 | 
60 |    ```
61 |    python3 draw.py res_predict-1623852012,res_random-1623852012 latency
62 |    ```
63 | 
64 | 
65 | ### Troubleshooting
66 | 
67 | **Q:** "mysql cannot connect from remote host"
68 | 
69 | **A:** https://devanswers.co/cant-connect-mysql-server-remotely/#:~:text=You%20may%20need%20to%20comment,the%20MySQL%20config%20file%20mysqld.&text=The%20above%20line%20is%20telling,remote%20connections%20to%20that%20IP.
70 | 
71 | **Q:** JOB queries take a very long time to run
72 | 
73 | **A:** Drop all foreign keys and create indexes instead (edit fkindexes.sql); run the script in parallel; or execute only a subset of the SQL files
74 | 
75 | **Q:** "ImportError: attempted relative import with no known parent package"
76 | 
77 | **A:** https://stackoverflow.com/questions/16981921/relative-imports-in-python-3
78 | 
79 | **Q:** pd.read_csv: "ValueError: cannot convert float NaN to integer"
80 | 
81 | **A:** 1. pass sep="\t"; 2. check the file encoding (utf-8)
82 | 
--------------------------------------------------------------------------------
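Step 2 of the setup guide above only says to enable and later disable the MySQL log in order to capture the workload. The snippet below is an editorial sketch rather than repo code: it assumes the slow query log is the log being toggled (which is what get_sysbench_workload_from_slow_file.py appears to consume), and the connection parameters are placeholders.

```
# Hedged sketch, not part of the repository: toggle the slow query log around a
# benchmark run so the executed statements can later be extracted as a workload file.
# host/port/user/passwd below are placeholders.
import pymysql


def set_slow_query_log(enabled, host="127.0.0.1", port=3306, user="root", passwd=""):
    conn = pymysql.connect(host=host, port=port, user=user, passwd=passwd)
    try:
        with conn.cursor() as cur:
            cur.execute("SET GLOBAL slow_query_log = {}".format(1 if enabled else 0))
            if enabled:
                # a threshold of 0 seconds makes MySQL log every statement
                cur.execute("SET GLOBAL long_query_time = 0")
    finally:
        conn.close()


set_slow_query_log(True)   # before running sysbench / JOB
# ... run the benchmark ...
set_slow_query_log(False)  # afterwards, parse the slow log into a workload file
```

With long_query_time at 0 the slow log records every statement, so the resulting file contains the full benchmark workload.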
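For anyone tracing sql2resource.py: query_encoding() returns one fixed-length vector per query, whose first four entries flag which DML keywords (insert, delete, update, select) appear and whose remaining entries hold the EXPLAIN FORMAT=JSON prefix_cost of each table the query touches. A purely illustrative example follows; the table list and the cost value are invented, not taken from the repo.

```
# Illustration only (not repo code). Suppose query_types = ["insert", "delete", "update", "select"]
# and the database has two tables, self.tables = ["t1", "t2"]. Then a query such as
#   "select c from t2 where id < 10"
# would be encoded roughly as:
encoding = [
    0, 0, 0, 1,      # insert / delete / update / select keyword flags
    0.0, 123.45,     # EXPLAIN prefix_cost attributed to t1 and t2 (123.45 is a made-up value)
]
```

get_workload_encoding() then sums these per-query vectors over the whole workload and clips the first four entries back to 0/1, giving a single workload vector for the resource-prediction model.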