├── workflow.pdf ├── log.py ├── speed.py ├── memory.py ├── README.md ├── cluster.py ├── fifo_env.py ├── srtf_env.py ├── tb_log.py ├── drf_env.py ├── tetris_env.py ├── optimus_env.py ├── exp_drf_env.py ├── validate.py ├── job.py ├── comparison.py ├── params_template.py ├── prioritized_memory.py ├── scheduler_base.py ├── parameters.py ├── trace.py ├── network.py ├── experiment.py ├── rl_env.py ├── train_a3c.py └── train.py /workflow.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pengyanghua/DL2/HEAD/workflow.pdf -------------------------------------------------------------------------------- /log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | # create logger 4 | def getLogger(name='logger', level='INFO', mode='w', fh=True, ch=True, prefix=""): 5 | 6 | logger = logging.getLogger(name) 7 | 8 | fh = logging.FileHandler(name + '.log', mode) 9 | 10 | ch = logging.StreamHandler() 11 | 12 | if level == "INFO": 13 | logger.setLevel(logging.INFO) 14 | fh.setLevel(logging.INFO) 15 | ch.setLevel(logging.INFO) 16 | elif level == "DEBUG": 17 | logger.setLevel(logging.DEBUG) 18 | fh.setLevel(logging.DEBUG) 19 | ch.setLevel(logging.DEBUG) 20 | elif level == "ERROR": 21 | logger.setLevel(logging.ERROR) 22 | fh.setLevel(logging.ERROR) 23 | ch.setLevel(logging.ERROR) 24 | 25 | #formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s') 26 | formatter = logging.Formatter(prefix+' '+'%(filename)s:%(lineno)s %(levelname)s: %(message)s') 27 | fh.setFormatter(formatter) 28 | ch.setFormatter(formatter) 29 | 30 | if fh: 31 | logger.addHandler(fh) 32 | if ch: 33 | logger.addHandler(ch) 34 | 35 | return logger 36 | -------------------------------------------------------------------------------- /speed.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import ast 3 | import scipy.interpolate 4 | import matplotlib.pyplot as plt 5 | from matplotlib import cm 6 | from mpl_toolkits.mplot3d import Axes3D 7 | from matplotlib.ticker import MaxNLocator 8 | 9 | 10 | def fit(): 11 | # fit a speed function for each model 12 | speed_funcs = dict() 13 | records = [] 14 | with open("config_speed.txt", "r") as f: 15 | for line in f: 16 | records.append(ast.literal_eval(line.replace('\n',''))) 17 | speed_maps = dict() 18 | for record in records: 19 | model, sync_mode, tot_batch_size, num_ps, num_worker, speeds, ps_cpu_usages, worker_cpu_usages = record 20 | if model not in speed_maps: 21 | speed_maps[model] = [] 22 | speed_maps[model].append((num_ps, num_worker, sum(speeds))) 23 | for model in speed_maps.keys(): 24 | x = []; y = []; z = [] 25 | for _num_ps, _num_worker, _speed in speed_maps[model]: 26 | x.append(_num_ps) 27 | y.append(_num_worker) 28 | z.append(_speed) 29 | interp = scipy.interpolate.Rbf(np.array(x), np.array(y), np.array(z), function='linear') 30 | speed_funcs[model] = interp 31 | return speed_funcs 32 | 33 | speed_funcs = fit() 34 | 35 | -------------------------------------------------------------------------------- /memory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import collections 3 | 4 | ''' 5 | implement prioritized experience replay later 6 | sample transitions with probability relative to the last encountered absolute TD error 7 | ''' 8 | 9 | class Memory: 10 | def __init__(self, maxlen): 11 | self.maxlen = maxlen 12 | self.memory 
= collections.deque(maxlen=maxlen) 13 | self.Transition = collections.namedtuple('Transition', ('state', 'output', 'action', 'reward')) 14 | 15 | def store(self, state, output, action, reward): 16 | self.memory.append(self.Transition(state, output, action, reward)) 17 | 18 | def sample(self, batch_size): 19 | indexes = np.random.choice(self.maxlen, size=batch_size, replace=False) 20 | experience_batch = [self.memory[index] for index in indexes] 21 | IS_weights = [1 for _ in range(batch_size)] 22 | return (indexes, experience_batch, IS_weights) 23 | 24 | def full(self): 25 | if len(self.memory) == self.maxlen: 26 | return True 27 | else: 28 | return False 29 | 30 | def avg_reward(self): 31 | assert len(self.memory) > 0 32 | sum_reward = 0 33 | for i in range(len(self.memory)): 34 | transition = self.memory[i] 35 | sum_reward += transition.reward 36 | return sum_reward/len(self.memory) 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DL2 2 | DL2 is a deep learning-driven scheduler for elastic training in deep learning clusters. DL2 advocates a joint supervised learning and reinforcement learning approach: a neural network is warmed up via offline supervised learning based on job traces produced by the existing cluster scheduler; then the neural network is plugged into the live DL cluster, fine-tuned by reinforcement learning carried out throughout the training progress of the DL jobs, and used for deciding job resource allocation in an online fashion. 3 | 4 | Check [this figure](./workflow.pdf) for the overall workflow illustration. 5 | 6 | 7 | ## Prerequisites 8 | We use TensorFlow to train a model. Make sure you have have installed a 1.x version: 9 | 10 | ``` 11 | pip install tensorflow-gpu==1.13.1 12 | ``` 13 | 14 | ## Training 15 | To train model, run the following command. It will start multiple processes to train a centralized model. 16 | 17 | ``` 18 | python train.py 19 | ``` 20 | 21 | Check [parameters.py](./parameters.py) if you want to change some hyper-parameters. For ease of comparison, we also provide a script [experiment.py](./experiment.py) and you can choose different configurations. 22 | 23 | ## Trace 24 | We put some traces collected from our testbed in [config_speed.txt](./config_speed.txt). You may need to collect your own trace if running on a different setup. For k8s setup, please check [Optimus](https://github.com/pengyanghua/optimus). 25 | 26 | ## Elastic Scaling 27 | 28 | Please check the [MXNet repo](https://github.com/pengyanghua/mxnet) for the implementation of elastic resource scaling. We have modified the communication library including KVStore and pslite. 29 | 30 | 31 | ## Publication 32 | To Add. 
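As a quick, self-contained sanity check of the simulator and the heuristic baselines, here is a minimal sketch that mirrors the `test()` helpers in `drf_env.py` and the driver in `comparison.py` below. It is Python 2 (like the rest of the repo) and assumes a `parameters.py` derived from `params_template.py` plus `config_speed.txt` are present in the working directory:

```
# Run the DRF baseline on one generated job trace and print the summary
# (num_jobs, avg_jct, makespan, reward), mirroring drf_env.test().
import log
import trace
import drf_env

logger = log.getLogger(name="quickstart", level="INFO")
job_trace = trace.Trace(logger).get_trace()

env = drf_env.DRF_Env("DRF", job_trace, logger)
while not env.end:
    env.step()
print env.get_results()
print env.get_job_jcts()
```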
33 | -------------------------------------------------------------------------------- /cluster.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import parameters as pm 3 | 4 | 5 | class Cluster: 6 | def __init__(self, logger): 7 | # 0 means available 8 | self.logger = logger 9 | 10 | self.CLUSTER_RESR_CAPS = np.array([pm.CLUSTER_NUM_NODES * pm.NUM_RESR_SLOTS for i in range(pm.NUM_RESR_TYPES)]) 11 | self.NODE_RESR_CAPS = np.array([pm.NUM_RESR_SLOTS for i in range(pm.NUM_RESR_TYPES)]) 12 | self.cluster_state = np.zeros(shape=(pm.NUM_RESR_TYPES, pm.CLUSTER_NUM_NODES*pm.NUM_RESR_SLOTS)) 13 | self.nodes_used_resrs = np.zeros(shape=(pm.CLUSTER_NUM_NODES, pm.NUM_RESR_TYPES)) 14 | 15 | 16 | def alloc(self, resr_reqs, node): 17 | # allocate resources for one task on a node 18 | if np.any(np.greater(self.nodes_used_resrs[node] + resr_reqs, self.NODE_RESR_CAPS)): # resource not enough 19 | return False,self.nodes_used_resrs[node] 20 | else: 21 | self.nodes_used_resrs[node] += resr_reqs 22 | for i in range(pm.NUM_RESR_TYPES): 23 | resr_req = resr_reqs[i] 24 | if resr_req > 0: 25 | start_index = node*pm.NUM_RESR_SLOTS 26 | for j in range(pm.NUM_RESR_SLOTS): 27 | if self.cluster_state[i, j+start_index] == 0: 28 | self.cluster_state[i, j+start_index] = 1 # instead of job.id/pm.TOT_NUM_JOBS 29 | resr_req -= 1 30 | if resr_req == 0: 31 | break 32 | return True,self.nodes_used_resrs[node] 33 | 34 | 35 | def get_cluster_state(self): 36 | return self.cluster_state.copy() 37 | 38 | def get_cluster_util(self): 39 | utils = [] 40 | for i in range(pm.NUM_RESR_TYPES): 41 | util = float(np.sum(self.nodes_used_resrs[:,i])) / self.CLUSTER_RESR_CAPS[i] 42 | utils.append(util) 43 | 44 | return utils 45 | 46 | def clear(self): 47 | self.cluster_state.fill(0) 48 | self.nodes_used_resrs.fill(0) 49 | 50 | -------------------------------------------------------------------------------- /fifo_env.py: -------------------------------------------------------------------------------- 1 | import Queue 2 | import time 3 | import numpy as np 4 | import parameters as pm 5 | from scheduler_base import Scheduler 6 | 7 | class FIFO_Env(Scheduler): 8 | def _schedule(self): 9 | tic = time.time() 10 | 11 | fifo_queue = Queue.PriorityQueue() 12 | for job in self.uncompleted_jobs: 13 | fifo_queue.put((job.arrv_time, job)) # enqueue jobs into srtf queue 14 | 15 | flag = False 16 | while not fifo_queue.empty(): 17 | (_, job) = fifo_queue.get() 18 | # allocate maximal number of workers 19 | # bundle one ps and one worker together by default 20 | for i in range(pm.MAX_NUM_WORKERS): 21 | _, node = self.node_used_resr_queue.get() 22 | if pm.PS_WORKER: 23 | resr_reqs = job.resr_worker + job.resr_ps 24 | else: 25 | resr_reqs = job.resr_worker 26 | succ, node_used_resrs = self.cluster.alloc(resr_reqs, node) 27 | self.node_used_resr_queue.put((np.sum(node_used_resrs), node)) 28 | if succ: 29 | if pm.PS_WORKER and pm.BUNDLE_ACTION and False: 30 | self._state(job.id, "bundle") 31 | job.num_workers += 1 32 | job.curr_worker_placement.append(node) 33 | job.num_ps += 1 34 | job.curr_ps_placement.append(node) 35 | job.dom_share = np.max(1.0 * ( 36 | job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 37 | else: 38 | self._state(job.id, "worker") 39 | job.num_workers += 1 40 | job.curr_worker_placement.append(node) 41 | job.dom_share = np.max(1.0 * ( 42 | job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 43 
| 44 | if pm.PS_WORKER: 45 | self._state(job.id, "ps") 46 | job.num_ps += 1 47 | job.curr_ps_placement.append(node) 48 | job.dom_share = np.max(1.0 * ( 49 | job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 50 | 51 | self.running_jobs.add(job) 52 | else: # fail to alloc resources, continue to try other job 53 | flag = True 54 | break 55 | if flag: 56 | break 57 | 58 | toc = time.time() 59 | self.logger.debug(self.name + ":: " + "scheduling time: " + "%.3f" % (toc - tic) + " seconds.") 60 | for job in self.uncompleted_jobs: 61 | self.logger.debug(self.name + ":: scheduling results" +" num_worker: " + str(job.num_workers)) 62 | 63 | 64 | def test(): 65 | import log, trace 66 | logger = log.getLogger(name="test.log", level="INFO") 67 | job_trace = trace.Trace(logger).get_trace() 68 | env = FIFO_Env("FIFO", job_trace, logger) 69 | while not env.end: 70 | env.step() 71 | print env.get_results() 72 | 73 | 74 | 75 | 76 | 77 | if __name__ == '__main__': 78 | test() 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /srtf_env.py: -------------------------------------------------------------------------------- 1 | import Queue 2 | import time 3 | import numpy as np 4 | import parameters as pm 5 | from scheduler_base import Scheduler 6 | 7 | class SRTF_Env(Scheduler): 8 | def _schedule(self): 9 | tic = time.time() 10 | srtf_queue = Queue.PriorityQueue() 11 | for job in self.uncompleted_jobs: 12 | srtf_queue.put((1-job.progress/job.num_epochs, job.arrv_time, job)) # enqueue jobs into srtf queue 13 | 14 | flag = False 15 | while not srtf_queue.empty(): 16 | (_, job_arrival, job) = srtf_queue.get() 17 | # allocate maximal number of workers 18 | # bundle one ps and one worker together by default 19 | for i in range(pm.MAX_NUM_WORKERS): 20 | _, node = self.node_used_resr_queue.get() 21 | if pm.PS_WORKER: 22 | resr_reqs = job.resr_worker + job.resr_ps 23 | else: 24 | resr_reqs = job.resr_worker 25 | succ, node_used_resrs = self.cluster.alloc(resr_reqs, node) 26 | self.node_used_resr_queue.put((np.sum(node_used_resrs), node)) 27 | if succ: 28 | if pm.PS_WORKER and pm.BUNDLE_ACTION and False: 29 | self._state(job.id, "bundle") 30 | job.num_workers += 1 31 | job.curr_worker_placement.append(node) 32 | job.num_ps += 1 33 | job.curr_ps_placement.append(node) 34 | job.dom_share = np.max(1.0 * ( 35 | job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 36 | else: 37 | self._state(job.id, "worker") 38 | job.num_workers += 1 39 | job.curr_worker_placement.append(node) 40 | job.dom_share = np.max(1.0 * ( 41 | job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 42 | 43 | if pm.PS_WORKER: 44 | self._state(job.id, "ps") 45 | job.num_ps += 1 46 | job.curr_ps_placement.append(node) 47 | job.dom_share = np.max(1.0 * ( 48 | job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 49 | 50 | self.running_jobs.add(job) 51 | else: # fail to alloc resources 52 | flag = True 53 | break 54 | if flag: 55 | break 56 | 57 | toc = time.time() 58 | self.logger.debug(self.name + ":: " + "scheduling time: " + "%.3f" % (toc - tic) + " seconds.") 59 | for job in self.uncompleted_jobs: 60 | self.logger.debug(self.name + ":: scheduling results" +" num_worker: " + str(job.num_workers)) 61 | 62 | 63 | def test(): 64 | import log, trace 65 | logger = log.getLogger(name="test.log", level="INFO") 66 | job_trace = 
trace.Trace(logger).get_trace() 67 | env = SRTF_Env("SRTF", job_trace, logger) 68 | while not env.end: 69 | env.step() 70 | print env.get_results() 71 | 72 | 73 | 74 | 75 | 76 | if __name__ == '__main__': 77 | test() 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /tb_log.py: -------------------------------------------------------------------------------- 1 | """Simple example on how to log scalars and images to tensorboard without tensor ops. 2 | License: Copyleft 3 | """ 4 | __author__ = "Michael Gygli" 5 | 6 | import tensorflow as tf 7 | from StringIO import StringIO 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | 11 | 12 | class Logger(object): 13 | """Logging in tensorboard without tensorflow ops.""" 14 | 15 | def __init__(self, log_dir): 16 | """Creates a summary writer logging to log_dir.""" 17 | self.writer = tf.summary.FileWriter(log_dir) 18 | 19 | def add_graph(self, graph): 20 | self.writer.add_graph(graph) 21 | 22 | def add_scalar(self, tag, value, step): 23 | """Log a scalar variable. 24 | Parameter 25 | ---------- 26 | tag : basestring 27 | Name of the scalar 28 | value 29 | step : int 30 | training iteration 31 | """ 32 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, 33 | simple_value=value)]) 34 | self.writer.add_summary(summary, step) 35 | 36 | def add_text(self, tag, value, step): 37 | text_tensor = tf.make_tensor_proto(value, dtype=tf.string) 38 | meta = tf.SummaryMetadata() 39 | meta.plugin_data.plugin_name = "text" 40 | summary = tf.Summary() 41 | summary.value.add(tag=tag, metadata=meta, tensor=text_tensor) 42 | self.writer.add_summary(summary, step) 43 | 44 | def add_images(self, tag, images, step): 45 | """Logs a list of images.""" 46 | 47 | im_summaries = [] 48 | for nr, img in enumerate(images): 49 | # Write the image to a string 50 | s = StringIO() 51 | plt.imsave(s, img, format='png') 52 | 53 | # Create an Image object 54 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 55 | height=img.shape[0], 56 | width=img.shape[1]) 57 | # Create a Summary value 58 | im_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, nr), 59 | image=img_sum)) 60 | 61 | # Create and write Summary 62 | summary = tf.Summary(value=im_summaries) 63 | self.writer.add_summary(summary, step) 64 | 65 | def add_histogram(self, tag, value, step, bins=1000): 66 | """Logs the histogram of a list/vector of values.""" 67 | # Convert to a numpy array 68 | value = np.array(value) 69 | 70 | # Create histogram using numpy 71 | counts, bin_edges = np.histogram(value, bins=bins) 72 | 73 | # Fill fields of histogram proto 74 | hist = tf.HistogramProto() 75 | hist.min = float(np.min(value)) 76 | hist.max = float(np.max(value)) 77 | hist.num = int(np.prod(value.shape)) 78 | hist.sum = float(np.sum(value)) 79 | hist.sum_squares = float(np.sum(value ** 2)) 80 | 81 | # Requires equal number as bins, where the first goes from -DBL_MAX to bin_edges[1] 82 | # See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto#L30 83 | # Thus, we drop the start of the first bin 84 | bin_edges = bin_edges[1:] 85 | 86 | # Add bin edges and counts 87 | for edge in bin_edges: 88 | hist.bucket_limit.append(edge) 89 | for c in counts: 90 | hist.bucket.append(c) 91 | 92 | # Create and write Summary 93 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 94 | self.writer.add_summary(summary, step) 95 | 96 | def flush(self): 97 | self.writer.flush() 
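The `Logger` above writes raw `tf.Summary` protos directly, so scalars, text, images and histograms can be logged without building TensorFlow summary ops. A minimal usage sketch follows (TensorFlow 1.x with matplotlib under Python 2, as used elsewhere in this repo; the tag names and log directory are only illustrative):

```
# Log a scalar curve and a histogram without any TF ops (TF 1.x).
import numpy as np
from tb_log import Logger

tb = Logger("TensorBoard/demo")   # params_template.py uses SUMMARY_DIR = "TensorBoard/"
for step in range(10):
    tb.add_scalar("val/avg_jct", 10.0 - step, step)
    tb.add_histogram("policy/output_probs", np.random.rand(64), step)
tb.add_text("notes", "validation pass finished", step=9)
tb.flush()
```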
-------------------------------------------------------------------------------- /drf_env.py: -------------------------------------------------------------------------------- 1 | import Queue 2 | import time 3 | import numpy as np 4 | import parameters as pm 5 | from scheduler_base import Scheduler 6 | 7 | class DRF_Env(Scheduler): 8 | # overwrite the scheduling algorithm in Scheduler 9 | def _schedule(self): 10 | tic = time.time() 11 | drf_queue = Queue.PriorityQueue() 12 | for job in self.uncompleted_jobs: 13 | drf_queue.put((0, job.arrv_time, job)) # enqueue jobs into drf queue 14 | 15 | while not drf_queue.empty(): 16 | (_, job_arrival, job) = drf_queue.get() 17 | # bundle one ps and one worker together by default 18 | _, node = self.node_used_resr_queue.get() 19 | if pm.PS_WORKER: 20 | resr_reqs = job.resr_worker + job.resr_ps 21 | else: 22 | resr_reqs = job.resr_worker 23 | succ, node_used_resrs = self.cluster.alloc(resr_reqs, node) 24 | self.node_used_resr_queue.put((np.sum(node_used_resrs), node)) 25 | if succ: 26 | if pm.PS_WORKER and pm.BUNDLE_ACTION and False: 27 | self._state(job.id, "bundle") 28 | job.num_workers += 1 29 | job.curr_worker_placement.append(node) 30 | job.num_ps += 1 31 | job.curr_ps_placement.append(node) 32 | job.dom_share = np.max(1.0 * (job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 33 | else: 34 | self._state(job.id, "worker") 35 | job.num_workers += 1 36 | job.curr_worker_placement.append(node) 37 | job.dom_share = np.max(1.0 * (job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 38 | # print "worker", self.data[-1] 39 | 40 | if pm.PS_WORKER: 41 | self._state(job.id, "ps") 42 | job.num_ps += 1 43 | job.curr_ps_placement.append(node) 44 | job.dom_share = np.max(1.0 * (job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 45 | 46 | # print "ps", self.data[-1] 47 | # a = raw_input("next step?") 48 | self.running_jobs.add(job) 49 | if job.num_workers < pm.MAX_NUM_WORKERS and job.num_ps < pm.MAX_NUM_WORKERS: 50 | drf_queue.put((job.dom_share, job_arrival, job)) 51 | 52 | else: # fail to alloc resources, try other jobs 53 | # continue 54 | break 55 | 56 | toc = time.time() 57 | self.logger.debug(self.name + ":: " + "scheduling time: " + "%.3f" % (toc - tic) + " seconds.") 58 | for job in self.uncompleted_jobs: 59 | self.logger.debug(self.name + ":: scheduling results" +" num_worker: " + str(job.num_workers) +" num_ps: " + str(job.num_ps)) 60 | 61 | 62 | def test(): 63 | import log, trace 64 | np.random.seed(9973) 65 | logger = log.getLogger(name="test.log", level="DEBUG") 66 | job_trace = trace.Trace(logger).get_trace() 67 | env = DRF_Env("DRF", job_trace, logger) 68 | while not env.end: 69 | env.step() 70 | #print env.observe() 71 | # print env.data 72 | # input() 73 | print env.get_results() 74 | print env.get_job_jcts() 75 | for i in range(len(env.trace)): 76 | if i in env.trace: 77 | for job in env.trace[i]: 78 | print i+1, job.id, job.type, job.model 79 | 80 | 81 | 82 | 83 | 84 | 85 | if __name__ == '__main__': 86 | test() 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /tetris_env.py: -------------------------------------------------------------------------------- 1 | import Queue 2 | import time 3 | import numpy as np 4 | import parameters as pm 5 | from scheduler_base import Scheduler 6 | 7 | class Tetris_Env(Scheduler): 8 | 9 | def _schedule(self): 10 | tic = 
time.time() 11 | if len(self.uncompleted_jobs) > 0: 12 | node_used_resr_queue = Queue.PriorityQueue() 13 | for i in range(pm.CLUSTER_NUM_NODES): 14 | node_used_resr_queue.put((i, np.zeros(pm.NUM_RESR_TYPES))) # this queue is sorted based on node id instead of available resources 15 | 16 | while not node_used_resr_queue.empty(): 17 | node, used_resrs = node_used_resr_queue.get() 18 | # calculate score 19 | mean_resr_score = dict() 20 | mean_align_score = dict() 21 | for job in self.uncompleted_jobs: 22 | if pm.PS_WORKER: 23 | resr = job.resr_ps + job.resr_worker 24 | else: 25 | resr = job.resr_worker 26 | mean_resr_score[job] = np.sum(resr) * (1 - job.progress / job.num_epochs) 27 | mean_align_score[job] = np.sum((pm.NUM_RESR_SLOTS - used_resrs) * resr) 28 | weight = (float(sum(mean_align_score.values())) / len(mean_align_score)) / (float(sum(mean_resr_score.values())) / len(mean_resr_score)) 29 | if weight == 0: 30 | continue 31 | score_q = Queue.PriorityQueue() 32 | for job in self.uncompleted_jobs: 33 | score = mean_align_score[job] + weight * mean_resr_score[job] 34 | score_q.put((-score, job)) 35 | while not score_q.empty(): 36 | _, job = score_q.get() 37 | if job.num_workers >= pm.MAX_NUM_WORKERS: 38 | continue 39 | else: 40 | # alloc resr 41 | if pm.PS_WORKER: 42 | resr_reqs = job.resr_worker + job.resr_ps 43 | else: 44 | resr_reqs = job.resr_worker 45 | succ, node_used_resrs = self.cluster.alloc(resr_reqs, node) 46 | if succ: 47 | if pm.PS_WORKER and pm.BUNDLE_ACTION: 48 | self._state(job.id, "bundle") 49 | job.num_workers += 1 50 | job.curr_worker_placement.append(node) 51 | job.num_ps += 1 52 | job.curr_ps_placement.append(node) 53 | job.dom_share = np.max(1.0 * (job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 54 | else: 55 | self._state(job.id, "worker") 56 | job.num_workers += 1 57 | job.curr_worker_placement.append(node) 58 | job.dom_share = np.max(1.0 * ( 59 | job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 60 | 61 | if pm.PS_WORKER: 62 | self._state(job.id, "ps") 63 | job.num_ps += 1 64 | job.curr_ps_placement.append(node) 65 | job.dom_share = np.max(1.0 * ( 66 | job.num_workers * job.resr_worker + job.num_ps * job.resr_ps) / self.cluster.CLUSTER_RESR_CAPS) 67 | self.running_jobs.add(job) 68 | node_used_resr_queue.put((node, node_used_resrs)) # this code must be here instead of above 69 | break 70 | else: # fail to alloc resources 71 | # continue 72 | break 73 | 74 | toc = time.time() 75 | self.logger.debug(self.name + ":: " + "scheduling time: " + "%.3f" % (toc - tic) + " seconds.") 76 | for job in self.uncompleted_jobs: 77 | self.logger.debug(self.name + ":: scheduling results" +" num_worker: " + str(job.num_workers)) 78 | 79 | 80 | def test(): 81 | import log, trace 82 | logger = log.getLogger(name="test.log", level="DEBUG") 83 | job_trace = trace.Trace(logger).get_trace() 84 | env = Tetris_Env("Tetris", job_trace, logger) 85 | while not env.end: 86 | env.step() 87 | print env.get_results() 88 | 89 | 90 | 91 | 92 | 93 | if __name__ == '__main__': 94 | test() 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /optimus_env.py: -------------------------------------------------------------------------------- 1 | import Queue 2 | import time 3 | import numpy as np 4 | import parameters as pm 5 | from scheduler_base import Scheduler 6 | 7 | 8 | EST_ERROR = 0.0 # change to 0.05 with estimation error 9 | 10 | 11 | class 
Optimus_Env(Scheduler): 12 | # can negatively impact performance when 1. local minimum 2. EST_ERROR make the utility negative, 13 | # need to use curve fitting for correct implementation of optimus 14 | def est_util(self, job): 15 | if job.num_workers == 0: 16 | return (-np.iinfo(np.int32).max, "worker") 17 | if pm.PS_WORKER and job.num_ps == 0: 18 | return (-np.iinfo(np.int32).max, "ps") 19 | 20 | speed = job.step(False) * (1+EST_ERROR*np.random.choice([-1,1],1)) 21 | node_used_resrs, node = self.node_used_resr_queue.get() 22 | self.node_used_resr_queue.put((np.sum(node_used_resrs), node)) 23 | 24 | job.num_workers += 1 25 | job.curr_worker_placement.append(node) 26 | speed_2 = job.step(False) * (1+EST_ERROR*np.random.choice([-1,1],1)) 27 | worker_utility = (job.num_epochs - job.progress) / speed - (job.num_epochs - job.progress) / speed_2 28 | job.num_workers -= 1 29 | job.curr_worker_placement = job.curr_worker_placement[:-1] 30 | 31 | if pm.PS_WORKER: 32 | job.num_ps += 1 33 | job.curr_ps_placement.append(node) 34 | speed_3 = job.step(False) 35 | ps_utility = (job.num_epochs - job.progress) / speed - (job.num_epochs - job.progress) / speed_3 36 | job.num_ps -= 1 37 | job.curr_ps_placement = job.curr_ps_placement[:-1] 38 | if ps_utility >= worker_utility: 39 | return (-ps_utility, "ps") 40 | else: 41 | return (-worker_utility, "worker") 42 | else: 43 | return (-worker_utility, "worker") 44 | 45 | def _schedule(self): 46 | tic = time.time() 47 | opt_queue = Queue.PriorityQueue() # initialize all jobs' utility to be 0 48 | for job in self.uncompleted_jobs: 49 | util, role = self.est_util(job) 50 | opt_queue.put((util, job, role)) 51 | 52 | while not opt_queue.empty(): 53 | utility, job, role = opt_queue.get() 54 | if utility >= 0: 55 | break 56 | elif role == "worker" and job.num_workers >= pm.MAX_NUM_WORKERS: 57 | continue 58 | elif pm.PS_WORKER and role == "ps" and job.num_ps >= pm.MAX_NUM_WORKERS: 59 | continue 60 | else: 61 | if pm.PS_WORKER and role == "ps": 62 | resr_reqs = job.resr_ps 63 | else: 64 | resr_reqs = job.resr_worker 65 | _, node = self.node_used_resr_queue.get() 66 | succ, node_used_resrs = self.cluster.alloc(resr_reqs, node) 67 | self.node_used_resr_queue.put((np.sum(node_used_resrs), node)) 68 | if succ: 69 | if pm.PS_WORKER and role == "ps": 70 | self._state(job.id, "ps") 71 | job.num_ps += 1 72 | job.curr_ps_placement.append(node) 73 | else: 74 | self._state(job.id, "worker") 75 | job.num_workers += 1 76 | job.curr_worker_placement.append(node) 77 | self.running_jobs.add(job) 78 | util, role = self.est_util(job) 79 | opt_queue.put((util, job, role)) 80 | else: 81 | # continue 82 | break 83 | 84 | toc = time.time() 85 | self.logger.debug(self.name + ":: " + "scheduling time: " + "%.3f" % (toc - tic) + " seconds.") 86 | for job in self.uncompleted_jobs: 87 | self.logger.debug(self.name + ":: scheduling results" + " type: " + str(job.type) + " num_worker: " + str(job.num_workers) +" num_ps: " + str(job.num_ps)) 88 | 89 | 90 | 91 | def test(): 92 | import log, trace 93 | logger = log.getLogger(name="test.log", level="DEBUG") 94 | job_trace = trace.Trace(logger).get_trace() 95 | env = Optimus_Env("Optimus", job_trace, logger) 96 | while not env.end: 97 | env.step() 98 | print env.get_results() 99 | 100 | 101 | 102 | 103 | 104 | if __name__ == '__main__': 105 | test() 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /exp_drf_env.py: 
-------------------------------------------------------------------------------- 1 | import Queue 2 | import time 3 | import numpy as np 4 | import parameters as pm 5 | from scheduler_base import Scheduler 6 | 7 | class DRF_Env(Scheduler): 8 | # overwrite the scheduling algorithm in Scheduler 9 | def _schedule(self): 10 | # DRF 11 | tic = time.time() 12 | 13 | drf_queue = Queue.PriorityQueue() 14 | for job in self.uncompleted_jobs: 15 | # init num_ps and num_worker 16 | job.num_workers = 0 17 | job.num_ps = 0 18 | drf_queue.put((0, job.arrv_time, job)) # enqueue jobs into drf queue 19 | 20 | self.running_jobs = set() 21 | 22 | # keep track of available resources on each node. 23 | node_used_cpu_list = [0 for i in range(pm.CLUSTER_NUM_NODES)] 24 | node_used_mem_list = [0 for i in range(pm.CLUSTER_NUM_NODES)] 25 | node_used_gpu_list = [0 for i in range(pm.CLUSTER_NUM_NODES)] 26 | node_used_bw_list = [0 for i in range(pm.CLUSTER_NUM_NODES)] 27 | 28 | # cur_node_index = 0 29 | node_used_resr_queue = Queue.PriorityQueue() 30 | for i in range(pm.CLUSTER_NUM_NODES): 31 | node_used_resr_queue.put((0, i)) 32 | placements = dict() # job_id:placement_list 33 | 34 | while not drf_queue.empty(): 35 | (dom_share, job_arrival, job) = drf_queue.get() 36 | # bundle one ps and one worker together by default 37 | cpu_req = job.resr_worker[0] + job.resr_ps[0] 38 | mem_req = 0 # job.worker_mem + job.ps_mem 39 | bw_req = 0 # job.worker_bw + job.ps_bw 40 | gpu_req = job.resr_worker[1] + job.resr_ps[1] 41 | 42 | # check whether resources are sufficient 43 | print cpu_req, gpu_req 44 | # node_index = (cur_node_index + i) % len(params.NODE_LIST) # check all nodes 45 | _, node_index = node_used_resr_queue.get() 46 | suff_resr = True 47 | if node_used_cpu_list[node_index] + cpu_req > 8 or \ 48 | node_used_mem_list[node_index] + mem_req > 48 or \ 49 | node_used_bw_list[node_index] + bw_req > 10 or \ 50 | node_used_gpu_list[node_index] + gpu_req > 8: 51 | suff_resr = False 52 | print suff_resr 53 | if suff_resr: 54 | job.num_workers += 1 55 | job.num_ps += 1 56 | node_used_cpu_list[node_index] += cpu_req 57 | node_used_mem_list[node_index] += mem_req 58 | node_used_bw_list[node_index] += bw_req 59 | node_used_gpu_list[node_index] += gpu_req 60 | node_used_resr_queue.put((node_used_cpu_list[node_index] + node_used_gpu_list[node_index], node_index)) 61 | # placement 62 | if job.id in placements: 63 | placements[job.id].append(node_index) 64 | else: 65 | placements[job.id] = [node_index] 66 | job.curr_ps_placement.append(node_index) 67 | job.curr_worker_placement.append(node_index) 68 | # cur_node_index = (node_index + 1) % len(params.NODE_LIST) # update index if round-robin, otherwise adopt best fit packing 69 | 70 | # update dominant resource 71 | cpu_share = 1.0 * (job.num_workers * job.resr_worker[0] + job.num_ps * job.resr_ps[0]) / 48 72 | #mem_share = 1.0 * (job.num_worker * job.worker_mem + job.num_ps * job.ps_mem) / 288 73 | #bw_share = 1.0 * (job.num_worker * job.worker_bw + job.num_ps * job.ps_bw) / 60 74 | gpu_share = 1.0 * (job.num_workers * job.resr_worker[1]) / 48 75 | dom_share = max(cpu_share, gpu_share) 76 | if job.num_workers < 16 and job.num_ps < 16: 77 | drf_queue.put((dom_share, job_arrival, job)) 78 | 79 | if job not in self.running_jobs: 80 | self.running_jobs.add(job) 81 | else: 82 | self.cluster_used_cpu = sum(node_used_cpu_list) 83 | self.cluster_used_mem = sum(node_used_mem_list) 84 | self.cluster_used_bw = sum(node_used_bw_list) 85 | self.cluster_used_gpu = sum(node_used_gpu_list) 86 | break # 
no enough resource 87 | 88 | toc = time.time() 89 | self.logger.debug(self.name + ":: " + "scheduling time: " + "%.3f" % (toc - tic) + " seconds.") 90 | 91 | toc = time.time() 92 | self.logger.debug(self.name + ":: " + "scheduling time: " + "%.3f" % (toc - tic) + " seconds.") 93 | for job in self.uncompleted_jobs: 94 | self.logger.debug(self.name + ":: scheduling results" + "job id: " + str(job.id) + " num_worker: " + str(job.num_workers) +" num_ps: " + str(job.num_ps)) 95 | a = raw_input() 96 | 97 | def test(): 98 | import log, trace 99 | np.random.seed(9973) 100 | logger = log.getLogger(name="test.log", level="DEBUG") 101 | job_trace = trace.Trace(logger).get_trace() 102 | env = DRF_Env("DRF", job_trace, logger) 103 | while not env.end: 104 | env.step() 105 | #print env.observe() 106 | # print env.data 107 | # input() 108 | print env.get_results() 109 | print env.get_job_jcts() 110 | for i in range(len(env.trace)): 111 | if i in env.trace: 112 | for job in env.trace[i]: 113 | print i+1, job.id, job.type, job.model 114 | 115 | 116 | 117 | 118 | 119 | 120 | if __name__ == '__main__': 121 | test() 122 | 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /validate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import parameters as pm 4 | import drf_env 5 | import fifo_env 6 | import tetris_env 7 | import srtf_env 8 | import optimus_env 9 | import rl_env 10 | 11 | 12 | def val_loss(net, val_traces, logger, global_step): 13 | avg_loss = 0 14 | step = 0 15 | data = [] 16 | for episode in range(len(val_traces)): 17 | job_trace = val_traces[episode] 18 | if pm.HEURISTIC == "DRF": 19 | env = drf_env.DRF_Env("DRF", job_trace, logger) 20 | elif pm.HEURISTIC == "FIFO": 21 | env = fifo_env.FIFO_Env("FIFO", job_trace, logger) 22 | elif pm.HEURISTIC == "SRTF": 23 | env = srtf_env.SRTF_Env("SRTF", job_trace, logger) 24 | elif pm.HEURISTIC == "Tetris": 25 | env = tetris_env.Tetris_Env("Tetris", job_trace, logger) 26 | elif pm.HEURISTIC == "Optimus": 27 | env = optimus_env.Optimus_Env("Optimus", job_trace, logger) 28 | 29 | ts = 0 30 | while not env.end: 31 | data += env.step() 32 | ts += 1 33 | if len(data) >= pm.MINI_BATCH_SIZE: 34 | # prepare a validation batch 35 | indexes = np.random.choice(len(data), size=pm.MINI_BATCH_SIZE, replace=False) 36 | inputs = [] 37 | labels = [] 38 | for index in indexes: 39 | input, label = data[index] 40 | inputs.append(input) 41 | labels.append(label) 42 | # superversed learning to calculate gradients 43 | output, loss = net.get_sl_loss(np.stack(inputs),np.vstack(labels)) 44 | avg_loss += loss 45 | # if step%50 == 0: 46 | # # # type, # of time slots in the system so far, normalized remaining epoch, dom resource 47 | # tb_logger.add_text(tag="sl:input+label+output:" + str(episode) + "_" + str(ts), value="input:" + \ 48 | # " type: "+ str(input[0]) + " stay_ts: " + str(input[1]) + " rt: " + str(input[2]) \ 49 | # + " resr:" + str(input[3]) + "\n" + 50 | # " label: " + str(label) + "\n" + " output: " + str(output[-1]), step=global_step) 51 | step += 1 52 | data = [] 53 | 54 | return avg_loss/step 55 | 56 | 57 | def val_jmr(net, val_traces, logger, global_step, tb_logger): 58 | avg_jct = [] 59 | avg_makespan = [] 60 | avg_reward = [] 61 | step = 0.0 62 | tic = time.time() 63 | stats = dict() 64 | stats["step"] = global_step 65 | stats["jcts"] = [] 66 | states_dict = dict() 67 | states_dict["step"] = global_step 68 | 
states_dict["states"] = [] 69 | for episode in range(len(val_traces)): 70 | job_trace = val_traces[episode] 71 | env = rl_env.RL_Env("RL", job_trace, logger, False) 72 | ts = 0 73 | while not env.end: 74 | input = env.observe() 75 | output = net.predict(np.reshape(input,(1, pm.STATE_DIM[0], pm.STATE_DIM[1]))) 76 | masked_output, action, reward, move_on, valid_state = env.step(output) 77 | if episode == 0 and move_on: # record the first trace 78 | states = env.get_sched_states() 79 | states_dict["states"].append(states) 80 | ''' 81 | job id: type: num_workers: 82 | ''' 83 | string = "ts: " + str(ts) + " " 84 | for id,type,num_workers,num_ps in states: 85 | if pm.PS_WORKER: 86 | string += "(id: "+str(id) + " type: " + str(type) + " num_workers: " + str(num_workers) + " num_ps: " + str(num_ps) + ") \n" 87 | else: 88 | string += "(id: " + str(id) + " type: " + str(type) + " num_workers: " + str(num_workers) + ") \n" 89 | tb_logger.add_text(tag="rl:resr_allocation:" + str(episode)+str(global_step), value=string, step=global_step) 90 | ts += 1 91 | 92 | if episode == 0: 93 | if step % 50 == 0: 94 | i = 0 95 | value = "input:" 96 | for (key, enabled) in pm.INPUTS_GATE: 97 | if enabled: 98 | # [("TYPE",True), ("STAY",False), ("PROGRESS",False), ("DOM_RESR",True), ("WORKERS",False)] 99 | if key == "TYPE": 100 | value += " type: " + str(input[i]) + "\n\n" 101 | elif key == "STAY": 102 | value += " stay_ts: " + str(input[i]) + "\n\n" 103 | elif key == "PROGRESS": 104 | value += " rt: " + str(input[i]) + "\n\n" 105 | elif key == "DOM_RESR": 106 | value += " resr: " + str(input[i]) + "\n\n" 107 | elif key == "WORKERS": 108 | value += " workers: " + str(input[i]) + "\n\n" 109 | elif key == "PS": 110 | value += " ps: " + str(input[i]) + "\n\n" 111 | i += 1 112 | value += " output: " + str(output) + "\n\n" + " masked_output: " + str(masked_output) + "\n\n" + " action: " + str(action) 113 | 114 | tb_logger.add_text(tag="rl:input+output+action:" + str(global_step) + "_" + str(episode) + "_" + str(ts) + "_" + str(step), 115 | value=value, step=global_step) 116 | step += 1 117 | num_jobs, jct, makespan, reward = env.get_results() 118 | stats["jcts"].append(env.get_job_jcts().values()) 119 | avg_jct.append(jct) 120 | avg_makespan.append(makespan) 121 | avg_reward.append(reward) 122 | elapsed_t = time.time() - tic 123 | logger.info("time for making one decision: " + str(elapsed_t / step) + " seconds") 124 | with open("DL2_JCTs.txt", 'a') as f: 125 | f.write(str(stats) + '\n') 126 | with open("DL2_states.txt", 'a') as f: 127 | f.write(str(states_dict)+"\n") 128 | 129 | return (1.0*sum(avg_jct)/len(avg_jct), 1.0*sum(avg_makespan)/len(avg_makespan), sum(avg_reward)/len(avg_reward)) -------------------------------------------------------------------------------- /job.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import parameters as pm 3 | import numpy as np 4 | 5 | class Job: 6 | def __init__(self, id, type, logger=None): 7 | self.id = id 8 | self.type = type 9 | self.logger = logger 10 | 11 | self.num_epochs = None 12 | self.real_num_epochs = None 13 | self.progress = 0.0 14 | 15 | self.arrv_time = None 16 | self.start_time = None # not tracked 17 | self.end_time = None 18 | 19 | self.num_workers = 0 20 | self.num_ps = 0 21 | self.resr_worker = None 22 | self.resr_ps = None 23 | 24 | self.model = None 25 | self.epoch_size = None 26 | self.local_comp_time = None 27 | self.model_size = None 28 | self.inter_bw = None 29 | self.intra_bw = None 30 | 31 
| self.prev_worker_placement = None 32 | self.curr_worker_placement = None 33 | self.prev_ps_placement = None 34 | self.curr_ps_placement = None 35 | 36 | self.dom_share = 0 37 | self.speed_func = None 38 | self.training = True 39 | self.run_time_in_ts = 0 # only valid immediately after step() call 40 | 41 | 42 | def step(self, flag=True): 43 | assert self.progress < self.real_num_epochs 44 | assert self.num_workers == len(self.curr_worker_placement) 45 | try: 46 | if flag: 47 | assert self.num_workers <= pm.MAX_NUM_WORKERS and self.num_ps <= pm.MAX_NUM_WORKERS 48 | else: 49 | assert self.num_workers <= pm.MAX_NUM_WORKERS+1 and self.num_ps <= pm.MAX_NUM_WORKERS+1 50 | except AssertionError as e: 51 | print "num_workers:", self.num_workers, "num_ps:", self.num_ps # 13, 17 52 | raise 53 | if self.num_workers == 0: 54 | return 0. 55 | if pm.PS_WORKER and self.num_ps == 0: 56 | return 0. 57 | 58 | if pm.REAL_SPEED_TRACE or not self.training: # always use real trace for validation 59 | if pm.PS_WORKER: 60 | epoch = self.speed_func(self.num_ps, self.num_workers) * pm.TS_DURATION / self.epoch_size 61 | num_epoch_error = pm.TRAIN_SPEED_ERROR * (2 * np.random.rand() - 1) 62 | epoch = (1 + num_epoch_error) * epoch 63 | else: 64 | epoch = self.speed_func(self.num_workers, self.num_workers) * pm.TS_DURATION / self.epoch_size 65 | else: 66 | if pm.PS_WORKER: 67 | iter_times = [] # each worker's step time 68 | ps_on_node = dict() # number of ps on each cluster node 69 | worker_on_node = dict() # number of workers on each cluster node 70 | for node in self.curr_worker_placement: 71 | if node in worker_on_node: 72 | worker_on_node[node] += 1 73 | else: 74 | worker_on_node[node] = 1 75 | if node not in ps_on_node: 76 | ps_on_node[node] = 0 77 | for node in self.curr_ps_placement: 78 | if node in ps_on_node: 79 | ps_on_node[node] += 1 80 | else: 81 | ps_on_node[node] = 1 82 | if node not in worker_on_node: 83 | worker_on_node[node] = 0 84 | 85 | for node in self.curr_worker_placement: 86 | effective_intra_bw = self.intra_bw/max(ps_on_node[node], worker_on_node[node]) 87 | if len(self.curr_ps_placement) == ps_on_node[node]: # all ps in this worker node 88 | worker_side_inter_bw = ps_side_inter_bw = self.inter_bw 89 | else: 90 | worker_side_inter_bw = self.inter_bw/(len(self.curr_ps_placement) - ps_on_node[node]) 91 | num_worker_list = [] 92 | for n in ps_on_node: 93 | if ps_on_node[n] > 0: 94 | num_worker_list.append(worker_on_node[n]) 95 | ps_side_inter_bw = self.inter_bw/(len(self.curr_worker_placement) - min(num_worker_list)) 96 | 97 | effective_inter_bw = min(worker_side_inter_bw, ps_side_inter_bw) 98 | inter_trans_time = 2.0 * (self.model_size / len(self.curr_ps_placement))/ effective_inter_bw 99 | intra_trans_time = 2.0 * (self.model_size / len(self.curr_ps_placement))/ effective_intra_bw 100 | iter_time = self.local_comp_time + max(inter_trans_time, intra_trans_time) # training time of one step at a worker 101 | iter_times.append(iter_time) 102 | epoch = self.num_workers * pm.TS_DURATION / max(iter_times) / self.epoch_size # each time slot is 20 minutes 103 | else: 104 | colocations = collections.Counter(self.curr_worker_placement) 105 | max_inter_trans_time = 2.0 * (1 - min(colocations.values())/len(self.curr_worker_placement)) * self.model_size / self.inter_bw 106 | intra_trans_time = 2.0 * min(colocations.values())/len(self.curr_worker_placement) * self.model_size / self.intra_bw 107 | iter_time = self.local_comp_time + max(max_inter_trans_time, intra_trans_time) 108 | # epoch = self.num_workers * 
pm.TS_DURATION / iter_time / self.epoch_size # training time of one step at a worker 109 | if self.num_workers <= 8: 110 | epoch = self.num_workers * pm.TS_DURATION / iter_time / self.epoch_size 111 | else: 112 | epoch = max((12-self.num_workers/2.0) * pm.TS_DURATION / iter_time / self.epoch_size, pm.TS_DURATION / iter_time / self.epoch_size) 113 | 114 | if flag: 115 | if self.progress + epoch > self.real_num_epochs: 116 | self.run_time_in_ts = (self.real_num_epochs - self.progress) / epoch 117 | epoch = self.real_num_epochs - self.progress 118 | self.progress = float(self.real_num_epochs) 119 | else: 120 | self.progress += epoch 121 | self.run_time_in_ts = 1 122 | return epoch 123 | 124 | def get_run_time_in_ts(self): 125 | return self.run_time_in_ts 126 | 127 | 128 | def reset(self): # reset all, used for validation where the trace should be kept same 129 | self.progress = 0.0 130 | self.end_time = None 131 | 132 | self.num_workers = 0 133 | self.num_ps = 0 134 | 135 | self.prev_worker_placement = None 136 | self.curr_worker_placement = None 137 | self.prev_ps_placement = None 138 | self.curr_ps_placement = None 139 | 140 | self.dom_share = 0 141 | 142 | def info(self): 143 | return "Job id: " + str(self.id) + " type: " + str(self.type) + " arrv time: " + str(self.arrv_time) \ 144 | + " progress: " + str(self.progress) + " total epochs: " + str(self.real_num_epochs) 145 | 146 | 147 | def main(): 148 | import numpy as np 149 | id = 1 150 | type = 1 151 | job = Job(id, type, None) # type start from 1 152 | job.arrv_time = 0 153 | job.epoch_size = 115 154 | job.model_size = 102.2 155 | job.local_comp_time = 0.449 156 | job.intra_bw = 306.5 157 | job.inter_bw = 91.875 158 | job.resr_ps = np.array([3, 0]) 159 | job.resr_worker = np.array([2, 4]) 160 | job.num_epochs = 120 161 | job.real_num_epochs = 118 162 | 163 | 164 | 165 | if __name__ == '__main__': 166 | main() -------------------------------------------------------------------------------- /comparison.py: -------------------------------------------------------------------------------- 1 | import parameters 2 | import multiprocessing 3 | import parameters as pm 4 | import os 5 | import log 6 | import trace 7 | import time 8 | import drf_env 9 | import srtf_env 10 | import fifo_env 11 | import tetris_env 12 | import optimus_env 13 | import copy_reg 14 | import types 15 | 16 | 17 | # register method instance as pickable objects 18 | def _pickle_method(m): 19 | if m.im_self is None: 20 | return getattr, (m.im_class, m.im_func.func_name) 21 | else: 22 | return getattr, (m.im_self, m.im_func.func_name) 23 | 24 | copy_reg.pickle(types.MethodType, _pickle_method) 25 | 26 | 27 | def drf(job_trace=None): 28 | if job_trace is None: 29 | job_trace = trace.Trace(None).get_trace() 30 | env = drf_env.DRF_Env("DRF", job_trace, None) 31 | while not env.end: 32 | env.step() 33 | return [env.get_results(), env.get_job_jcts().values()] 34 | 35 | 36 | def srtf(job_trace=None): 37 | if job_trace is None: 38 | job_trace = trace.Trace(None).get_trace() 39 | env = srtf_env.SRTF_Env("SRTF", job_trace, None) 40 | while not env.end: 41 | env.step() 42 | return [env.get_results(), env.get_job_jcts().values()] 43 | 44 | def fifo(job_trace=None): 45 | if job_trace is None: 46 | job_trace = trace.Trace(None).get_trace() 47 | env = fifo_env.FIFO_Env("FIFO", job_trace, None) 48 | while not env.end: 49 | env.step() 50 | return [env.get_results(), env.get_job_jcts().values()] 51 | 52 | def tetris(job_trace=None): 53 | if job_trace is None: 54 | job_trace = 
trace.Trace(None).get_trace() 55 | env = tetris_env.Tetris_Env("Tetris", job_trace, None) 56 | while not env.end: 57 | env.step() 58 | return [env.get_results(), env.get_job_jcts().values()] 59 | 60 | def optimus(job_trace=None): 61 | if job_trace is None: 62 | job_trace = trace.Trace(None).get_trace() 63 | env = optimus_env.Optimus_Env("Optimus", job_trace, None) 64 | while not env.end: 65 | env.step() 66 | return [env.get_results(), env.get_job_jcts().values()] 67 | 68 | 69 | 70 | def compare(traces, logger, debug="False"): 71 | if debug: 72 | drf(traces[0]) 73 | srtf(traces[0]) 74 | fifo(traces[0]) 75 | tetris(traces[0]) 76 | optimus(traces[0]) 77 | f = open("DRF_JCTs.txt", 'w') 78 | f.close() 79 | 80 | num_schedulers = 5 81 | thread_list = [[] for i in range(num_schedulers)] # a two dimension matrix 82 | tic = time.time() 83 | pool = multiprocessing.Pool(processes=40) 84 | for i in range(len(traces)): # one example takes about 10s 85 | thread_list[0].append(pool.apply_async(drf, args=(traces[i],))) 86 | thread_list[1].append(pool.apply_async(srtf, args=(traces[i],))) 87 | thread_list[2].append(pool.apply_async(fifo, args=(traces[i],))) 88 | thread_list[3].append(pool.apply_async(tetris, args=(traces[i],))) 89 | thread_list[4].append(pool.apply_async(optimus, args=(traces[i],))) 90 | pool.close() 91 | pool.join() 92 | 93 | jct_list = [[] for i in range(num_schedulers)] # a two dimension matrix 94 | makespan_list = [[] for i in range(num_schedulers)] 95 | reward_list = [[] for i in range(num_schedulers)] 96 | for i in range(num_schedulers): 97 | for j in range(len(thread_list[i])): 98 | result, jcts = thread_list[i][j].get() 99 | if i == 0: # DRF 100 | with open("DRF_JCTs.txt", 'a') as f: 101 | f.write(str(jcts)+'\n') 102 | num_jobs, jct, makespan, reward = result 103 | jct_list[i].append(jct) 104 | makespan_list[i].append(makespan) 105 | reward_list[i].append(reward) 106 | toc = time.time() 107 | 108 | logger.info("---------------------------------------------------------------") 109 | logger.info("progress: finish testing " + str(len(traces)) + " traces within " + str(int(toc - tic)) + " seconds") 110 | logger.info("Average JCT: DRF " + '%.3f' % (sum(jct_list[0]) / len(jct_list[0])) + " SRTF " + \ 111 | '%.3f' % (sum(jct_list[1]) / len(jct_list[1])) + " FIFO " + '%.3f' % (sum(jct_list[2]) / len(jct_list[2])) \ 112 | + " Tetris " + '%.3f' % (sum(jct_list[3]) / len(jct_list[3])) + " Optimus " + '%.3f' % (sum(jct_list[4]) / len(jct_list[4]))) 113 | logger.info("Average Makespan: DRF " + '%.3f' % (1.0 * sum(makespan_list[0]) / len(makespan_list[0])) + \ 114 | " SRTF " + '%.3f' % (1.0 * sum(makespan_list[1]) / len(makespan_list[1])) + \ 115 | " FIFO " + '%.3f' % (1.0 * sum(makespan_list[2]) / len(makespan_list[2])) + " Tetris " + '%.3f' % ( 116 | 1.0 * sum(makespan_list[3]) / len(makespan_list[3])) + " Optimus " + '%.3f' % (sum(makespan_list[4]) / len(makespan_list[4]))) 117 | logger.info("Average Reward: DRF " + '%.3f' % (1.0 * sum(reward_list[0]) / len(reward_list[0])) + \ 118 | " SRTF " + '%.3f' % (1.0 * sum(reward_list[1]) / len(reward_list[1])) + \ 119 | " FIFO " + '%.3f' % (1.0 * sum(reward_list[2]) / len(reward_list[2])) + " Tetris " + '%.3f' % ( 120 | 1.0 * sum(reward_list[3]) / len(reward_list[3])) + " Optimus " + '%.3f' % (sum(reward_list[4]) / len(reward_list[4]))) 121 | stats = [() for i in range(num_schedulers)] 122 | for i in range(num_schedulers): 123 | jct = 1.0*sum(jct_list[i]) / len(jct_list[i]) 124 | makespan = 1.0*sum(makespan_list[i]) / len(makespan_list[i]) 125 | 
reward = 1.0*sum(reward_list[i]) / len(reward_list[i]) 126 | stats[i] = (jct, makespan, reward) 127 | 128 | if pm.EXPERIMENT_NAME is not None: 129 | LOG_DIR = "./" + pm.EXPERIMENT_NAME + "/" 130 | os.system("rm -rf " + LOG_DIR) 131 | os.system("mkdir -p " + LOG_DIR + "; cp *.py *.txt " + LOG_DIR) 132 | f = open(LOG_DIR + "rl_validation.txt", 'a') 133 | tags_prefix = ["DRF", "SRTF", "FIFO", "Tetris", "Optimus"] 134 | assert len(tags_prefix) == len(stats) 135 | for i in range(len(stats)): 136 | if pm.HEURISTIC == tags_prefix[i]: 137 | jct, makespan, reward = stats[i] 138 | f.write(pm.HEURISTIC + " 0: " + str(jct) + " " + str(makespan) + " " + str(reward) + "\n") 139 | f.close() 140 | 141 | return stats 142 | 143 | 144 | 145 | def main(): 146 | logger = log.getLogger(name="comparison", level="INFO") 147 | num_traces = 10 148 | traces = [] 149 | for i in range(num_traces): 150 | job_trace = trace.Trace(None).get_trace() 151 | traces.append(job_trace) 152 | compare(traces, logger, False) 153 | 154 | if __name__ == '__main__': 155 | main() 156 | 157 | 158 | ''' 159 | comparison.py:74 INFO: Average JCT: DRF 5.900 SRTF 8.132 FIFO 8.203 Tetris 9.606 160 | comparison.py:78 INFO: Average Makespan: DRF 29.207 SRTF 36.991 FIFO 37.221 Tetris 36.204 161 | comparison.py:82 INFO: Average Reward: DRF 2.063 SRTF 1.633 FIFO 1.623 Tetris 1.668 162 | ''' -------------------------------------------------------------------------------- /params_template.py: -------------------------------------------------------------------------------- 1 | # experiment use 2 | EXPERIMENT_NAME = None 3 | 4 | # random seed 5 | RANDOMNESS = False 6 | np_seed = 9973 # seed for numpy 7 | tf_seed = 53 # seed for tf 8 | trace_seed = 103 # seed for trace, not used 9 | 10 | # configuration 11 | LOG_MODE = "INFO" 12 | NUM_AGENTS = 1 # at most 28 for tesla p100 and 40 for gtx 1080ti 13 | 14 | TRAINING_MODE = "RL" # or "RL" 15 | HEURISTIC = "DRF" # the heuristic algorithm used for supervised learning 16 | VALUE_NET = True # disable/enable critic network 17 | 18 | POLICY_NN_MODEL = "Models/policy_sl_ps_worker_100.ckpt" # path of the checkpointed model, or None 19 | VALUE_NN_MODEL = None # "Models/value_rl_ps_worker_1000.ckpt" # path of value network model 20 | SAVE_VALUE_MODEL = False 21 | SUMMARY_DIR = "TensorBoard/" # tensorboard logging dir 22 | MODEL_DIR = "Models/" # checkpoint dir 23 | MAX_NUM_CHECKPOINTS = 10 # max number of saved checkpoints 24 | CHECKPOINT_INTERVAL = 1000 25 | DISP_INTERVAL = 5 # display frequency 26 | VISUAL_GW_INTERVAL = 100 # tf log gradients/weights frequency 27 | NUM_RECORD_AGENTS = 2 # log details of 2 agents in tensorboard and ignore others for saved space 28 | SKIP_FIRST_VAL = False # if False, the central agent will test the initialized model at first before training 29 | SELECT_ACTION_MAX_PROB = False # whether to select the action with the highest probability or select based on distribution, default based on distribution 30 | MASK_PROB = 1. 
# whether to mask actions mapped None jobs, set it to be lower seems to be worse 31 | ASSIGN_BUNDLE = True # assign 1 ps and 1 worker for each in the beginning of each timeslot to avoid starvation 32 | 33 | # hyperparameters 34 | SL_LOSS_FUNCTION = "Cross_Entropy" # "Mean_Square", "Cross_Entropy", "Absolute_Difference" 35 | OPTIMIZER = "Adam" # RMSProp 36 | FIX_LEARNING_RATE = True # keep constant learning rate 37 | ADJUST_LR_STEPS = [5000] # halving learning rate once reaching a certain step, not functional 38 | LEARNING_RATE = 0.0001 39 | 40 | MINI_BATCH_SIZE = 256 41 | EPSILON_GREEDY = False # whether to enable epsilon greedy policy for exploration 42 | VARYING_EPSILON = True # different values of epsilon for agents 43 | EPSILON = 0.1 # not used 44 | ENTROPY_WEIGHT = 0.1 45 | ENTROPY_EPS = 1e-6 46 | MAX_ENTROPY_WEIGHT = 10.0 47 | ANNEALING_TEMPERATURE = 500.0 48 | FIX_ENTROPY_WEIGHT = True # if true, the entropy weight is ENTROPY_WEIGHT; else, it is calculated based on ANNEALING_TEMPERATURE and MAX_ENTROPY_WEIGHT 49 | 50 | RAND_RANGE = 100000 51 | TOT_NUM_STEPS = 6000 52 | TOT_TRAIN_EPOCHS = 2000 # number of training epochs 53 | VAL_INTERVAL = 50 # validation interval 54 | VAL_ON_MASTER = True # validation on agent uses CPU instead of GPU, and may cause use up all memory, do not know why, so far it must be set true 55 | 56 | REPLAY_MEMORY_SIZE = 8192 # or 65536 57 | RANDOM_FILL_MEMORY = False 58 | PRIORITY_REPLAY = True 59 | PRIORITY_MEMORY_SORT_REWARD = True # use reward as priority 60 | PRIORITY_MEMORY_EVICT_PRIORITY = False # remove samples from experience buffer based on priority instead of age 61 | PRIORITY_MEMORY_SORT_WEIGHTED_REWARD_GRADIENTS = False # not used 62 | 63 | LT_REWARD_IN_TS = False # use long term reward within a timeslot 64 | LT_REWARD_NUM_TS = 1 # not implemented 65 | DISCOUNT_FACTOR = 0.99 66 | TS_REWARD_PLUS_JOB_REWARD = False # another way to assign reward 67 | NUM_UNCOMPLETED_JOB_REWARD = False 68 | MEAN_REWARD_BASELINE = True # whether to use reward mean as baseline 69 | 70 | INJECT_SAMPLES = True # inject samples to experience buffer to get samples with high reward 71 | SAMPLE_INJECTION_PROB = 0.1 # probabilistically inject samples with high reward 72 | VARYING_SKIP_NUM_WORKERS = True 73 | MIN_ACTION_PROB_FOR_SKIP = 10**(-20) # 10**(-12) 74 | NUM_TS_PER_UPDATE = 1 # update once after passing x timeslot(s), default 1, i.e., update weights per timeslot 75 | VARYING_PS_WORKER_RATIO = True # explore different ratio of ps over worker 76 | STEP_TRAIN_CRITIC_NET = 0 # number of steps for pretraining critic network, default 0, not functional 77 | CHANGING_JOB_TYPES = False 78 | JOB_RESR_BALANCE = True 79 | FINE_GRAIN_JCT = True 80 | 81 | # cluster 82 | TESTBED = False 83 | LARGE_SCALE = False 84 | CLUSTER_NUM_NODES = 48 # should be at least 3 times of maximal number of uncompleted jobs at each ts, default 160 85 | NUM_RESR_TYPES = 2 # number of resource types, e.g., cpu,gpu 86 | NUM_RESR_SLOTS = 8 # number of available resource slots on each machine 87 | 88 | # dataset 89 | TRAIN_SPEED_ERROR = 0 90 | JOB_EPOCH_EST_ERROR = 0 91 | REAL_SPEED_TRACE = True # whether to use real traces collected from experiment testbed 92 | JOB_LEN_PATTERN = "Ali_Trace" # Ali_Trace, Normal 93 | JOB_ARRIVAL_PATTERN = "Ali_Trace" # Ali_Trace, Uniform, Google_Trace, Poisson 94 | FIX_JOB_LEN = True 95 | TRAIN_EPOCH_SIZE = 100 # number of traces for training dataset 96 | TOT_NUM_JOBS = 60 # number of jobs in one trace 97 | MAX_ARRVS_PER_TS = 3 # max number of jobs arrived in one time slot 98 | 
MAX_NUM_EPOCHS = 80000 # maximum duration of jobs, epochs. default 200 99 | MAX_NUM_WORKERS = 32 100 | TS_DURATION = 1200 101 | VAL_DATASET = 10 # number of traces for validation in each agent 102 | MAX_TS_LEN = 1000 # maximal timeslot length for one trace 103 | 104 | # neural network 105 | JOB_ORDER_SHUFFLE = False # whether to shuffle the order of the jobs in the scheduling window, can also be used for data augmentation 106 | JOB_SORT_PRIORITY = "Arrival" # or Arrival, Resource, Progress, sort job based on resource or arrival 107 | SCHED_WINDOW_SIZE = 20 # maximum allowed number of jobs for NN input 108 | PS_WORKER = True # whether consider ps and worker tasks separately or not 109 | INPUTS_GATE= [("TYPE",True), ("STAY",True), ("PROGRESS",True), ("DOM_RESR",True), ("WORKERS",True), ("PS", True)] 110 | BUNDLE_ACTION = True # add a 'bundle' action to each job, i.e., selecting a ps and a worker by one action 111 | TYPE_BINARY = False # 4 bits 112 | STATE_DIM = (3*TYPE_BINARY + sum([enable for (_,enable) in INPUTS_GATE]), SCHED_WINDOW_SIZE) # type, # of time slots in the system so far, normalized remaining epoch, dom resource, # of workers 113 | SKIP_TS = True # whether we skip the timeslot 114 | 115 | ACTION_DIM = 3 * SCHED_WINDOW_SIZE + 1 116 | INPUT_RESCALE = False # not implemented on heuristic algorithms yet 117 | JOB_CENTRAL_REPRESENTATION = False # treat each job as an input instead of treating each type of information of all jobs as an input 118 | ATTRIBUTE_CENTRAL_REPRESENTATION = False # treat each property of all jobs as an input, default fully connected to input 119 | ZERO_PADDING = True # how to represent None job as input 120 | FIRST_LAYER_TANH = False 121 | NN_SHORTCUT_CONN = False # connect the output of first layer to the NN layer before softmax output 122 | NUM_FCN_LAYERS = 1 # number of fully connected layers, must be > 0 123 | NUM_NEURONS_PER_FCN = STATE_DIM[0] * STATE_DIM[1] # default same number as input size 124 | BATCH_NORMALIZATION = True -------------------------------------------------------------------------------- /prioritized_memory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import collections 3 | import parameters as pm 4 | 5 | 6 | class SumTree(object): 7 | """ 8 | This SumTree code is modified version and the original code is from: 9 | https://github.com/jaara/AI-blog/blob/master/SumTree.py 10 | 11 | Story the data with it priority in tree and data frameworks. 
12 | """ 13 | 14 | def __init__(self, capacity): 15 | self.capacity = capacity # for all priority values 16 | self.tree = np.zeros(2 * capacity - 1) 17 | # [--------------Parent nodes-------------][-------leaves to recode priority-------] 18 | # size: capacity - 1 size: capacity 19 | self.data = np.zeros(capacity, dtype=object) # for all transitions 20 | # [--------------data frame-------------] 21 | # size: capacity 22 | # self.Transition = collections.namedtuple('Transition', ('state_outer', 'output_outer', 'action_outer', 'state_inner', 'output_inner', 'action_inner', 'reward')) 23 | self.full = False 24 | self.data_pointer = 0 25 | self.evict_pq = np.zeros(capacity) # a new problem is introduced by this policy 26 | 27 | self.counter = 0 28 | 29 | def add(self, p, data): 30 | self.evict_pq[self.data_pointer] = p 31 | 32 | tree_idx = self.data_pointer + self.capacity - 1 33 | self.data[self.data_pointer] = data # update data_frame 34 | self.update(tree_idx, p) # update tree_frame 35 | 36 | self.data_pointer += 1 37 | if self.data_pointer >= self.capacity: # replace when exceed the capacity 38 | self.data_pointer = 0 39 | self.full = True 40 | if pm.PRIORITY_MEMORY_EVICT_PRIORITY: 41 | if self.full: 42 | self.data_pointer = np.argmin(self.evict_pq) 43 | self.counter += 1 44 | # if self.counter % 1000 == 0: 45 | # print [_.reward for _ in self.data] 46 | # print self.evict_pq 47 | # print min(self.evict_pq), max(self.evict_pq) 48 | 49 | 50 | def update(self, tree_idx, p): 51 | self.evict_pq[tree_idx - self.capacity + 1] = p 52 | 53 | change = p - self.tree[tree_idx] 54 | self.tree[tree_idx] = p 55 | # then propagate the change through tree 56 | while tree_idx != 0: # this method is faster than the recursive loop in the reference code 57 | tree_idx = (tree_idx - 1) / 2 # result is forced to be int 58 | self.tree[tree_idx] += change 59 | 60 | 61 | def get_leaf(self, v): 62 | """ 63 | Tree structure and array storage: 64 | 65 | Tree index: 66 | 0 -> storing priority sum 67 | / \ 68 | 1 2 69 | / \ / \ 70 | 3 4 5 6 -> storing priority for transitions 71 | 72 | Array type for storing: 73 | [0,1,2,3,4,5,6] 74 | """ 75 | parent_idx = 0 76 | while True: # the while loop is faster than the method in the reference code 77 | cl_idx = 2 * parent_idx + 1 # this leaf's left and right kids 78 | cr_idx = cl_idx + 1 79 | if cl_idx >= len(self.tree): # reach bottom, end search 80 | leaf_idx = parent_idx 81 | break 82 | else: # downward search, always search for a higher priority node 83 | if v <= self.tree[cl_idx]: 84 | parent_idx = cl_idx 85 | else: 86 | v -= self.tree[cl_idx] 87 | parent_idx = cr_idx 88 | 89 | data_idx = leaf_idx - self.capacity + 1 90 | return leaf_idx, self.tree[leaf_idx], self.data[data_idx] 91 | 92 | @property 93 | def total_p(self): 94 | return self.tree[0] # the root 95 | 96 | def list_leaves(self): 97 | for parent_idx in range(0, self.capacity - 1): 98 | print "parent idx", parent_idx, "value", self.tree[parent_idx] 99 | 100 | for tree_idx in range(self.capacity - 1, 2 * self.capacity - 1): 101 | print "tree idx", tree_idx, "value", self.tree[tree_idx] 102 | 103 | 104 | class Memory(object): # stored as ( s, a, r, s_ ) in SumTree 105 | """ 106 | This SumTree code is modified version and the original code is from: 107 | https://github.com/jaara/AI-blog/blob/master/Seaquest-DDQN-PER.py 108 | """ 109 | epsilon = 0.01 # small amount to avoid zero priority 110 | alpha = 0.9 # [0~1] convert the importance of TD error to priority 111 | beta = 0.1 # importance-sampling, from initial value 
increasing to 1 112 | beta_increment_per_sampling = 0.0001 113 | if pm.PRIORITY_MEMORY_SORT_REWARD: 114 | abs_err_upper = 7. # reward 115 | if pm.MEAN_REWARD_BASELINE: 116 | abs_err_upper = 4. 117 | else: 118 | abs_err_upper = 1. # clipped abs error 119 | 120 | def __init__(self, maxlen): 121 | self.tree = SumTree(maxlen) 122 | self.Transition = collections.namedtuple('Transition', ('state', 'output', 'action', 'reward')) 123 | self.sample_rewards = collections.deque(maxlen=maxlen) # smaller one like maxline/10 may lead to bias 124 | self.store_rewards = collections.deque(maxlen=maxlen) 125 | 126 | def store(self, state, output, action, reward): 127 | transition = self.Transition(state, output, action, reward) 128 | self.store_rewards.append(reward) 129 | 130 | if pm.PRIORITY_MEMORY_SORT_REWARD and pm.MEAN_REWARD_BASELINE: 131 | p = max(1, reward - sum(self.store_rewards)/len(self.store_rewards)) # p can not be assigned 0 due to ISWeights.append(np.power(prob/min_prob, -self.beta)) 132 | else: 133 | p = np.max(self.tree.tree[-self.tree.capacity:]) # 1.5 may be too large 134 | if p == 0: 135 | p = self.abs_err_upper/1.2 136 | self.tree.add(p, transition) # set the max p for new p 137 | 138 | def sample(self, n): 139 | b_idx = [] 140 | b_memory = [] 141 | ISWeights = [] 142 | pri_seg = self.tree.total_p / n # priority segment 143 | self.beta = np.min([1., self.beta + self.beta_increment_per_sampling]) # max = 1 144 | 145 | min_prob = np.min(self.tree.tree[-self.tree.capacity:]) / self.tree.total_p # for later calculate ISweight 146 | for i in range(n): 147 | a, b = pri_seg * i, pri_seg * (i + 1) 148 | # a, b = pri_seg * i, min(pri_seg * (i + 3), pri_seg*n) # introduce bias, do not consider any more 149 | v = np.random.uniform(a, b) 150 | idx, p, data = self.tree.get_leaf(v) 151 | prob = p / self.tree.total_p 152 | # ISWeights[i, 0] = np.power(prob/min_prob, -self.beta) 153 | ISWeights.append(np.power(prob/min_prob, -self.beta)) # higher prob, higher beta -> lower weights 154 | b_idx.append(idx) 155 | # b_idx[i] = idx 156 | # print i, "here" 157 | # print b_memory[i], type(data) 158 | # b_memory[i, :] = data 159 | b_memory.append(data) 160 | self.sample_rewards.append(data.reward) 161 | return b_idx, b_memory, ISWeights 162 | 163 | def update(self, tree_idx, abs_errors): 164 | abs_errors += self.epsilon # convert to abs and avoid 0 165 | clipped_errors = np.minimum(abs_errors, self.abs_err_upper) 166 | ps = np.power(clipped_errors, self.alpha) 167 | for ti, p in zip(tree_idx, ps): 168 | self.tree.update(ti, p) 169 | 170 | def avg_reward(self): # just for compatibility 171 | # assert len(self.sample_rewards) > 0 172 | # return sum(self.sample_rewards)/len(self.sample_rewards) 173 | assert len(self.store_rewards) > 0 174 | return sum(self.store_rewards) / len(self.store_rewards) 175 | 176 | def full(self): 177 | return self.tree.full -------------------------------------------------------------------------------- /scheduler_base.py: -------------------------------------------------------------------------------- 1 | import Queue 2 | import numpy as np 3 | import parameters as pm 4 | from cluster import Cluster 5 | import log 6 | 7 | class Scheduler(object): 8 | def __init__(self, name, trace, logger): 9 | self.name = name # e.g., 'DRF' 10 | self.trace = trace 11 | if logger is None: 12 | assert name 13 | self.logger = log.getLogger(name=name, fh=False) 14 | else: 15 | self.logger = logger 16 | 17 | self.cluster = Cluster(self.logger) 18 | self.curr_ts = 0 19 | self.end = False 20 | 21 | 
self.running_jobs = set() 22 | self.uncompleted_jobs = set() 23 | self.completed_jobs = set() 24 | 25 | self.data = None # all state action pairs in one ts 26 | self.rewards = [] 27 | 28 | def step(self): 29 | # step by one timeslot 30 | assert not self.end 31 | self._prepare() 32 | self._schedule() 33 | self._progress() 34 | if len(self.completed_jobs) == pm.TOT_NUM_JOBS: 35 | self.end = True 36 | self.curr_ts += 1 37 | return self.data 38 | 39 | def get_results(self): 40 | # get final results, including avg jct, makespan and avg reward 41 | jct_list = [(job.end_time - job.arrv_time + 1.0) for job in self.completed_jobs] 42 | makespan = max([job.end_time+1.0 for job in self.completed_jobs]) 43 | assert jct_list 44 | return (len(self.completed_jobs), 1.0*sum(jct_list)/len(jct_list), makespan, sum(self.rewards)/len(self.rewards)) 45 | 46 | def get_job_jcts(self): 47 | jcts = dict() 48 | for job in self.completed_jobs: 49 | jcts[job.id] = job.end_time - job.arrv_time + 1.0 50 | return jcts 51 | 52 | def _prepare(self): 53 | self.cluster.clear() 54 | self.data = [] 55 | self.running_jobs.clear() 56 | if self.curr_ts in self.trace: 57 | for job in self.trace[self.curr_ts]: 58 | job.reset() # must reset since it is trained for multiple epochs 59 | self.uncompleted_jobs.add(job) 60 | self.logger.debug(job.info()) 61 | for job in self.uncompleted_jobs: 62 | job.num_workers = 0 63 | job.curr_worker_placement = [] 64 | if pm.PS_WORKER: 65 | job.num_ps = 0 66 | job.curr_ps_placement = [] 67 | # sort based on used resources from smallest to largest for load balancing 68 | self.node_used_resr_queue = Queue.PriorityQueue() 69 | for i in range(pm.CLUSTER_NUM_NODES): 70 | self.node_used_resr_queue.put((0, i)) 71 | 72 | def _schedule(self): 73 | self.logger.info("This method is to be implemented on child class!") 74 | 75 | def _progress(self): 76 | reward = 0 77 | for job in self.running_jobs.copy(): 78 | epoch = job.step() 79 | reward += epoch / job.num_epochs 80 | if job.progress >= job.real_num_epochs: 81 | if pm.FINE_GRAIN_JCT: 82 | job.end_time = self.curr_ts - 1 + job.get_run_time_in_ts() 83 | else: 84 | job.end_time = self.curr_ts 85 | # self.running_jobs.remove(job) 86 | self.uncompleted_jobs.remove(job) 87 | self.completed_jobs.add(job) 88 | if pm.NUM_UNCOMPLETED_JOB_REWARD: 89 | reward = len(self.uncompleted_jobs) 90 | self.rewards.append(reward) 91 | 92 | def observe(self): 93 | ''' 94 | existing resource share of each job: 0-1 95 | job type 0-8 96 | job normalized progress 0-1 97 | num of backlogs: percentage of total number of jobs in the trace 98 | ''' 99 | # cluster_state = self.cluster.get_cluster_state() 100 | # for test, first use dominant resource share of each job as input state 101 | q = Queue.PriorityQueue() 102 | for job in self.uncompleted_jobs: 103 | if pm.PS_WORKER: 104 | if job.num_workers >= pm.MAX_NUM_WORKERS and job.num_ps >= pm.MAX_NUM_WORKERS: # and, not or 105 | continue 106 | else: 107 | if job.num_workers >= pm.MAX_NUM_WORKERS: # not schedule it any more 108 | continue 109 | if pm.JOB_SORT_PRIORITY == "Resource": 110 | q.put((job.dom_share, job.arrv_time, job)) 111 | elif pm.JOB_SORT_PRIORITY == "Arrival": 112 | q.put((job.arrv_time, job.arrv_time, job)) 113 | elif pm.JOB_SORT_PRIORITY == "Progress": 114 | q.put((1-job.progress/job.num_epochs, job.arrv_time, job)) 115 | 116 | if pm.ZERO_PADDING: 117 | state = np.zeros(shape=pm.STATE_DIM) # zero padding instead of -1 118 | else: 119 | state = -1*np.ones(shape=pm.STATE_DIM) 120 | self.window_jobs = [None for _ in 
range(pm.SCHED_WINDOW_SIZE)] 121 | 122 | shuffle = np.array([i for i in range(pm.SCHED_WINDOW_SIZE)]) # default keep order 123 | if pm.JOB_ORDER_SHUFFLE: 124 | shuffle = np.random.choice(pm.SCHED_WINDOW_SIZE, pm.SCHED_WINDOW_SIZE, replace=False) 125 | 126 | # resource share / job arrival / progress 127 | for order in shuffle: 128 | if not q.empty(): 129 | _, _, job = q.get() 130 | j = 0 131 | for (input,enable) in pm.INPUTS_GATE: # INPUTS_GATE=[("TYPE",True), ("STAY",False), ("PROGRESS",False), ("DOM_RESR",False), ("WORKERS",True)] 132 | if enable: 133 | if input == "TYPE": 134 | if not pm.INPUT_RESCALE: 135 | if not pm.TYPE_BINARY: 136 | state[j][order] = job.type 137 | else: 138 | bin_str = "{0:b}".format(job.type).zfill(4) 139 | for bin_ch in bin_str: 140 | state[j][order] = int(bin_ch) 141 | j += 1 142 | j -= 1 143 | else: 144 | state[j][order] = float(job.type)/8 145 | elif input == "STAY": 146 | if not pm.INPUT_RESCALE: 147 | state[j][order] = self.curr_ts - job.arrv_time 148 | else: 149 | state[j][order] = float(self.curr_ts - job.arrv_time) / 100 150 | elif input == "PROGRESS": 151 | state[j][order] = 1 - job.progress/job.num_epochs 152 | elif input == "DOM_RESR": 153 | state[j][order] = job.dom_share 154 | elif input == "WORKERS": 155 | if not pm.INPUT_RESCALE: 156 | state[j][order] = job.num_workers 157 | else: 158 | state[j][order] = float(job.num_workers)/pm.MAX_NUM_WORKERS 159 | elif input == "PS": 160 | if not pm.INPUT_RESCALE: 161 | state[j][order] = job.num_ps 162 | else: 163 | state[j][order] = float(job.num_ps) / pm.MAX_NUM_WORKERS 164 | else: 165 | raise RuntimeError 166 | j += 1 167 | self.window_jobs[order] = job 168 | 169 | # backlog = float(max(len(self.uncompleted_jobs) - pm.SCHED_WINDOW_SIZE, 0))/len(pm.TOT_NUM_JOBS) 170 | self.logger.debug("ts: " + str(self.curr_ts) \ 171 | + " backlog: " + str(max(len(self.uncompleted_jobs) - pm.SCHED_WINDOW_SIZE, 0)) \ 172 | + " completed jobs: " + str(len(self.completed_jobs)) \ 173 | + " uncompleted jobs: " + str(len(self.uncompleted_jobs))) 174 | return state 175 | 176 | def _state(self, label_job_id, role="worker"): # whether this action selection leads to worker increment or ps increment 177 | # cluster_state = self.cluster.get_cluster_state() 178 | input = self.observe() # NN input 179 | label = np.zeros(pm.ACTION_DIM) 180 | for i in range(pm.SCHED_WINDOW_SIZE): 181 | job = self.window_jobs[i] 182 | if job and job.id == label_job_id: 183 | if pm.PS_WORKER: 184 | if pm.BUNDLE_ACTION: 185 | if role == "worker": 186 | label[i * 3] = 1 187 | elif role == "ps": 188 | label[i * 3 + 1] = 1 189 | elif role == "bundle": 190 | label[i * 3 + 2] = 1 191 | else: 192 | if role == "worker": 193 | label[i * 2] = 1 194 | elif role == "ps": 195 | label[i * 2 + 1] = 1 196 | else: 197 | label[i] = 1 198 | self.data.append((input,label)) 199 | -------------------------------------------------------------------------------- /parameters.py: -------------------------------------------------------------------------------- 1 | # experiment use 2 | EXPERIMENT_NAME = None 3 | 4 | # random seed 5 | RANDOMNESS = False 6 | np_seed = 9973 # seed for numpy 7 | tf_seed = 53 # seed for tf 8 | trace_seed = 103 # seed for trace, not used 9 | 10 | # configuration 11 | LOG_MODE = "INFO" 12 | if LOG_MODE == "DEBUG": 13 | NUM_AGENTS = 1 14 | else: 15 | NUM_AGENTS = 1 # at most 28 for tesla p100 and 40 for gtx 1080ti 16 | 17 | TRAINING_MODE = "RL" # or "RL" 18 | if TRAINING_MODE == "SL": 19 | HEURISTIC = "DRF" # the heuristic algorithm used for supervised learning 
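# For reference, a sketch of the action layout this configuration produces when PS_WORKER and
# BUNDLE_ACTION are both enabled (as they are further below), so ACTION_DIM = 3 * SCHED_WINDOW_SIZE + SKIP_TS:
#   index 3*i     -> add one worker to window job i
#   index 3*i + 1 -> add one parameter server to window job i
#   index 3*i + 2 -> add one (worker, ps) bundle to window job i
#   last index    -> skip the rest of the timeslot (only present when SKIP_TS is True)
# scheduler_base._state() one-hot encodes supervised-learning labels with the same layout, e.g.
# label = np.zeros(ACTION_DIM); label[3*i + 2] = 1 marks a 'bundle' action on window job i.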
20 | if TRAINING_MODE == "RL": 21 | VALUE_NET = True # disable/enable critic network 22 | else: 23 | VALUE_NET = False 24 | 25 | POLICY_NN_MODEL = None #"Models/policy_sl_ps_worker_100.ckpt" # path of the checkpointed model, or None 26 | VALUE_NN_MODEL = None # "Models/value_rl_ps_worker_1000.ckpt" # path of value network model 27 | SAVE_VALUE_MODEL = True 28 | if TRAINING_MODE == "SL" or VALUE_NN_MODEL is not None: 29 | SAVE_VALUE_MODEL = False 30 | if TRAINING_MODE == "SL": 31 | POLICY_NN_MODEL = None 32 | VALUE_NN_MODEL = None 33 | SUMMARY_DIR = "TensorBoard/" # tensorboard logging dir 34 | MODEL_DIR = "Models/" # checkpoint dir 35 | MAX_NUM_CHECKPOINTS = 10 # max number of saved checkpoints 36 | CHECKPOINT_INTERVAL = 10000 37 | if TRAINING_MODE == "SL": 38 | CHECKPOINT_INTERVAL = 100 39 | DISP_INTERVAL = 50 # display frequency 40 | VISUAL_GW_INTERVAL = 1000 # tf log gradients/weights frequency 41 | NUM_RECORD_AGENTS = 2 # log details of 2 agents in tensorboard and ignore others for saved space 42 | SKIP_FIRST_VAL = False # if False, the central agent will test the initialized model at first before training 43 | SELECT_ACTION_MAX_PROB = False # whether to select the action with the highest probability or select based on distribution, default based on distribution 44 | MASK_PROB = 1. # whether to mask actions mapped None jobs, set it to be lower seems to be worse 45 | ASSIGN_BUNDLE = True # assign 1 ps and 1 worker for each in the beginning of each timeslot to avoid starvation 46 | 47 | # hyperparameters 48 | SL_LOSS_FUNCTION = "Cross_Entropy" # "Mean_Square", "Cross_Entropy", "Absolute_Difference" 49 | OPTIMIZER = "Adam" # RMSProp 50 | FIX_LEARNING_RATE = True # keep constant learning rate 51 | ADJUST_LR_STEPS = [5000] # halving learning rate once reaching a certain step, not functional 52 | if TRAINING_MODE == "SL": 53 | LEARNING_RATE = 0.005 54 | else: 55 | LEARNING_RATE = 0.0001 56 | 57 | MINI_BATCH_SIZE = 256/NUM_AGENTS 58 | EPSILON_GREEDY = False # whether to enable epsilon greedy policy for exploration 59 | VARYING_EPSILON = True # different values of epsilon for agents 60 | EPSILON = 0.1 # not used 61 | ENTROPY_WEIGHT = 0.1 62 | ENTROPY_EPS = 1e-6 63 | MAX_ENTROPY_WEIGHT = 10.0 64 | ANNEALING_TEMPERATURE = 500.0 65 | FIX_ENTROPY_WEIGHT = True # if true, the entropy weight is ENTROPY_WEIGHT; else, it is calculated based on ANNEALING_TEMPERATURE and MAX_ENTROPY_WEIGHT 66 | if not FIX_ENTROPY_WEIGHT: 67 | assert not EPSILON_GREEDY # avoid using varying entropy and e-greedy together 68 | 69 | RAND_RANGE = 100000 70 | TOT_TRAIN_EPOCHS = 2000 # number of training epochs 71 | TOT_NUM_STEPS = 1000000 72 | if TRAINING_MODE == "SL": 73 | TOT_NUM_STEPS = 100000 74 | VAL_INTERVAL = 200 # validation interval 75 | if TRAINING_MODE == "SL": 76 | VAL_INTERVAL = 50 77 | VAL_ON_MASTER = True # validation on agent uses CPU instead of GPU, and may cause use up all memory, do not know why, so far it must be set true 78 | 79 | REPLAY_MEMORY_SIZE = 8192 # or 65536 80 | RANDOM_FILL_MEMORY = False 81 | PRIORITY_REPLAY = True 82 | PRIORITY_MEMORY_SORT_REWARD = True # use reward as priority 83 | PRIORITY_MEMORY_EVICT_PRIORITY = False # remove samples from experience buffer based on priority instead of age 84 | PRIORITY_MEMORY_SORT_WEIGHTED_REWARD_GRADIENTS = False # not used 85 | assert PRIORITY_MEMORY_SORT_REWARD + PRIORITY_MEMORY_SORT_WEIGHTED_REWARD_GRADIENTS <= 1 86 | if TRAINING_MODE == "SL": 87 | PRIORITY_REPLAY = False 88 | 89 | LT_REWARD_IN_TS = False # use long term reward within a timeslot 90 | 
LT_REWARD_NUM_TS = 1 # not implemented 91 | DISCOUNT_FACTOR = 0.99 92 | TS_REWARD_PLUS_JOB_REWARD = False # another way to assign reward 93 | NUM_UNCOMPLETED_JOB_REWARD = False # set the reward to be the number of uncompleted jobs 94 | assert LT_REWARD_IN_TS+TS_REWARD_PLUS_JOB_REWARD <= 1 95 | MEAN_REWARD_BASELINE = True # whether to use reward mean as baseline 96 | if MEAN_REWARD_BASELINE: 97 | assert PRIORITY_MEMORY_SORT_REWARD 98 | 99 | INJECT_SAMPLES = True # inject samples to experience buffer to get samples with high reward 100 | SAMPLE_INJECTION_PROB = 0.1 # probabilistically inject samples with high reward 101 | VARYING_SKIP_NUM_WORKERS = True 102 | MIN_ACTION_PROB_FOR_SKIP = 10**(-20) # 10**(-12) 103 | NUM_TS_PER_UPDATE = 1 # update once after passing x timeslot(s), default 1, i.e., update weights per timeslot 104 | VARYING_PS_WORKER_RATIO = True # explore different ratio of ps over worker 105 | STEP_TRAIN_CRITIC_NET = 0 # number of steps for pretraining critic network, default 0, not functional 106 | CHANGING_JOB_TYPES = False 107 | JOB_RESR_BALANCE = True 108 | FINE_GRAIN_JCT = True 109 | 110 | # cluster 111 | TESTBED = False 112 | LARGE_SCALE = True 113 | assert TESTBED+LARGE_SCALE < 2 114 | CLUSTER_NUM_NODES = 96 # should be at least 3 times of maximal number of uncompleted jobs at each ts, default 160 115 | if TESTBED: 116 | CLUSTER_NUM_NODES = 6 117 | elif LARGE_SCALE: 118 | CLUSTER_NUM_NODES = 500 119 | NUM_RESR_TYPES = 2 # number of resource types, e.g., cpu,gpu 120 | NUM_RESR_SLOTS = 8 # number of available resource slots on each machine 121 | 122 | # dataset 123 | JOB_EPOCH_EST_ERROR = 0 124 | TRAIN_SPEED_ERROR = 0 125 | REAL_SPEED_TRACE = True # whether to use real traces collected from experiment testbed 126 | FIX_JOB_LEN = True 127 | JOB_LEN_PATTERN = "Ali_Trace" # Ali_Trace, Normal 128 | JOB_ARRIVAL_PATTERN = "Ali_Trace" # Ali_Trace, Uniform, Google_Trace, Poisson 129 | TRAIN_EPOCH_SIZE = 100 # number of traces for training dataset 130 | TOT_NUM_JOBS = 60 # number of jobs in one trace 131 | MAX_ARRVS_PER_TS = 3 # max number of jobs arrived in one time slot 132 | MAX_NUM_EPOCHS = 30000 # maximum duration of jobs, epochs. 
default 200 133 | MAX_NUM_WORKERS = 16 134 | TS_DURATION = 1200.0 135 | if LARGE_SCALE: 136 | TOT_NUM_JOBS = 200 # number of jobs in one trace 137 | MAX_ARRVS_PER_TS = 10 # max number of jobs arrived in one time slot 138 | if TESTBED: 139 | TOT_NUM_JOBS = 10 140 | MAX_NUM_EPOCHS = 1000 141 | MAX_ARRVS_PER_TS = 5 142 | TS_DURATION = 300.0 143 | SCHED_WINDOW_SIZE = 4 144 | VAL_DATASET = 10 # number of traces for validation in each agent 145 | MAX_TS_LEN = 1000 # maximal timeslot length for one trace 146 | 147 | 148 | # neural network 149 | JOB_ORDER_SHUFFLE = False # whether to shuffle the order of the jobs in the scheduling window, can also be used for data augmentation 150 | if TRAINING_MODE == "SL": 151 | JOB_ORDER_SHUFFLE = True 152 | JOB_SORT_PRIORITY = "Arrival" # or Arrival, Resource, Progress, sort job based on resource or arrival 153 | SCHED_WINDOW_SIZE = 20 # maximum allowed number of jobs for NN input 154 | if LARGE_SCALE: 155 | SCHED_WINDOW_SIZE = 40 156 | PS_WORKER = True # whether consider ps and worker tasks separately or not 157 | INPUTS_GATE=[("TYPE",True), ("STAY",True), ("PROGRESS",True), ("DOM_RESR",True), ("WORKERS",True)] 158 | if PS_WORKER: 159 | INPUTS_GATE.append(("PS", True)) 160 | INJECT_SAMPLES = True 161 | BUNDLE_ACTION = True # add a 'bundle' action to each job, i.e., selecting a ps and a worker by one action 162 | TYPE_BINARY = False # 4 bits 163 | type_str, enable = INPUTS_GATE[0] 164 | if not enable: 165 | TYPE_BINARY = False 166 | STATE_DIM = (3*TYPE_BINARY + sum([enable for (_,enable) in INPUTS_GATE]), SCHED_WINDOW_SIZE) # type, # of time slots in the system so far, normalized remaining epoch, dom resource, # of workers 167 | SKIP_TS = True # whether we skip the timeslot 168 | if PS_WORKER: 169 | ACTION_DIM = 2 * SCHED_WINDOW_SIZE + SKIP_TS 170 | if BUNDLE_ACTION: 171 | ACTION_DIM = 3 * SCHED_WINDOW_SIZE + SKIP_TS 172 | else: 173 | ACTION_DIM = SCHED_WINDOW_SIZE + SKIP_TS 174 | 175 | INPUT_RESCALE = False # not implemented on heuristic algorithms yet 176 | JOB_CENTRAL_REPRESENTATION = False # treat each job as an input instead of treating each type of information of all jobs as an input 177 | ATTRIBUTE_CENTRAL_REPRESENTATION = False # treat each property of all jobs as an input, default fully connected to input 178 | ZERO_PADDING = True # how to represent None job as input 179 | FIRST_LAYER_TANH = False 180 | NN_SHORTCUT_CONN = False # connect the output of first layer to the NN layer before softmax output 181 | if NN_SHORTCUT_CONN: 182 | assert JOB_CENTRAL_REPRESENTATION # must enable JOB_CENTRAL_REPRESENTATION 183 | NUM_FCN_LAYERS = 2 # number of fully connected layers, must be > 0 184 | NUM_NEURONS_PER_FCN = STATE_DIM[0] * STATE_DIM[1] * 2 / 3 # default same number as input size 185 | BATCH_NORMALIZATION = True 186 | 187 | -------------------------------------------------------------------------------- /trace.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import parameters as pm 3 | from job import Job 4 | import speed 5 | import math 6 | 7 | class Trace: 8 | def __init__(self, logger=None): 9 | self.logger = logger 10 | 11 | # statistics collected from testbed measurement 12 | # self.speedtheta = [[2.8335, 3.9188, 0, 0.505], [1.8432, 3.8148, 0.0331, 0.3584], \ 13 | # [1.0203, 2.7765, 4.9247, 0.0016], [4.0933, 3.6790, 0, 0.6471], [1.188, 3.7519, 0.071, 0.028], [1.1826, 0.5554, 5.1176, 0.045]] 14 | self.num_types = 8 15 | self.models = ["resnet-50", "vgg-16", "resnext-110", "inception-bn", 
"seq2seq", "cnn-text-classification", "dssm", "wlm"] 16 | self.local_comp_times = [0.449, 0.535, 0.226, 0.815, 0.075, 0.585, 0.567, 0.154] # second 17 | self.model_sizes = [102.2, 553.4, 6.92, 42.1, 36.5, 24.0, 6.0, 19.2] # MB 18 | self.epoch_sizes = [115, 115, 390, 120, 780, 193, 349, 165] # number of samples per batch 19 | self.inter_bws = [91.875, 233.0, 59.5, 145.875, 120.125, 60.75, 92.125, 10.375] # MB/s 20 | self.intra_bws = [306.5, 427.75, 63.0, 1082.125, 181.125, 159.625, 65.625, 22.875] # MB/s 21 | 22 | # self.resr_workers = [[2, 4], [2, 4], [2, 4], [2, 4], [4, 0], [2, 4], [4, 0], [1, 4]] # cpu, gpu, 1 cpu = 1 slot, 1 gpu = 4 slots 23 | self.resr_workers = [[2, 4], [2, 4], [2, 4], [2, 4], [2, 4], [2, 0], [2, 0], [2, 4]] 24 | # self.resr_ps = [[3, 0], [4, 0], [3, 0], [3, 0], [1, 0], [3, 0], [1, 0], [1, 0]] 25 | self.resr_ps = [[2, 0], [2, 0], [2, 0], [2, 0], [2, 0], [2, 0], [2, 0], [2, 0]] 26 | self.num_epochs = np.array([0.3, 0.96, 0.05, 0.54, 0.95, 0.46, 0.33, 0.23]) * pm.MAX_NUM_EPOCHS 27 | # self.num_epochs = np.array([0.39, 0.6, 0.05, 0.54, 0.99, 0.76, 0.93, 0.23]) * pm.MAX_NUM_EPOCHS 28 | 29 | self.speed_funcs = speed.speed_funcs 30 | 31 | # job arrival patterns 32 | self.arrv_pattern_1 = [1, 22, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 33 | 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 34 | 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1] 35 | self.arrv_pattern_2 = [2, 40, 2, 2, 2, 3, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 36 | 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 37 | 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1] 38 | self.arrv_pattern_3 = [2, 57, 3, 3, 3, 4, 3, 3, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 3, 3, 2, 2, 3, 4, 3, 2, 2, 2, 39 | 1, 2, 2, 2, 2, 2, 2, 3, 3, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 3, 3, 3, 3, 3, 40 | 2, 2, 2, 2, 3, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 3, 2] 41 | self.arrv_pattern_4 = [3, 74, 4, 4, 4, 6, 5, 4, 3, 3, 3, 3, 4, 4, 5, 3, 4, 4, 4, 4, 4, 2, 2, 4, 5, 4, 2, 3, 2, 42 | 2, 2, 3, 2, 3, 3, 3, 4, 4, 2, 3, 3, 2, 3, 3, 2, 3, 3, 3, 3, 2, 2, 2, 3, 4, 3, 4, 4, 4, 43 | 3, 3, 3, 3, 4, 3, 2, 3, 2, 2, 2, 3, 3, 3, 2, 4, 3] 44 | self.arrv_pattern_5 = [4, 10, 5, 4, 5, 6, 5, 4, 3, 3, 4, 3, 4, 5, 5, 4, 5, 5, 5, 5, 4, 3, 3, 4, 6, 4, 3, 3, 3, 45 | 2, 3, 3, 3, 4, 3, 3, 4, 4, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 4, 4, 4, 4, 4, 4, 46 | 4, 4, 3, 4, 4, 3, 2, 4, 2, 3, 3, 4, 3, 3, 3, 4, 3] 47 | self.arrv_pattern_10 = [6, 134, 8, 7, 8, 10, 9, 7, 6, 5, 6, 5, 7, 8, 9, 6, 8, 8, 8, 8, 7, 5, 5, 7, 9, 7, 5, 5, 5, 48 | 3, 5, 6, 4, 6, 6, 6, 7, 7, 4, 5, 5, 3, 5, 5, 4, 6, 5, 6, 5, 3, 4, 4, 6, 7, 7, 7, 7, 7, 6, 49 | 6, 6, 6, 7, 6, 4, 6, 4, 5, 4, 6, 5, 6, 5, 7, 6] 50 | self.arrv_pattern_15 = [11, 224, 13, 12, 14, 18, 15, 12, 10, 9, 11, 9, 12, 14, 15, 10, 14, 14, 13, 14, 12, 8, 51 | 8, 12, 16, 12, 8, 9, 8, 6, 8, 10, 8, 11, 10, 10, 12, 12, 7, 9, 9, 6, 9, 9, 8, 10, 9, 52 | 10, 9, 6, 7, 7, 11, 12, 11, 13, 12, 12, 11, 11, 10, 11, 12, 10, 7, 11, 7, 8, 8, 11, 9, 53 | 10, 8, 12, 10] 54 | self.arrv_pattern_20 = [14, 288, 17, 16, 18, 23, 19, 16, 13, 11, 14, 11, 15, 18, 19, 13, 18, 18, 17, 18, 16, 11, 55 | 10, 15, 21, 15, 10, 11, 10, 8, 11, 12, 10, 14, 12, 13, 16, 16, 9, 12, 12, 7, 11, 12, 10, 56 | 13, 12, 13, 12, 8, 10, 9, 14, 15, 15, 16, 15, 16, 14, 14, 13, 14, 15, 13, 9, 14, 9, 11, 57 | 10, 14, 12, 13, 11, 15, 13] 58 | self.arrv_pattern_30 = [20, 403, 24, 23, 26, 32, 27, 22, 18, 16, 19, 16, 22, 26, 27, 19, 25, 26, 24, 25, 22, 
15, 59 | 15, 21, 29, 21, 15, 16, 15, 11, 15, 18, 14, 20, 18, 19, 22, 23, 13, 17, 17, 11, 16, 16, 60 | 14, 18, 17, 18, 17, 11, 14, 13, 20, 21, 21, 23, 22, 23, 20, 20, 18, 20, 21, 19, 12, 20, 61 | 12, 16, 14, 20, 17, 18, 15, 21, 19] 62 | self.arrv_pattern_40 = [25, 504, 30, 29, 32, 40, 34, 28, 23, 20, 24, 20, 27, 32, 34, 24, 32, 33, 31, 32, 28, 19, 63 | 18, 27, 37, 27, 18, 20, 19, 14, 19, 22, 18, 25, 22, 23, 28, 28, 17, 22, 21, 13, 20, 21, 64 | 18, 23, 22, 23, 21, 14, 17, 17, 25, 27, 26, 29, 27, 28, 25, 25, 22, 26, 27, 23, 16, 26, 65 | 15, 20, 18, 25, 22, 23, 19, 27, 23] 66 | self.arrv_pattern_50 = [33, 672, 40, 38, 43, 54, 45, 38, 31, 27, 33, 27, 36, 43, 46, 32, 43, 44, 41, 42, 37, 26, 67 | 25, 36, 49, 36, 25, 27, 25, 19, 25, 30, 24, 34, 30, 31, 37, 38, 23, 29, 28, 18, 27, 28, 68 | 24, 30, 29, 30, 28, 18, 23, 22, 34, 36, 35, 39, 37, 38, 34, 34, 30, 34, 36, 31, 21, 34, 69 | 21, 26, 24, 34, 29, 30, 26, 36, 31] 70 | 71 | # ali trace, JCT 147 minutes on average 72 | self.ali_trace_arrv_pattern = [] 73 | 74 | def _get_pattern(self, max_arrvs_per_ts): 75 | if pm.JOB_ARRIVAL_PATTERN == "Uniform": 76 | return [max_arrvs_per_ts for _ in range(100)] 77 | elif pm.JOB_ARRIVAL_PATTERN == "Poisson": 78 | return np.random.poisson(max_arrvs_per_ts, 100) 79 | elif pm.JOB_ARRIVAL_PATTERN == "Google_Trace": 80 | if max_arrvs_per_ts == 1: 81 | return self.arrv_pattern_1 82 | elif max_arrvs_per_ts == 2: 83 | return self.arrv_pattern_2 84 | elif max_arrvs_per_ts == 3: 85 | return self.arrv_pattern_3 86 | elif max_arrvs_per_ts == 4: 87 | return self.arrv_pattern_4 88 | elif max_arrvs_per_ts == 5: 89 | return self.arrv_pattern_5 90 | elif max_arrvs_per_ts == 10: 91 | return self.arrv_pattern_10 92 | elif max_arrvs_per_ts == 15: 93 | return self.arrv_pattern_15 94 | elif max_arrvs_per_ts == 20: 95 | return self.arrv_pattern_20 96 | elif max_arrvs_per_ts == 30: 97 | return self.arrv_pattern_30 98 | elif max_arrvs_per_ts == 40: 99 | return self.arrv_pattern_40 100 | elif max_arrvs_per_ts == 50: 101 | return self.arrv_pattern_50 102 | else: 103 | self.logger.error("unrecognizable arrival pattern!") 104 | exit(-1) 105 | elif pm.JOB_ARRIVAL_PATTERN == "Ali_Trace": 106 | ratio = max(self.ali_trace_arrv_pattern)/float(max_arrvs_per_ts) 107 | trace = [] 108 | for arrv in self.ali_trace_arrv_pattern: 109 | trace.append(int(math.ceil(arrv/ratio))) 110 | return trace 111 | 112 | 113 | def _weibull_dist(self): 114 | # follow weibull distribution, according to paper revisiting size-based ... 
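        # np.random.weibull(2) draws from a standard Weibull distribution with shape k=2 (mean ~0.89),
        # so the sampled length averages roughly 0.3 * MAX_NUM_EPOCHS and is then clipped to [1, MAX_NUM_EPOCHS] below.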
115 | num_epochs = int(np.random.weibull(2) * pm.MAX_NUM_EPOCHS/3) 116 | if num_epochs == 0: 117 | num_epochs = 1 118 | elif num_epochs > pm.MAX_NUM_EPOCHS: 119 | num_epochs = pm.MAX_NUM_EPOCHS 120 | return num_epochs 121 | 122 | 123 | def get_trace(self, num_type=8): 124 | # google trace 125 | trace = dict() 126 | id = 1 127 | count_num_jobs = 0 128 | done = False 129 | arrv_pattern = self._get_pattern(pm.MAX_ARRVS_PER_TS) 130 | 131 | offset = np.random.randint(5) 132 | for ts in range(len(arrv_pattern)): 133 | num_jobs = min(pm.MAX_ARRVS_PER_TS, arrv_pattern[(ts+offset)%len(arrv_pattern)]) 134 | job_list = [] 135 | for j in range(num_jobs): 136 | assert num_type <= self.num_types 137 | if pm.JOB_LEN_PATTERN == "Normal": 138 | type = np.random.randint(0, num_type) 139 | elif pm.JOB_LEN_PATTERN == "Ali_Trace": 140 | prob_sum = np.sum(self.ali_trace_job_probs[:num_type]) 141 | cumsum = np.cumsum(self.ali_trace_job_probs[:num_type]) 142 | type = (cumsum > prob_sum*np.random.random()).argmax() 143 | index = type 144 | # type = self.importance_map[type] 145 | job = Job(id, type+1, self.logger) # type start from 1 146 | id += 1 147 | 148 | job.arrv_time = ts 149 | 150 | job.model = self.models[type] 151 | job.epoch_size = self.epoch_sizes[type] 152 | job.model_size = self.model_sizes[type] 153 | job.local_comp_time = self.local_comp_times[type] 154 | job.intra_bw = self.intra_bws[type] 155 | job.inter_bw = self.inter_bws[type] 156 | job.resr_ps = np.array(self.resr_ps[type]) 157 | job.resr_worker = np.array(self.resr_workers[type]) 158 | job.speed_func = self.speed_funcs[job.model] 159 | 160 | if pm.FIX_JOB_LEN: 161 | if pm.JOB_LEN_PATTERN == "Normal": 162 | job.num_epochs = int(self.num_epochs[type]) 163 | elif pm.JOB_LEN_PATTERN == "Ali_Trace": 164 | job.num_epochs = int(self.ali_trace_num_epochs[index]) 165 | else: 166 | if pm.JOB_LEN_PATTERN == "Normal": 167 | job.num_epochs = int(self.num_epochs[type])*np.random.randint(90,110)/100.0 # self._weibull_dist() 168 | else: 169 | job.num_epochs = int(self.ali_trace_num_epochs[type])*np.random.randint(90,110)/100.0 170 | 171 | num_epoch_error = pm.JOB_EPOCH_EST_ERROR*(2*np.random.rand()-1) 172 | job.real_num_epochs = (1+num_epoch_error)*job.num_epochs 173 | job_list.append(job) 174 | 175 | count_num_jobs += 1 176 | if count_num_jobs == pm.TOT_NUM_JOBS: 177 | done = True 178 | break 179 | trace[ts] = job_list 180 | if done: 181 | break 182 | assert count_num_jobs==pm.TOT_NUM_JOBS 183 | return trace 184 | 185 | 186 | if __name__ == '__main__': 187 | print "Generate job traces..." 
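    # A minimal usage sketch (illustrative only): get_trace() returns a dict mapping a timeslot index
    # to the list of Job objects arriving in that timeslot, with TOT_NUM_JOBS jobs in total and at
    # most MAX_ARRVS_PER_TS arrivals per timeslot, e.g.:
    #
    #   trace = Trace().get_trace()
    #   for ts in sorted(trace.keys()):
    #       print ts, [(job.id, job.model, job.num_epochs) for job in trace[ts]]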
188 | Trace().get_trace() 189 | -------------------------------------------------------------------------------- /network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tflearn 3 | import tensorflow as tf 4 | import parameters as pm 5 | 6 | 7 | class PolicyNetwork: 8 | def __init__(self, sess, scope, mode, logger): 9 | self.sess = sess 10 | self.state_dim = pm.STATE_DIM 11 | self.action_dim = pm.ACTION_DIM 12 | self.scope = scope 13 | self.mode = mode 14 | self.logger = logger 15 | 16 | self.input, self.output = self._create_nn() 17 | self.label = tf.placeholder(tf.float32, [None, self.action_dim]) 18 | self.action = tf.placeholder(tf.float32, [None, None]) 19 | self.advantage = tf.placeholder(tf.float32, [None, 1]) 20 | 21 | self.entropy = tf.reduce_mean(tf.multiply(self.output, tf.log(self.output + pm.ENTROPY_EPS))) 22 | self.entropy_weight = pm.ENTROPY_WEIGHT 23 | 24 | if self.mode == "SL": 25 | if pm.SL_LOSS_FUNCTION == "Mean_Square": 26 | self.loss = tf.reduce_mean(tflearn.mean_square (self.output, self.label)) 27 | elif pm.SL_LOSS_FUNCTION == "Cross_Entropy": 28 | self.loss = tf.reduce_mean(tflearn.categorical_crossentropy(self.output,self.label)) 29 | elif pm.SL_LOSS_FUNCTION == "Absolute_Difference": 30 | self.loss = tf.reduce_mean(tf.losses.absolute_difference(self.output, self.label)) 31 | elif self.mode == "RL": 32 | self.loss = tf.reduce_mean(tf.multiply(tf.log(tf.reduce_sum(tf.multiply(self.output, self.action), reduction_indices=1, keep_dims=True)), -self.advantage)) \ 33 | + self.entropy_weight * self.entropy 34 | #self.loss = tf.reduce_mean(tflearn.mean_square(self.output, self.label)) 35 | 36 | self.weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.scope) 37 | self.gradients = tf.gradients(self.loss, self.weights) 38 | 39 | self.lr = pm.LEARNING_RATE 40 | if pm.OPTIMIZER == "Adam": 41 | self.optimize = tf.train.AdamOptimizer(learning_rate=self.lr).apply_gradients(zip(self.gradients, self.weights)) 42 | elif pm.OPTIMIZER == "RMSProp": 43 | self.optimize = tf.train.RMSPropOptimizer(learning_rate=self.lr).apply_gradients(zip(self.gradients, self.weights)) 44 | 45 | self.weights_phs = [] 46 | for weight in self.weights: 47 | self.weights_phs.append(tf.placeholder(tf.float32, shape=weight.get_shape())) 48 | self.set_weights_op = [] 49 | for idx, weights_ph in enumerate(self.weights_phs): 50 | self.set_weights_op.append(self.weights[idx].assign(weights_ph)) 51 | 52 | self.loss_ring_buff = [0 for _ in range(20)] 53 | self.index_ring_buff = 0 54 | 55 | 56 | def _create_nn(self): 57 | with tf.variable_scope(self.scope): 58 | # type, arrival, progress, resource 59 | input = tflearn.input_data(shape=[None, self.state_dim[0], self.state_dim[1]], name="input") # row is info type, column is job 60 | 61 | if pm.JOB_CENTRAL_REPRESENTATION or pm.ATTRIBUTE_CENTRAL_REPRESENTATION: 62 | if pm.JOB_CENTRAL_REPRESENTATION: 63 | fc_list = [] 64 | for i in range(self.state_dim[1]): 65 | if pm.FIRST_LAYER_TANH: 66 | fc1 = tflearn.fully_connected(input[:, :, i], self.state_dim[0], activation="tanh", name="job_" + str(i)) 67 | else: 68 | fc1 = tflearn.fully_connected(input[:, :, i], self.state_dim[0], activation="relu", name="job_"+str(i)) 69 | if pm.BATCH_NORMALIZATION: 70 | fc1 = tflearn.batch_normalization(fc1, name="job_"+str(i)+"_bn") 71 | fc_list.append(fc1) 72 | else: 73 | j = 0 74 | fc_list = [] 75 | for (key, enable) in pm.INPUTS_GATE: # INPUTS_GATE=[("TYPE",True), ("STAY",False), 
("PROGRESS",False), ("DOM_RESR",False), ("WORKERS",True)] 76 | if enable: 77 | if pm.FIRST_LAYER_TANH: 78 | fc1 = tflearn.fully_connected(input[:, j], pm.SCHED_WINDOW_SIZE, activation="tanh", name=key) 79 | else: 80 | fc1 = tflearn.fully_connected(input[:, j], pm.SCHED_WINDOW_SIZE, activation="relu", name=key) 81 | if pm.BATCH_NORMALIZATION: 82 | fc1 = tflearn.batch_normalization(fc1, name=key+"_bn") 83 | fc_list.append(fc1) 84 | j += 1 85 | if len(fc_list) == 1: 86 | merge_net = fc_list[0] 87 | if pm.BATCH_NORMALIZATION: 88 | merge_net = tflearn.batch_normalization(merge_net) 89 | else: 90 | merge_net = tflearn.merge(fc_list, 'concat', name="merge_net_1") 91 | if pm.BATCH_NORMALIZATION: 92 | merge_net = tflearn.batch_normalization(merge_net, name="merge_net_1_bn") 93 | dense_net_1 = tflearn.fully_connected(merge_net, pm.NUM_NEURONS_PER_FCN, activation='relu', name='dense_net_1') 94 | else: 95 | dense_net_1 = tflearn.fully_connected(input, pm.NUM_NEURONS_PER_FCN, activation='relu', name='dense_net_1') 96 | if pm.BATCH_NORMALIZATION: 97 | dense_net_1 = tflearn.batch_normalization(dense_net_1, name='dense_net_1_bn') 98 | 99 | for i in range(1, pm.NUM_FCN_LAYERS): 100 | dense_net_1 = tflearn.fully_connected(dense_net_1, pm.NUM_NEURONS_PER_FCN, activation='relu', name='dense_net_' + str(i + 1)) 101 | if pm.BATCH_NORMALIZATION: 102 | dense_net_1 = tflearn.batch_normalization(dense_net_1, name='dense_net_' + str(i + 1) + 'bn') 103 | 104 | if pm.JOB_CENTRAL_REPRESENTATION and pm.NN_SHORTCUT_CONN: # add shortcut the last layer 105 | fc2_list = [] 106 | for fc in fc_list: 107 | merge_net_2 = tflearn.merge([fc, dense_net_1], 'concat') 108 | if pm.PS_WORKER: 109 | if pm.BUNDLE_ACTION: 110 | fc2 = tflearn.fully_connected(merge_net_2, 3, activation='linear') 111 | else: 112 | fc2 = tflearn.fully_connected(merge_net_2, 2, activation='linear') 113 | else: 114 | fc2 = tflearn.fully_connected(merge_net_2, 1, activation='linear') 115 | fc2_list.append(fc2) 116 | 117 | if pm.SKIP_TS: 118 | fc2 = tflearn.fully_connected(dense_net_1, 1, activation='linear') 119 | fc2_list.append(fc2) 120 | merge_net_3 = tflearn.merge(fc2_list, 'concat') 121 | output = tflearn.activation(merge_net_3, activation="softmax", name="policy_output") 122 | else: 123 | output = tflearn.fully_connected(dense_net_1, self.action_dim, activation="softmax", name="policy_output") 124 | return input, output 125 | 126 | 127 | def get_sl_loss(self, input, label): 128 | assert self.mode == "SL" 129 | return self.sess.run([self.output, self.loss], feed_dict={self.input:input, self.label:label}) 130 | 131 | 132 | def predict(self, input): 133 | return self.sess.run(self.output, feed_dict={self.input:input}) 134 | 135 | 136 | def get_sl_gradients(self, input, label): 137 | assert self.mode == "SL" 138 | return self.sess.run([self.entropy, self.loss, self.gradients], feed_dict={self.input:input, self.label:label}) 139 | 140 | 141 | def get_rl_gradients(self, input, output, action, advantage): 142 | assert self.mode == "RL" 143 | return self.sess.run([self.entropy, self.loss, self.gradients], 144 | feed_dict={self.input:input, self.output:output, self.action:action, 145 | self.advantage:advantage}) 146 | 147 | 148 | def apply_gradients(self, gradients): 149 | self.sess.run(self.optimize, feed_dict={i:d for i,d in zip(self.gradients,gradients)}) 150 | 151 | 152 | def set_weights(self, weights): 153 | self.sess.run(self.set_weights_op, feed_dict={i:d for i,d in zip(self.weights_phs, weights)}) 154 | 155 | 156 | def get_weights(self): 157 | return 
self.sess.run(self.weights) 158 | 159 | def get_num_weights(self): 160 | with tf.variable_scope(self.scope): 161 | total_parameters = 0 162 | for variable in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.scope): 163 | # shape is an array of tf.Dimension 164 | shape = variable.get_shape() 165 | # print variable.name 166 | variable_parameters = 1 167 | for dim in shape: 168 | variable_parameters *= dim.value 169 | # print "varable in each layer {0}".format(variable_parameters) 170 | total_parameters += variable_parameters 171 | return total_parameters 172 | 173 | # adjust entropy weight 174 | def anneal_entropy_weight(self, step): 175 | if pm.FIX_ENTROPY_WEIGHT: 176 | self.entropy_weight = pm.ENTROPY_WEIGHT 177 | else: 178 | self.entropy_weight = max(pm.MAX_ENTROPY_WEIGHT * 2 / (1 + np.exp(step / pm.ANNEALING_TEMPERATURE)), 0.1) 179 | 180 | 181 | 182 | class ValueNetwork: 183 | def __init__(self, sess, scope, mode, logger): 184 | self.sess = sess 185 | self.state_dim = pm.STATE_DIM 186 | self.action_dim = pm.ACTION_DIM 187 | self.scope = scope 188 | self.mode = mode 189 | self.logger = logger 190 | 191 | self.input, self.output = self._create_nn() 192 | self.label = tf.placeholder(tf.float32, [None, self.action_dim]) 193 | self.action = tf.placeholder(tf.float32, [None, None]) 194 | 195 | self.entropy_weight = pm.ENTROPY_WEIGHT 196 | 197 | self.td_target = tf.placeholder(tf.float32, [None, 1]) 198 | self.loss = tflearn.mean_square(self.output, self.td_target) 199 | 200 | self.weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.scope) 201 | self.gradients = tf.gradients(self.loss, self.weights) 202 | 203 | self.lr = pm.LEARNING_RATE 204 | if pm.OPTIMIZER == "Adam": 205 | self.optimize = tf.train.AdamOptimizer(learning_rate=self.lr).apply_gradients(zip(self.gradients, self.weights)) 206 | elif pm.OPTIMIZER == "RMSProp": 207 | self.optimize = tf.train.RMSPropOptimizer(learning_rate=self.lr).apply_gradients(zip(self.gradients, self.weights)) 208 | self.weights_phs = [] 209 | for weight in self.weights: 210 | self.weights_phs.append(tf.placeholder(tf.float32, shape=weight.get_shape())) 211 | self.set_weights_op = [] 212 | for idx, weights_ph in enumerate(self.weights_phs): 213 | self.set_weights_op.append(self.weights[idx].assign(weights_ph)) 214 | 215 | 216 | def _create_nn(self): 217 | with tf.variable_scope(self.scope): 218 | # type, arrival, progress, resource 219 | input = tflearn.input_data(shape=[None, self.state_dim[0], self.state_dim[1]], name="input") # row is info type, column is job 220 | 221 | if pm.JOB_CENTRAL_REPRESENTATION or pm.ATTRIBUTE_CENTRAL_REPRESENTATION: 222 | if pm.JOB_CENTRAL_REPRESENTATION: 223 | fc_list = [] 224 | for i in range(self.state_dim[1]): 225 | if pm.FIRST_LAYER_TANH: 226 | fc1 = tflearn.fully_connected(input[:, :, i], self.state_dim[0], activation="tanh", name="job_" + str(i)) 227 | else: 228 | fc1 = tflearn.fully_connected(input[:, :, i], self.state_dim[0], activation="relu", name="job_"+str(i)) 229 | if pm.BATCH_NORMALIZATION: 230 | fc1 = tflearn.batch_normalization(fc1, name="job_"+str(i)+"_bn") 231 | fc_list.append(fc1) 232 | else: 233 | j = 0 234 | fc_list = [] 235 | for (key, enable) in pm.INPUTS_GATE: # INPUTS_GATE=[("TYPE",True), ("STAY",False), ("PROGRESS",False), ("DOM_RESR",False), ("WORKERS",True)] 236 | if enable: 237 | if pm.FIRST_LAYER_TANH: 238 | fc1 = tflearn.fully_connected(input[:, j], pm.SCHED_WINDOW_SIZE, activation="tanh", name=key) 239 | else: 240 | fc1 = tflearn.fully_connected(input[:, j], 
pm.SCHED_WINDOW_SIZE, activation="relu", name=key) 241 | if pm.BATCH_NORMALIZATION: 242 | fc1 = tflearn.batch_normalization(fc1, name=key+"_bn") 243 | fc_list.append(fc1) 244 | j += 1 245 | if len(fc_list) == 1: 246 | merge_net = fc_list[0] 247 | if pm.BATCH_NORMALIZATION: 248 | merge_net = tflearn.batch_normalization(merge_net) 249 | else: 250 | merge_net = tflearn.merge(fc_list, 'concat', name="merge_net_1") 251 | if pm.BATCH_NORMALIZATION: 252 | merge_net = tflearn.batch_normalization(merge_net, name="merge_net_1_bn") 253 | dense_net_1 = tflearn.fully_connected(merge_net, pm.NUM_NEURONS_PER_FCN, activation='relu', name='dense_net_1') 254 | else: 255 | dense_net_1 = tflearn.fully_connected(input, pm.NUM_NEURONS_PER_FCN, activation='relu', name='dense_net_1') 256 | if pm.BATCH_NORMALIZATION: 257 | dense_net_1 = tflearn.batch_normalization(dense_net_1, name='dense_net_1_bn') 258 | 259 | for i in range(1, pm.NUM_FCN_LAYERS): 260 | dense_net_1 = tflearn.fully_connected(dense_net_1, pm.NUM_NEURONS_PER_FCN, activation='relu', name='dense_net_' + str(i + 1)) 261 | if pm.BATCH_NORMALIZATION: 262 | dense_net_1 = tflearn.batch_normalization(dense_net_1, name='dense_net_' + str(i + 1) + 'bn') 263 | 264 | if pm.JOB_CENTRAL_REPRESENTATION and pm.NN_SHORTCUT_CONN: # a more layer if critic adds shortcut 265 | fc2_list = [] 266 | for fc in fc_list: 267 | merge_net_2 = tflearn.merge([fc, dense_net_1], 'concat') 268 | if pm.PS_WORKER: 269 | if pm.BUNDLE_ACTION: 270 | fc2 = tflearn.fully_connected(merge_net_2, 3, activation='relu') 271 | else: 272 | fc2 = tflearn.fully_connected(merge_net_2, 2, activation='relu') 273 | else: 274 | fc2 = tflearn.fully_connected(merge_net_2, 1, activation='relu') 275 | fc2_list.append(fc2) 276 | 277 | if pm.SKIP_TS: 278 | fc2 = tflearn.fully_connected(dense_net_1, 1, activation='relu') 279 | fc2_list.append(fc2) 280 | merge_net_3 = tflearn.merge(fc2_list, 'concat', name='merge_net_3') 281 | if pm.BATCH_NORMALIZATION: 282 | merge_net_3 = tflearn.batch_normalization(merge_net_3, name='merge_net_3_bn') 283 | output = tflearn.fully_connected(merge_net_3, 1, activation="linear", name="value_output") 284 | else: 285 | output = tflearn.fully_connected(dense_net_1, 1, activation="linear", name="value_output") 286 | return input, output 287 | 288 | def get_loss(self, input): 289 | return self.sess.run(self.loss, feed_dict={self.input: input}) 290 | 291 | 292 | def predict(self, input): 293 | return self.sess.run(self.output, feed_dict={self.input:input}) 294 | 295 | 296 | def get_rl_gradients(self, input, output, td_target): 297 | return self.sess.run([self.loss, self.gradients], 298 | feed_dict={self.input:input, self.output:output, self.td_target:td_target}) 299 | 300 | 301 | def apply_gradients(self, gradients): 302 | self.sess.run(self.optimize, feed_dict={i:d for i,d in zip(self.gradients,gradients)}) 303 | 304 | 305 | def set_weights(self, weights): 306 | self.sess.run(self.set_weights_op, feed_dict={i:d for i,d in zip(self.weights_phs, weights)}) 307 | 308 | 309 | def get_weights(self): 310 | return self.sess.run(self.weights) 311 | 312 | def get_num_weights(self): 313 | with tf.variable_scope(self.scope): 314 | total_parameters = 0 315 | for variable in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.scope): 316 | # shape is an array of tf.Dimension 317 | shape = variable.get_shape() 318 | # print variable.name 319 | variable_parameters = 1 320 | for dim in shape: 321 | variable_parameters *= dim.value 322 | # print "varable in each layer 
{0}".format(variable_parameters) 323 | total_parameters += variable_parameters 324 | return total_parameters 325 | 326 | 327 | 328 | -------------------------------------------------------------------------------- /experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import params_template as pm 4 | import datetime 5 | import copy 6 | import time 7 | import numpy as np 8 | import os.path 9 | import multiprocessing 10 | 11 | # default sl hyper-parameters configuration 12 | sl_config_dict = {"TRAINING_MODE":"SL", "VALUE_NET":False, \ 13 | "POLICY_NN_MODEL":None, "VALUE_NN_MODEL":None, "CHECKPOINT_INTERVAL":50, \ 14 | "LEARNING_RATE":0.005, "TOT_NUM_STEPS":200, "VAL_INTERVAL":50, \ 15 | "NUM_TS_PER_UPDATE":5, "JOB_ORDER_SHUFFLE":True} 16 | NUM_TEST = 5 17 | PARALLELISM = 10 18 | TASK_ID = -1 19 | 20 | def replace_params(map, dir): 21 | pm_md = globals().get('pm', None) 22 | train_config = dict() 23 | if pm_md: 24 | train_config = {key: value for key, value in pm_md.__dict__.iteritems() if not (key.startswith('__') or key.startswith('_'))} 25 | 26 | f = open(dir+"parameters.py", 'w') 27 | for key, _ in train_config.iteritems(): 28 | if key in map.keys(): 29 | train_config[key] = map[key] 30 | if isinstance(train_config[key], basestring): 31 | f.write(str(key) + " = " + "'" + str(train_config[key]) + "'" + '\n') 32 | else: 33 | f.write(str(key) + " = " + str(train_config[key])+'\n') 34 | f.close() 35 | 36 | 37 | def get_config(id, exp_name, test_value): 38 | config = dict() 39 | config["EXPERIMENT_NAME"] = exp_name + "_" + str(test_value) 40 | if id == 1: 41 | config["SCHED_WINDOW_SIZE"] = test_value 42 | config["STATE_DIM"] = (sum([enable for (_,enable) in pm.INPUTS_GATE]), test_value) 43 | config["ACTION_DIM"] = 3 * test_value + pm.SKIP_TS 44 | config["NUM_NEURONS_PER_FCN"] = sum([enable for (_,enable) in pm.INPUTS_GATE]) * test_value 45 | elif id == 2: 46 | config["NUM_FCN_LAYERS"] = 1 47 | config["NUM_NEURONS_PER_FCN"] = test_value 48 | elif id == 3 or id == 24: 49 | config["NUM_FCN_LAYERS"] = test_value 50 | config["NUM_NEURONS_PER_FCN"] = pm.STATE_DIM[0]*pm.STATE_DIM[1]*2/3 51 | elif id == 4: 52 | config["BUNDLE_ACTION"] = test_value 53 | if test_value == False: 54 | config["ACTION_DIM"] = 2 * pm.SCHED_WINDOW_SIZE + pm.SKIP_TS 55 | elif id == 5: 56 | config["JOB_ARRIVAL_PATTERN"] = test_value 57 | elif id == 6: 58 | config["BATCH_NORMALIZATION"] = test_value 59 | elif id == 7: 60 | config["SL_LOSS_FUNCTION"] = test_value 61 | elif id == 8: 62 | ["Norm_Progress", "Job_Progress", "Num_Uncompleted_Jobs"] 63 | if test_value == "Norm_Progress": 64 | config["TS_REWARD_PLUS_JOB_REWARD"] = False 65 | config["NUM_UNCOMPLETED_JOB_REWARD"] = False 66 | elif test_value == "Job_Progress": 67 | config["TS_REWARD_PLUS_JOB_REWARD"] = True 68 | config["NUM_UNCOMPLETED_JOB_REWARD"] = False 69 | elif test_value == "Num_Uncompleted_Jobs": 70 | config["TS_REWARD_PLUS_JOB_REWARD"] = False 71 | config["NUM_UNCOMPLETED_JOB_REWARD"] = True 72 | elif id == 9: 73 | if not test_value: 74 | config["REPLAY_MEMORY_SIZE"] = 256 75 | elif id == 10: 76 | config["VALUE_NET"] = test_value 77 | elif id == 11: 78 | if test_value: 79 | config["INJECT_SAMPLES"] = True 80 | config["EPSILON_GREEDY"] = False 81 | else: 82 | config["INJECT_SAMPLES"] = False 83 | config["EPSILON_GREEDY"] = True 84 | elif id == 12: 85 | config["JOB_ARRIVAL_PATTERN"] = test_value 86 | config["HEURISTIC"] = "DRF" 87 | elif id == 13: 88 | config["JOB_ARRIVAL_PATTERN"] = 
test_value 89 | config["HEURISTIC"] = "SRTF" 90 | elif id == 14: 91 | config["JOB_ARRIVAL_PATTERN"] = test_value 92 | config["HEURISTIC"] = "Tetris" 93 | elif id == 15: 94 | config["JOB_ARRIVAL_PATTERN"] = test_value 95 | config["HEURISTIC"] = "Optimus" 96 | elif id == 16: 97 | config["HEURISTIC"] = test_value 98 | config["MAX_NUM_WORKERS"] = 8 99 | elif id == 17: 100 | config["NUM_AGENTS"] = test_value 101 | config["MINI_BATCH_SIZE"] = 256/test_value 102 | elif id == 18: 103 | config["CHANGING_JOB_TYPES"] = test_value 104 | elif id == 19: 105 | config["REAL_SPEED_TRACE"] = test_value 106 | elif id == 20: 107 | if test_value == "testbed": 108 | config["TESTBED"] = True 109 | config["CLUSTER_NUM_NODES"] = 6 110 | config["TOT_NUM_JOBS"] = 10 111 | config["MAX_NUM_EPOCHS"] = 1000 112 | config["MAX_ARRVS_PER_TS"] = 5 113 | config["TS_DURATION"] = 300.0 114 | window_size = 4 115 | config["SCHED_WINDOW_SIZE"] = window_size 116 | config["STATE_DIM"] = (sum([enable for (_, enable) in pm.INPUTS_GATE]), window_size) 117 | config["ACTION_DIM"] = 3 * window_size + pm.SKIP_TS 118 | config["NUM_NEURONS_PER_FCN"] = sum([enable for (_, enable) in pm.INPUTS_GATE]) * window_size 119 | elif test_value == "large-1": 120 | config["LARGE_SCALE"] = True 121 | config["CLUSTER_NUM_NODES"] = 100 122 | config["TOT_NUM_JOBS"] = 120 123 | config["MAX_NUM_EPOCHS"] = 80000 124 | config["MAX_ARRVS_PER_TS"] = 6 125 | config["TS_DURATION"] = 1200.0 126 | window_size = 30 127 | config["SCHED_WINDOW_SIZE"] = window_size 128 | config["STATE_DIM"] = (sum([enable for (_, enable) in pm.INPUTS_GATE]), window_size) 129 | config["ACTION_DIM"] = 3 * window_size + pm.SKIP_TS 130 | config["NUM_NEURONS_PER_FCN"] = sum([enable for (_, enable) in pm.INPUTS_GATE]) * window_size 131 | elif test_value == "large-2": 132 | config["LARGE_SCALE"] = True 133 | config["CLUSTER_NUM_NODES"] = 100 134 | config["TOT_NUM_JOBS"] = 180 135 | config["MAX_NUM_EPOCHS"] = 80000 136 | config["MAX_ARRVS_PER_TS"] = 9 137 | config["TS_DURATION"] = 1200.0 138 | window_size = 36 139 | config["SCHED_WINDOW_SIZE"] = window_size 140 | config["STATE_DIM"] = (sum([enable for (_, enable) in pm.INPUTS_GATE]), window_size) 141 | config["ACTION_DIM"] = 3 * window_size + pm.SKIP_TS 142 | config["NUM_NEURONS_PER_FCN"] = sum([enable for (_, enable) in pm.INPUTS_GATE]) * window_size 143 | elif test_value == "large-3": 144 | config["LARGE_SCALE"] = True 145 | config["CLUSTER_NUM_NODES"] = 120 146 | config["TOT_NUM_JOBS"] = 180 147 | config["MAX_NUM_EPOCHS"] = 80000 148 | config["MAX_ARRVS_PER_TS"] = 9 149 | config["TS_DURATION"] = 1200.0 150 | window_size = 36 151 | config["SCHED_WINDOW_SIZE"] = window_size 152 | config["STATE_DIM"] = (sum([enable for (_, enable) in pm.INPUTS_GATE]), window_size) 153 | config["ACTION_DIM"] = 3 * window_size + pm.SKIP_TS 154 | config["NUM_NEURONS_PER_FCN"] = sum([enable for (_, enable) in pm.INPUTS_GATE]) * window_size 155 | elif test_value == "large-4": 156 | config["LARGE_SCALE"] = True 157 | config["CLUSTER_NUM_NODES"] = 500 158 | config["TOT_NUM_JOBS"] = 600 159 | config["MAX_NUM_EPOCHS"] = 80000 160 | config["MAX_ARRVS_PER_TS"] = 30 161 | config["TS_DURATION"] = 1200.0 162 | config["MAX_NUM_WORKERS"] = 50 163 | window_size = 180 164 | config["SCHED_WINDOW_SIZE"] = window_size 165 | config["STATE_DIM"] = (sum([enable for (_, enable) in pm.INPUTS_GATE]), window_size) 166 | config["ACTION_DIM"] = 3 * window_size + pm.SKIP_TS 167 | config["NUM_NEURONS_PER_FCN"] = sum([enable for (_, enable) in pm.INPUTS_GATE]) * window_size 168 | elif 
test_value == "large-5": 169 | config["LARGE_SCALE"] = True 170 | config["CLUSTER_NUM_NODES"] = 500 171 | config["TOT_NUM_JOBS"] = 600 172 | config["MAX_NUM_EPOCHS"] = 80000 173 | config["MAX_ARRVS_PER_TS"] = 30 174 | config["TS_DURATION"] = 1200.0 175 | config["MAX_NUM_WORKERS"] = 100 176 | window_size = 180 177 | config["SCHED_WINDOW_SIZE"] = window_size 178 | config["STATE_DIM"] = (sum([enable for (_, enable) in pm.INPUTS_GATE]), window_size) 179 | config["ACTION_DIM"] = 3 * window_size + pm.SKIP_TS 180 | config["NUM_NEURONS_PER_FCN"] = sum([enable for (_, enable) in pm.INPUTS_GATE]) * window_size 181 | elif test_value == "large-6": 182 | config["LARGE_SCALE"] = True 183 | config["CLUSTER_NUM_NODES"] = 500 184 | config["TOT_NUM_JOBS"] = 600 185 | config["MAX_NUM_EPOCHS"] = 80000 186 | config["MAX_ARRVS_PER_TS"] = 30 187 | config["TS_DURATION"] = 1200.0 188 | config["MAX_NUM_WORKERS"] = 100 189 | config["VALUE_NET"] = False 190 | window_size = 180 191 | config["SCHED_WINDOW_SIZE"] = window_size 192 | config["STATE_DIM"] = (sum([enable for (_, enable) in pm.INPUTS_GATE]), window_size) 193 | config["ACTION_DIM"] = 3 * window_size + pm.SKIP_TS 194 | config["NUM_NEURONS_PER_FCN"] = sum([enable for (_, enable) in pm.INPUTS_GATE]) * window_size 195 | elif test_value == "small": # by default 196 | config["CLUSTER_NUM_NODES"] = 48 197 | config["TOT_NUM_JOBS"] = 60 198 | config["MAX_NUM_EPOCHS"] = 80000 199 | config["MAX_ARRVS_PER_TS"] = 3 200 | config["TS_DURATION"] = 1200.0 201 | window_size = 20 202 | config["SCHED_WINDOW_SIZE"] = window_size 203 | config["STATE_DIM"] = (sum([enable for (_, enable) in pm.INPUTS_GATE]), window_size) 204 | config["ACTION_DIM"] = 3 * window_size + pm.SKIP_TS 205 | config["NUM_NEURONS_PER_FCN"] = sum([enable for (_, enable) in pm.INPUTS_GATE]) * window_size 206 | elif id == 21: 207 | config["JOB_RESR_BALANCE"] = test_value 208 | elif id == 22: 209 | if not test_value: 210 | config["POLICY_NN_MODEL"] = None 211 | elif id == 23: 212 | config["JOB_EPOCH_EST_ERROR"] = test_value 213 | elif id == 25: 214 | config["TRAIN_SPEED_ERROR"] = test_value 215 | return config 216 | 217 | 218 | def process_results(root_dir, exp_name, test_values): 219 | results = dict() 220 | for test_value in test_values: 221 | jcts = [] 222 | makespans = [] 223 | rewards = [] 224 | for j in range(NUM_TEST): 225 | dir = root_dir + exp_name + "_" + str(test_value) + "/" + str(j) + '/' 226 | file = dir+exp_name+"_"+str(test_value)+"/rl_validation.txt" 227 | assert os.path.exists(file) 228 | f = open(file, 'r') 229 | temp_jcts = [] 230 | temp_makespans = [] 231 | temp_rewards = [] 232 | for line in f: 233 | segs = line.replace("\n",'').split(" ") 234 | temp_jcts.append(float(segs[2])) 235 | temp_makespans.append(float(segs[3])) 236 | temp_rewards.append(float(segs[4])) 237 | # find the min jct 238 | min_index = np.argmin(temp_jcts) 239 | jcts.append(temp_jcts[min_index]) 240 | makespans.append(temp_makespans[min_index]) 241 | rewards.append(temp_rewards[min_index]) 242 | results[test_value] = (str(np.average(jcts))+"+-"+str(np.std(jcts)),\ 243 | str(np.average(makespans))+"+-"+str(np.std(makespans)),\ 244 | str(np.average(rewards))+"+-"+str(np.std(rewards))) 245 | f = open(root_dir+"results.txt", "w") 246 | for item in results.items(): 247 | f.write(str(item) + "\n") 248 | f.close() 249 | print results 250 | return results 251 | 252 | 253 | def _sl_rl(dir, config, device): 254 | # SL 255 | sl_config = copy.deepcopy(sl_config_dict) 256 | for key, value in config.items(): 257 | if key not in 
sl_config: # sl_config_dict has higher priority 258 | sl_config[key] = value 259 | os.system("mkdir -p " + dir) 260 | os.system("cp *.py *.txt " + dir) 261 | replace_params(sl_config, dir) 262 | if TASK_ID != 17: 263 | os.system("cd " + dir + " && CUDA_VISIBLE_DEVICES=" + str(device) + " python train.py") 264 | else: 265 | os.system("cd " + dir + " && python train.py") 266 | 267 | time.sleep(3) 268 | # RL 269 | replace_params(config, dir) 270 | if TASK_ID != 17: 271 | os.system("cd " + dir + " && CUDA_VISIBLE_DEVICES=" + str(device) + " python train.py") 272 | else: 273 | os.system("cd " + dir + " && python train.py") 274 | 275 | 276 | def _baseline(dir, config): 277 | os.system("mkdir -p " + dir) 278 | os.system("cp *.py *.txt " + dir) 279 | replace_params(config, dir) 280 | os.system("cd " + dir + " && python comparison.py") 281 | 282 | 283 | def run(id, exp_name, test_values): 284 | print "running experiments for", exp_name 285 | tic = time.time() 286 | root_dir = exp_name + "-" + datetime.datetime.today().strftime('%Y%m%d_%H%M%S') + "/" 287 | 288 | pool = multiprocessing.Pool(processes=PARALLELISM) 289 | for i in range(len(test_values)): 290 | test_value = test_values[i] 291 | print "testing", exp_name, "with value", test_value 292 | parent_dir = root_dir + exp_name + "_" + str(test_value) + "/" 293 | for j in range(NUM_TEST): 294 | print "round", j 295 | dir = parent_dir + str(j) + '/' 296 | config = get_config(id, exp_name, test_value) 297 | device = (i*NUM_TEST+j)%2 298 | if id in [12, 13, 14, 15]: 299 | # _baseline(dir, config) 300 | pool.apply_async(_baseline, args=(dir, config)) 301 | else: 302 | # _sl_rl(dir, config, device) 303 | pool.apply_async(_sl_rl, args=(dir, config, device)) 304 | if id in [12, 13, 14, 15]: 305 | time.sleep(0.3) 306 | else: 307 | time.sleep(3) 308 | 309 | pool.close() 310 | pool.join() 311 | 312 | results = process_results(root_dir, exp_name, test_values) 313 | print "finish testing all values of", exp_name 314 | print "the result is:", results 315 | toc = time.time() 316 | print "elapsed time: ", toc - tic, "seconds" 317 | 318 | 319 | 320 | def main(id): 321 | global PARALLELISM, TASK_ID 322 | TASK_ID = id 323 | if id == 1: 324 | exp_name = "sched_window_size" 325 | test_values = [10, 20, 30, 40, 50, 60] 326 | elif id == 2: 327 | exp_name = "number_of_neurons" 328 | test_values = [16, 32, 64, 96, 128, 160, 192, 256] 329 | elif id == 3: 330 | PARALLELISM = 5 331 | exp_name = "number_of_hidden_layers" 332 | test_values = [1, 2, 3, 4] 333 | elif id == 4: 334 | exp_name = "bundle_action" # bundle false error 335 | test_values = [False, True] 336 | elif id == 5: 337 | exp_name = "job_arrival_distribution" 338 | test_values = ["Ali_Trace", "Uniform", "Poisson", "Google_Trace"] 339 | elif id == 6: 340 | exp_name = "batch_normalization" 341 | test_values = [False, True] 342 | elif id == 7: 343 | exp_name = "sl_loss_function" 344 | test_values = ["Mean_Square", "Cross_Entropy", "Absolute_Difference"] 345 | elif id == 8: 346 | exp_name = "job_reward_function" 347 | test_values = ["Norm_Progress", "Job_Progress", "Num_Uncompleted_Jobs"] 348 | elif id == 9: 349 | exp_name = "experience_replay" 350 | test_values = [False, True] 351 | elif id == 10: 352 | exp_name = "critic_network" 353 | test_values = [False, True] 354 | elif id == 11: 355 | exp_name = "exploration" 356 | test_values = [False, True] 357 | elif id == 12: 358 | exp_name = "DRF_baseline" 359 | test_values = ["Ali_Trace", "Uniform", "Poisson", "Google_Trace"] 360 | elif id == 13: 361 | exp_name = 
"SRTF_baseline" 362 | test_values = ["Ali_Trace", "Uniform", "Poisson", "Google_Trace"] 363 | elif id == 14: 364 | exp_name = "Tetris_baseline" 365 | test_values = ["Ali_Trace", "Uniform", "Poisson", "Google_Trace"] 366 | elif id == 15: 367 | exp_name = "Optimus_baseline" 368 | test_values = ["Ali_Trace", "Uniform", "Poisson", "Google_Trace"] 369 | elif id == 16: 370 | exp_name = "SL_heuristics" 371 | test_values = ["Optimus", "FIFO", "SRTF"] 372 | elif id == 17: 373 | PARALLELISM = 5 374 | exp_name = "a3c" 375 | test_values = [5, 4, 3, 2, 1] 376 | elif id == 18: 377 | exp_name = "changing_job_types" 378 | test_values = [True] 379 | elif id == 19: 380 | exp_name = "analytical_model" 381 | test_values = [False] 382 | elif id == 20: 383 | exp_name = "cluster_scale" 384 | test_values = ["large-4", "large-5", "large-6", "large-1", "large-2", "large-3", "testbed", "small"] 385 | elif id == 21: 386 | exp_name = "job_resr_balance" 387 | test_values = [True, False] 388 | elif id == 22: 389 | exp_name = "enable_SL_or_not" 390 | test_values = [True, False] 391 | elif id == 23: 392 | exp_name = "estimation_error_num_epoch" # error 393 | test_values = [0.05, 0.1, 0.15, 0.2, 0.25] 394 | elif id == 24: 395 | PARALLELISM = 3 396 | exp_name = "number_of_hidden_layers" 397 | test_values = [5, 6, 7] 398 | elif id == 25: 399 | exp_name = "train_speed_error" 400 | test_values = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3] 401 | 402 | run(id, exp_name, test_values) 403 | 404 | 405 | if __name__ == "__main__": 406 | if len(sys.argv) != 2: 407 | print "a script for running experiment" 408 | print "Usage: please input one of following experiment IDs" 409 | print "1: scheduling window size" 410 | print "2: number of neurons" 411 | print "3: number of hidden layers" 412 | print "4: bundle action" 413 | print "5: job arrival distribution" 414 | print "6: batch normalization" 415 | print "7: sl loss function" 416 | print "8: job reward function" 417 | print "9: experience replay" 418 | print "10: critic network" 419 | print "11: exploration" 420 | print "12: DRF baseline" 421 | print "13: SRTF baseline" 422 | print "14: Tetris baseline" 423 | print "15: Optimus baseline" 424 | print "16: SL heuristics" 425 | print "17: a3c, change train_a3c.py to train.py, change parallelism, make sure a correct total batch size before running" 426 | print "18: changing job types during training" 427 | print "19: training on analytical model" 428 | print "20: cluster scale" 429 | print "21: job resource balance" 430 | print "22: enable SL or not" 431 | print "23: estimation error of epoch number" 432 | print "25: train speed error" 433 | exit(1) 434 | main(int(sys.argv[1])) -------------------------------------------------------------------------------- /rl_env.py: -------------------------------------------------------------------------------- 1 | import Queue 2 | import time 3 | import numpy as np 4 | import parameters as pm 5 | from cluster import Cluster 6 | import log 7 | from scheduler_base import Scheduler 8 | 9 | 10 | class RL_Env(Scheduler): 11 | def __init__(self, name, trace, logger, training_mode=True): 12 | Scheduler.__init__(self, name, trace, logger) 13 | 14 | self.epsilon = 0.0 15 | self.training_mode = training_mode 16 | self.sched_seq = [] 17 | self.job_prog_in_ts = dict() 18 | self.window_jobs = None 19 | self.jobstats = dict() 20 | for stats_name in [ 21 | "arrival", "ts_completed", "tot_completed", "duration", 22 | "uncompleted", "running", "total", "backlog", "cpu_util", 23 | "gpu_util" 24 | ]: 25 | 
self.jobstats[stats_name] = [] 26 | if pm.PS_WORKER and pm.BUNDLE_ACTION: 27 | self.action_freq = [0 for _ in range(3)] 28 | # prepare for the first timeslot 29 | self._prepare() 30 | 31 | def _prepare(self): 32 | # admit new jobs 33 | num_arrv_jobs = 0 34 | if self.curr_ts in self.trace: 35 | for job in self.trace[self.curr_ts]: 36 | job.reset() 37 | self.uncompleted_jobs.add(job) 38 | if not self.training_mode: 39 | job.training = False 40 | num_arrv_jobs += 1 41 | self.logger.debug(job.info()) 42 | self.jobstats["arrival"].append(num_arrv_jobs) 43 | self.jobstats["total"].append( 44 | len(self.completed_jobs) + len(self.uncompleted_jobs)) 45 | self.jobstats["backlog"].append( 46 | max(len(self.uncompleted_jobs) - pm.SCHED_WINDOW_SIZE, 0)) 47 | 48 | # reset 49 | self._sched_states() # get scheduling states in this ts 50 | self.running_jobs.clear() 51 | self.node_used_resr_queue = Queue.PriorityQueue() 52 | for i in range(pm.CLUSTER_NUM_NODES): 53 | self.node_used_resr_queue.put((0, i)) 54 | self.cluster.clear() 55 | 56 | for job in self.uncompleted_jobs: 57 | if pm.ASSIGN_BUNDLE and pm.PS_WORKER: # assign each job a bundle of ps and worker first to avoid job starvation 58 | _, node = self.node_used_resr_queue.get() 59 | resr_reqs = job.resr_worker + job.resr_ps 60 | succ, node_used_resrs = self.cluster.alloc(resr_reqs, node) 61 | if succ: 62 | job.num_ps = 1 63 | job.curr_ps_placement = [node] 64 | job.num_workers = 1 65 | job.curr_worker_placement = [node] 66 | job.dom_share = np.max(1.0 * 67 | (job.num_workers * job.resr_worker + 68 | job.num_ps * job.resr_ps) / 69 | self.cluster.CLUSTER_RESR_CAPS) 70 | self.running_jobs.add(job) 71 | else: 72 | job.num_workers = 0 73 | job.curr_worker_placement = [] 74 | job.num_ps = 0 75 | job.curr_ps_placement = [] 76 | job.dom_share = 0 77 | self.node_used_resr_queue.put( 78 | (np.sum(node_used_resrs), 79 | node)) # always put back to avoid blocking in step() 80 | else: 81 | job.num_workers = 0 82 | job.curr_worker_placement = [] 83 | if pm.PS_WORKER: 84 | job.num_ps = 0 85 | job.curr_ps_placement = [] 86 | job.dom_share = 0 87 | 88 | if pm.VARYING_SKIP_NUM_WORKERS: 89 | self.skip_num_workers = np.random.randint(1, pm.MAX_NUM_WORKERS) 90 | else: 91 | self.skip_num_workers = 8 #np.random.randint(0,pm.MAX_NUM_WORKERS) 92 | if pm.VARYING_PS_WORKER_RATIO: 93 | self.ps_worker_ratio = np.random.randint(3, 8) 94 | else: 95 | self.ps_worker_ratio = 5 96 | 97 | def _move(self): 98 | self._progress() 99 | if len(self.completed_jobs) == pm.TOT_NUM_JOBS: 100 | self.end = True 101 | else: 102 | # next timeslot 103 | self.curr_ts += 1 104 | if self.curr_ts > pm.MAX_TS_LEN: 105 | self.logger.error( 106 | "Exceed the maximal number of timeslot for one trace!") 107 | self.logger.error("Results: " + str(self.get_results())) 108 | self.logger.error("Stats: " + str(self.get_jobstats())) 109 | for job in self.uncompleted_jobs: 110 | self.logger.error("Uncompleted job " + str(job.id) + 111 | " tot_epoch: " + str(job.num_epochs) + 112 | " prog: " + str(job.progress) + 113 | " workers: " + str(job.num_workers)) 114 | raise RuntimeError 115 | self._prepare() 116 | 117 | # step forward by one action 118 | def step(self, output): 119 | # mask and adjust probability 120 | mask = np.ones(pm.ACTION_DIM) 121 | for i in range(len(self.window_jobs)): 122 | if self.window_jobs[ 123 | i] is None: # what if job workers are already maximum 124 | if pm.PS_WORKER: 125 | if pm.BUNDLE_ACTION: # worker, ps, bundle 126 | mask[3 * i] = 0.0 127 | mask[3 * i + 1] = 0.0 128 | mask[3 * i + 2] = 
0.0 129 | else: 130 | mask[2 * i] = 0.0 131 | mask[2 * i + 1] = 0.0 132 | else: 133 | mask[i] = 0.0 134 | else: 135 | if pm.PS_WORKER: 136 | worker_full = False 137 | ps_full = False 138 | if self.window_jobs[i].num_workers >= pm.MAX_NUM_WORKERS: 139 | worker_full = True 140 | if self.window_jobs[i].num_ps >= pm.MAX_NUM_WORKERS: 141 | ps_full = True 142 | if worker_full: 143 | if pm.BUNDLE_ACTION: 144 | mask[3 * i] = 0.0 145 | else: 146 | mask[2 * i] = 0.0 147 | if ps_full: 148 | if pm.BUNDLE_ACTION: 149 | mask[3 * i + 1] = 0.0 150 | else: 151 | mask[2 * i + 1] = 0.0 152 | if (worker_full or ps_full) and pm.BUNDLE_ACTION: 153 | mask[3 * i + 2] = 0.0 154 | 155 | masked_output = np.reshape(output[0] * mask, (1, len(mask))) 156 | sum_prob = np.sum(masked_output) 157 | action_vec = np.zeros(len(mask)) 158 | move_on = True 159 | valid_state = False 160 | if ((not pm.PS_WORKER) and sum(mask[:len(self.window_jobs)]) == 0) \ 161 | or (pm.PS_WORKER and (not pm.BUNDLE_ACTION) and sum(mask[:2*len(self.window_jobs)]) == 0) \ 162 | or (pm.PS_WORKER and pm.BUNDLE_ACTION and sum(mask[:3*len(self.window_jobs)]) == 0): 163 | self.logger.debug( 164 | "All jobs are None, move on and do not save it as a sample") 165 | self._move() 166 | elif sum_prob <= 0: 167 | self.logger.info( 168 | "All actions are masked or some action with probability 1 is masked!!!" 169 | ) 170 | if pm.EXPERIMENT_NAME is None: 171 | self.logger.info( 172 | "Output: " + str(output) 173 | ) # Output: [[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]], WHY? 174 | self.logger.info("Mask: " + str(mask)) 175 | self.logger.info("Window_jobs: " + str(self.window_jobs)) 176 | num_worker_ps_str = "" 177 | for job in self.window_jobs: 178 | if job: 179 | num_worker_ps_str += str(job.id) + ": " + str( 180 | job.num_ps) + " " + str(job.num_workers) + "," 181 | self.logger.info("Job: " + num_worker_ps_str) 182 | self._move() 183 | else: 184 | masked_output = masked_output / sum_prob 185 | if self.training_mode: 186 | # select action 187 | if np.random.rand( 188 | ) > pm.MASK_PROB: # only valid for training mode 189 | masked_output = np.reshape(output[0], (1, len(mask))) 190 | action_cumsum = np.cumsum(masked_output) 191 | action = (action_cumsum > np.random.randint(1, pm.RAND_RANGE) / 192 | float(pm.RAND_RANGE)).argmax() 193 | 194 | if pm.EPSILON_GREEDY: 195 | if np.random.rand() < self.epsilon: 196 | val_actions = [] 197 | for i in range(len(masked_output[0])): 198 | if masked_output[0][ 199 | i] > pm.MIN_ACTION_PROB_FOR_SKIP: 200 | val_actions.append(i) 201 | action = val_actions[np.random.randint( 202 | 0, len(val_actions))] 203 | 204 | if pm.INJECT_SAMPLES: 205 | if (not pm.REAL_SPEED_TRACE) and (not pm.PS_WORKER): 206 | allMaxResr = True 207 | for job in self.window_jobs: 208 | if job: 209 | if job.num_workers > self.skip_num_workers: 210 | continue 211 | else: 212 | allMaxResr = False 213 | break 214 | if allMaxResr and masked_output[0][len( 215 | action_vec 216 | ) - 1] > pm.MIN_ACTION_PROB_FOR_SKIP and np.random.rand( 217 | ) <= pm.SAMPLE_INJECTION_PROB: # choose to skip if prob larger than a small num, else NaN 218 | action = len(action_vec) - 1 219 | self.logger.debug("Got 1.") 220 | elif pm.REAL_SPEED_TRACE and pm.PS_WORKER: 221 | # shuffle = np.random.choice(len(self.window_jobs), len(self.window_jobs), replace=False) # shuffle is a must, otherwise NN selects only the first several actions!!! 
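# A minimal standalone sketch of the mask-and-renormalize action sampling this
# method relies on (the output*mask, np.cumsum and argmax pattern used above in
# training mode and again below when training_mode is off). The helper name and
# the rand_range default are illustrative only, standing in for pm.RAND_RANGE;
# step() itself does this inline and does not call such a helper.
def _sample_masked_action_sketch(output, mask, rand_range=1000):
    masked = output * mask                        # zero out invalid actions
    total = np.sum(masked)
    if total <= 0:                                # everything masked: caller moves on
        return None
    probs = masked / total                        # renormalize into a distribution
    cumsum = np.cumsum(probs)
    r = np.random.randint(1, rand_range) / float(rand_range)
    return int((cumsum > r).argmax())             # first index whose cumulative prob exceeds r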
222 | if pm.JOB_RESR_BALANCE and pm.BUNDLE_ACTION: 223 | max_num_ps_worker = 0 224 | min_num_ps_worker = 10**10 225 | index_min_job = -1 226 | for i in range(len(self.window_jobs)): 227 | job = self.window_jobs[i] 228 | if job: 229 | num_ps_worker = job.num_ps + job.num_workers 230 | if num_ps_worker > max_num_ps_worker: 231 | max_num_ps_worker = num_ps_worker 232 | if num_ps_worker < min_num_ps_worker: 233 | min_num_ps_worker = num_ps_worker 234 | index_min_job = i 235 | if min_num_ps_worker and index_min_job != -1 and max_num_ps_worker / min_num_ps_worker > np.random.randint( 236 | 3, 6): 237 | if masked_output[0][ 238 | 3 * index_min_job + 239 | 2] > pm.MIN_ACTION_PROB_FOR_SKIP and masked_output[ 240 | 0][3 * 241 | index_min_job] > pm.MIN_ACTION_PROB_FOR_SKIP: 242 | if np.random.rand() < 0.5: 243 | action = 3 * index_min_job + 2 244 | else: 245 | action = 3 * index_min_job 246 | 247 | shuffle = [_ for _ in range(len(self.window_jobs))] 248 | for i in shuffle: 249 | job = self.window_jobs[i] 250 | if job: 251 | if pm.BUNDLE_ACTION: 252 | # if one of three actions: ps/worker/bundle has low probability, enforce to select it 253 | if min(self.action_freq) > 0 and min( 254 | self.action_freq) * 1.0 / sum( 255 | self.action_freq) < 0.001: 256 | index = np.argmin(self.action_freq) 257 | if mask[3 * i + 258 | index] > 0 and masked_output[0][ 259 | 3 * i + 260 | index] > pm.MIN_ACTION_PROB_FOR_SKIP: 261 | action = 3 * i + index 262 | self.logger.debug("Got 0: " + 263 | str(index)) 264 | break 265 | if (job.num_workers == 0 266 | or job.num_ps == 0): 267 | if job.num_ps == 0 and job.num_workers == 0 and mask[ 268 | 3 * i + 269 | 2] > 0 and masked_output[0][ 270 | 3 * i + 271 | 2] > pm.MIN_ACTION_PROB_FOR_SKIP and np.random.rand( 272 | ) < 0.5: 273 | action = 3 * i + 2 274 | self.logger.debug("Got 1") 275 | if job.num_workers == 0 and mask[ 276 | 3 * 277 | i] > 0 and masked_output[0][ 278 | 3 * 279 | i] > pm.MIN_ACTION_PROB_FOR_SKIP: 280 | action = 3 * i 281 | if job.num_ps == 0 and mask[ 282 | 3 * i + 283 | 1] > 0 and masked_output[0][ 284 | 3 * 285 | i] > pm.MIN_ACTION_PROB_FOR_SKIP: 286 | action = 3 * i + 1 287 | break 288 | elif job.num_ps > job.num_workers * self.ps_worker_ratio and np.random.rand( 289 | ) < 0.5: 290 | if mask[3 * i + 2] > 0 and masked_output[0][ 291 | 3 * i + 292 | 2] > pm.MIN_ACTION_PROB_FOR_SKIP and mask[ 293 | 3 * 294 | i] > 0 and masked_output[0][ 295 | 3 * 296 | i] > pm.MIN_ACTION_PROB_FOR_SKIP: 297 | if np.random.rand() < 0.5: 298 | # increase this job's bundle 299 | action = 3 * i + 2 300 | self.logger.debug("Got 2.") 301 | else: 302 | action = 3 * i 303 | self.logger.debug("Got 2.") 304 | break 305 | elif job.num_workers >= job.num_ps * 0.5 and np.random.rand( 306 | ) < 0.5: 307 | if mask[3 * i + 2] > 0 and masked_output[0][ 308 | 3 * i + 309 | 2] > pm.MIN_ACTION_PROB_FOR_SKIP and mask[ 310 | 3 * i + 311 | 1] > 0 and masked_output[0][ 312 | 3 * i + 313 | 1] > pm.MIN_ACTION_PROB_FOR_SKIP: 314 | if np.random.rand() < 0.01: 315 | # increase this job's bundle 316 | action = 3 * i + 2 317 | self.logger.debug("Got 3.") 318 | else: 319 | # incrase ps 320 | action = 3 * i + 1 321 | self.logger.debug("Got 4.") 322 | break 323 | else: 324 | if job.num_workers == 0 and mask[ 325 | 2 * i] > 0 and masked_output[0][ 326 | 2 * 327 | i] > pm.MIN_ACTION_PROB_FOR_SKIP and np.random.rand( 328 | ) < 0.01: 329 | action = 2 * i 330 | self.logger.debug("Got 1.") 331 | break 332 | elif job.num_ps == 0 and mask[ 333 | 2 * i + 334 | 1] > 0 and masked_output[0][ 335 | 2 * i + 336 | 1] > 
pm.MIN_ACTION_PROB_FOR_SKIP and np.random.rand( 337 | ) < 0.01: 338 | action = 2 * i + 1 339 | self.logger.debug("Got 2.") 340 | break 341 | elif job.num_ps >= job.num_workers * self.ps_worker_ratio and mask[ 342 | 2 * i] > 0 and masked_output[0][ 343 | 2 * 344 | i] > pm.MIN_ACTION_PROB_FOR_SKIP and np.random.rand( 345 | ) < 0.5: 346 | # increase this job's worker 347 | action = 2 * i 348 | self.logger.debug("Got 3.") 349 | break 350 | elif job.num_workers >= job.num_ps * self.ps_worker_ratio and mask[ 351 | 2 * i + 352 | 1] > 0 and masked_output[0][ 353 | 2 * i + 354 | 1] > pm.MIN_ACTION_PROB_FOR_SKIP and np.random.rand( 355 | ) < 0.5: 356 | # increase this job's ps 357 | action = 2 * i + 1 358 | self.logger.debug("Got 4.") 359 | break 360 | else: 361 | if pm.SELECT_ACTION_MAX_PROB: # only available for validation 362 | action = np.argmax( 363 | masked_output 364 | ) # output is [[...]] # always select the action with max probability 365 | else: 366 | action_cumsum = np.cumsum(masked_output) 367 | action = (action_cumsum > 368 | np.random.randint(1, pm.RAND_RANGE) / 369 | float(pm.RAND_RANGE)).argmax() 370 | 371 | action_vec[action] = 1 372 | # check whether skip this timeslot 373 | if pm.SKIP_TS and action == len(action_vec) - 1: 374 | self._move() 375 | # filter out the first action that causes 0 reward??? NO 376 | # if sum([job.num_workers+job.num_ps for job in self.uncompleted_jobs]) > 0: 377 | valid_state = True 378 | self.sched_seq.append(None) 379 | self.logger.debug("Skip action is selected!") 380 | self.logger.debug("Output: " + str(output)) 381 | self.logger.debug("Masked output: " + str(masked_output)) 382 | else: 383 | # count action freq 384 | if pm.PS_WORKER and pm.BUNDLE_ACTION: 385 | self.action_freq[action % 3] += 1 386 | 387 | # allocate resource 388 | if pm.PS_WORKER: 389 | if pm.BUNDLE_ACTION: 390 | job = self.window_jobs[action / 3] 391 | else: 392 | job = self.window_jobs[action / 2] 393 | else: 394 | job = self.window_jobs[action] 395 | if job is None: 396 | self._move() 397 | self.logger.debug("The selected action is None!") 398 | else: 399 | _, node = self.node_used_resr_queue.get() 400 | # get resource requirement of the selected action 401 | if pm.PS_WORKER: 402 | if pm.BUNDLE_ACTION: 403 | if action % 3 == 0: 404 | resr_reqs = job.resr_worker 405 | elif action % 3 == 1: 406 | resr_reqs = job.resr_ps 407 | else: 408 | resr_reqs = job.resr_worker + job.resr_ps 409 | else: 410 | if action % 2 == 0: # worker 411 | resr_reqs = job.resr_worker 412 | else: 413 | resr_reqs = job.resr_ps 414 | else: 415 | resr_reqs = job.resr_worker 416 | succ, node_used_resrs = self.cluster.alloc(resr_reqs, node) 417 | if succ: 418 | move_on = False 419 | # change job tasks and placement 420 | if pm.PS_WORKER: 421 | if pm.BUNDLE_ACTION: 422 | if action % 3 == 0: # worker 423 | job.num_workers += 1 424 | job.curr_worker_placement.append(node) 425 | elif action % 3 == 1: # ps 426 | job.num_ps += 1 427 | job.curr_ps_placement.append(node) 428 | else: # bundle 429 | job.num_ps += 1 430 | job.curr_ps_placement.append(node) 431 | job.num_workers += 1 432 | job.curr_worker_placement.append(node) 433 | else: 434 | if action % 2 == 0: # worker 435 | job.num_workers += 1 436 | job.curr_worker_placement.append(node) 437 | else: # ps 438 | job.num_ps += 1 439 | job.curr_ps_placement.append(node) 440 | else: 441 | job.num_workers += 1 442 | job.curr_worker_placement.append(node) 443 | 444 | job.dom_share = np.max( 445 | 1.0 * (job.num_workers * job.resr_worker + 446 | job.num_ps * job.resr_ps) / 
447 | self.cluster.CLUSTER_RESR_CAPS) 448 | self.node_used_resr_queue.put( 449 | (np.sum(node_used_resrs), node)) 450 | self.running_jobs.add(job) 451 | valid_state = True 452 | self.sched_seq.append(job) 453 | else: 454 | self._move() 455 | self.logger.debug("No enough resources!") 456 | if move_on: 457 | reward = self.rewards[-1] * move_on 458 | else: 459 | reward = 0 460 | return masked_output, action_vec, reward, move_on, valid_state # invalid state, action and output when move on except for skip ts 461 | 462 | def get_jobstats(self): 463 | self.jobstats["duration"] = [(job.end_time - job.arrv_time + 1) 464 | for job in self.completed_jobs] 465 | for name, value in self.jobstats.items(): 466 | self.logger.debug(name + ": length " + str(len(value)) + " " + 467 | str(value)) 468 | return self.jobstats 469 | 470 | def _sched_states(self): 471 | self.states = [] 472 | for job in self.running_jobs: 473 | self.states.append((job.id, job.type, job.num_workers, job.num_ps)) 474 | 475 | def get_job_reward(self): 476 | job_reward = [] 477 | for job in self.sched_seq: 478 | if job is None: # skip 479 | if len(self.job_prog_in_ts) > 0: 480 | job_reward.append(self.rewards[-1] / 481 | len(self.job_prog_in_ts)) 482 | else: 483 | job_reward.append(0) 484 | else: 485 | job_reward.append(self.job_prog_in_ts[job]) 486 | self.sched_seq = [] 487 | self.job_prog_in_ts.clear() 488 | 489 | self.logger.info("Action Frequency: " + str(self.action_freq)) 490 | return job_reward 491 | 492 | def get_sched_states(self): 493 | return self.states 494 | 495 | def _progress(self): 496 | reward = 0 497 | num_ts_completed = 0 498 | for job in self.running_jobs: 499 | norm_prog = job.step() / job.num_epochs 500 | self.job_prog_in_ts[job] = norm_prog 501 | reward += norm_prog 502 | if job.progress >= job.real_num_epochs: 503 | if pm.FINE_GRAIN_JCT: 504 | job.end_time = self.curr_ts - 1 + job.get_run_time_in_ts() 505 | else: 506 | job.end_time = self.curr_ts 507 | # self.running_jobs.remove(job) # it means running in this ts, so no need to delete 508 | self.uncompleted_jobs.remove(job) 509 | self.completed_jobs.add(job) 510 | num_ts_completed += 1 511 | self.rewards.append(reward) 512 | 513 | self.jobstats["running"].append(len(self.running_jobs)) 514 | self.jobstats["tot_completed"].append(len(self.completed_jobs)) 515 | self.jobstats["uncompleted"].append(len(self.uncompleted_jobs)) 516 | self.jobstats["ts_completed"].append(num_ts_completed) 517 | cpu_util, gpu_util = self.cluster.get_cluster_util() 518 | self.jobstats["cpu_util"].append(cpu_util) 519 | self.jobstats["gpu_util"].append(gpu_util) 520 | 521 | 522 | def test(): 523 | import log, trace 524 | logger = log.getLogger(name="agent_" + str(id), level="INFO") 525 | job_trace = trace.Trace(logger).get_trace() 526 | env = RL_Env("RL", job_trace, logger) 527 | while not env.end: 528 | data = env.step() 529 | for item in data: 530 | print item 531 | print "-----------------------------" 532 | raw_input("Next? 
") 533 | 534 | print env.get_results() 535 | 536 | 537 | if __name__ == '__main__': 538 | test() 539 | -------------------------------------------------------------------------------- /train_a3c.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import multiprocessing 4 | import tensorflow as tf 5 | import os 6 | import parameters as pm 7 | import trace 8 | import network 9 | import drf_env 10 | import fifo_env 11 | import srtf_env 12 | import tetris_env 13 | import rl_env 14 | import log 15 | import validate 16 | import collections 17 | import memory 18 | import prioritized_memory 19 | import tb_log 20 | import copy 21 | import comparison 22 | 23 | 24 | def collect_stats(stats_qs, tb_logger, step): 25 | policy_entropys = [] 26 | policy_losses = [] 27 | value_losses = [] 28 | td_losses = [] 29 | step_rewards = [] 30 | jcts = [] 31 | makespans = [] 32 | rewards = [] 33 | val_losses = [] 34 | val_jcts = [] 35 | val_makespans = [] 36 | val_rewards = [] 37 | for id in range(pm.NUM_AGENTS): 38 | while not stats_qs[id].empty(): 39 | stats = stats_qs[id].get() 40 | tag_prefix = "SAgent " + str(id) + " " 41 | if stats[0] == "step:sl": 42 | _, entropy, loss = stats 43 | policy_entropys.append(entropy) 44 | policy_losses.append(loss) 45 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 46 | tb_logger.add_scalar(tag=tag_prefix+"SL Loss", value=loss, step=step) 47 | tb_logger.add_scalar(tag=tag_prefix + "SL Entropy", value=entropy, step=step) 48 | elif stats[0] == "val": 49 | _, val_loss, jct, makespan, reward = stats 50 | val_losses.append(val_loss) 51 | val_jcts.append(jct) 52 | val_makespans.append(makespan) 53 | val_rewards.append(reward) 54 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 55 | tb_logger.add_scalar(tag=tag_prefix+"Val Loss", value=val_loss, step=step) 56 | tb_logger.add_scalar(tag=tag_prefix+"Val JCT", value=jct, step=step) 57 | tb_logger.add_scalar(tag=tag_prefix+"Val Makespan", value=makespan, step=step) 58 | tb_logger.add_scalar(tag=tag_prefix+"Val Reward", value=reward, step=step) 59 | elif stats[0] == "step:policy": 60 | _, entropy, loss, td_loss, step_reward, output = stats 61 | policy_entropys.append(entropy) 62 | policy_losses.append(loss) 63 | td_losses.append(td_loss) 64 | step_rewards.append(step_reward) 65 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 66 | tb_logger.add_scalar(tag=tag_prefix + "Policy Entropy", value=entropy, step=step) 67 | tb_logger.add_scalar(tag=tag_prefix+"Policy Loss", value=loss, step=step) 68 | tb_logger.add_scalar(tag=tag_prefix + "TD Loss", value=td_loss, step=step) 69 | tb_logger.add_scalar(tag=tag_prefix+"Step Reward", value=step_reward, step=step) 70 | tb_logger.add_histogram(tag=tag_prefix+"Output", value=output, step=step) 71 | elif stats[0] == "step:policy+value": 72 | _, entropy, policy_loss, value_loss, td_loss, step_reward, output = stats 73 | policy_entropys.append(entropy) 74 | policy_losses.append(policy_loss) 75 | value_losses.append(value_loss) 76 | td_losses.append(td_loss) 77 | step_rewards.append(step_reward) 78 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 79 | tb_logger.add_scalar(tag=tag_prefix + "Policy Entropy", value=entropy, step=step) 80 | tb_logger.add_scalar(tag=tag_prefix+"Policy Loss", value=policy_loss, step=step) 81 | tb_logger.add_scalar(tag=tag_prefix + "Value Loss", value=value_loss, step=step) 82 | tb_logger.add_scalar(tag=tag_prefix + "TD Loss", value=td_loss, step=step) 
83 | tb_logger.add_scalar(tag=tag_prefix + "Step Reward", value=step_reward, step=step) 84 | tb_logger.add_histogram(tag=tag_prefix + "Output", value=output, step=step) 85 | elif stats[0] == "trace:sched_result": 86 | _, jct, makespan, reward = stats 87 | jcts.append(jct) 88 | makespans.append(makespan) 89 | rewards.append(reward) 90 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 91 | tb_logger.add_scalar(tag=tag_prefix + "Avg JCT", value=jct, step=step) 92 | tb_logger.add_scalar(tag=tag_prefix + "Makespan", value=makespan, step=step) 93 | tb_logger.add_scalar(tag=tag_prefix + "Reward", value=reward, step=step) 94 | elif stats[0] == "trace:job_stats": 95 | _, episode, jobstats = stats 96 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 97 | job_stats_tag_prefix = tag_prefix + "Trace " + str(episode) + " Step " + str(step) + " " 98 | for i in range(len(jobstats["arrival"])): 99 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Arrival", value=jobstats["arrival"][i], step=i) 100 | for i in range(len(jobstats["ts_completed"])): 101 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Ts_completed", value=jobstats["ts_completed"][i], step=i) 102 | for i in range(len(jobstats["tot_completed"])): 103 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Tot_completed", value=jobstats["tot_completed"][i], step=i) 104 | for i in range(len(jobstats["uncompleted"])): 105 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Uncompleted", value=jobstats["uncompleted"][i], step=i) 106 | for i in range(len(jobstats["running"])): 107 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Running", value=jobstats["running"][i], step=i) 108 | for i in range(len(jobstats["total"])): 109 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Total jobs", value=jobstats["total"][i], step=i) 110 | for i in range(len(jobstats["backlog"])): 111 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Backlog", value=jobstats["backlog"][i], step=i) 112 | for i in range(len(jobstats["cpu_util"])): 113 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "CPU_Util", value=jobstats["cpu_util"][i], step=i) 114 | for i in range(len(jobstats["gpu_util"])): 115 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "GPU_Util", value=jobstats["gpu_util"][i], step=i) 116 | tb_logger.add_histogram(tag=job_stats_tag_prefix + "JCT", value=jobstats["duration"], step=step) 117 | 118 | tag_prefix = "Central " 119 | if len(policy_entropys) > 0: 120 | tb_logger.add_scalar(tag=tag_prefix + "Policy Entropy", value=sum(policy_entropys) / len(policy_entropys), step=step) 121 | if len(policy_losses) > 0: 122 | tb_logger.add_scalar(tag=tag_prefix + "Policy Loss", value=sum(policy_losses) / len(policy_losses), step=step) 123 | if len(value_losses) > 0: 124 | tb_logger.add_scalar(tag=tag_prefix + "Value Loss", value=sum(value_losses) / len(value_losses), step=step) 125 | if len(td_losses) > 0: 126 | tb_logger.add_scalar(tag=tag_prefix + "TD Loss / Advantage", value=sum(td_losses) / len(td_losses), step=step) 127 | if len(step_rewards) > 0: 128 | tb_logger.add_scalar(tag=tag_prefix + "Batch Reward", value=sum(step_rewards) / len(step_rewards), step=step) 129 | if len(jcts) > 0: 130 | tb_logger.add_scalar(tag=tag_prefix + "JCT", value=sum(jcts) / len(jcts), step=step) 131 | if len(makespans) > 0: 132 | tb_logger.add_scalar(tag=tag_prefix + "Makespan", value=sum(makespans) / len(makespans), step=step) 133 | if len(rewards) > 0: 134 | tb_logger.add_scalar(tag=tag_prefix + "Reward", value=sum(rewards) / len(rewards), step=step) 135 | if 
len(val_losses) > 0: 136 | tb_logger.add_scalar(tag=tag_prefix + "Val Loss", value=sum(val_losses) / len(val_losses), step=step) 137 | if len(val_jcts) > 0: 138 | tb_logger.add_scalar(tag=tag_prefix + "Val JCT", value=sum(val_jcts) / len(val_jcts), step=step) 139 | if len(val_makespans) > 0: 140 | tb_logger.add_scalar(tag=tag_prefix + "Val Makespan", value=sum(val_makespans) / len(val_makespans), step=step) 141 | if len(val_rewards) > 0: 142 | tb_logger.add_scalar(tag=tag_prefix + "Val Reward", value=sum(val_rewards) / len(val_rewards), step=step) 143 | tb_logger.flush() 144 | 145 | 146 | def test(policy_net, validation_traces, logger, step, tb_logger): 147 | val_tic = time.time() 148 | tag_prefix = "Central " 149 | try: 150 | if pm.TRAINING_MODE == "SL": 151 | val_loss = validate.val_loss(policy_net, copy.deepcopy(validation_traces), logger, step) 152 | tb_logger.add_scalar(tag=tag_prefix + "Val Loss", value=val_loss, step=step) 153 | jct, makespan, reward = validate.val_jmr(policy_net, copy.deepcopy(validation_traces), logger, step, tb_logger) 154 | tb_logger.add_scalar(tag=tag_prefix + "Val JCT", value=jct, step=step) 155 | tb_logger.add_scalar(tag=tag_prefix + "Val Makespan", value=makespan, step=step) 156 | tb_logger.add_scalar(tag=tag_prefix + "Val Reward", value=reward, step=step) 157 | tb_logger.flush() 158 | val_toc = time.time() 159 | logger.info("Central Agent:" + " Validation at step " + str(step) + " Time: " + '%.3f' % (val_toc - val_tic)) 160 | 161 | # log results 162 | f = open(LOG_DIR + "rl_validation.txt", 'a') 163 | f.write("step " + str(step) + ": " + str(jct) + " " + str(makespan) + " " + str(reward) + "\n") 164 | f.close() 165 | 166 | return (jct, makespan, reward) 167 | except Exception as e: 168 | logger.error("Error when validation! 
" + str(e)) 169 | tb_logger.add_text(tag="validation error", value=str(e), step=step) 170 | 171 | 172 | def log_config(tb_logger): 173 | # log all configurations in parameters and backup py 174 | global LOG_DIR 175 | if pm.EXPERIMENT_NAME is None: 176 | LOG_DIR = "./backup/" 177 | else: 178 | LOG_DIR = "./" + pm.EXPERIMENT_NAME + "/" 179 | 180 | os.system("rm -rf " + LOG_DIR) 181 | os.system("mkdir -p " + LOG_DIR + "; cp *.py *.txt " + LOG_DIR) 182 | 183 | pm_md = globals().get('pm', None) 184 | train_config = dict() 185 | if pm_md: 186 | train_config = {key: value for key, value in pm_md.__dict__.iteritems() if not (key.startswith('__') or key.startswith('_'))} 187 | train_config_str = "" 188 | for key, value in train_config.iteritems(): 189 | train_config_str += "{:<30}{:<100}".format(key, value) + "\n\n" 190 | 191 | tb_logger.add_text(tag="Config", value=train_config_str, step=0) 192 | tb_logger.flush() 193 | 194 | if pm.TRAINING_MODE == "SL": 195 | f = open(pm.MODEL_DIR + "sl_model.config", "w") 196 | else: 197 | f = open(pm.MODEL_DIR + "rl_model.config", "w") 198 | f.write(train_config_str) 199 | f.close() 200 | 201 | f = open(LOG_DIR + "config.md", 'w') 202 | f.write(train_config_str) 203 | f.close() 204 | 205 | 206 | def central_agent(net_weights_qs, net_gradients_qs, stats_qs): 207 | logger = log.getLogger(name="central_agent", level=pm.LOG_MODE) 208 | logger.info("Start central agent...") 209 | 210 | if not pm.RANDOMNESS: 211 | np.random.seed(pm.np_seed) 212 | tf.set_random_seed(pm.tf_seed) 213 | 214 | config = tf.ConfigProto() 215 | config.allow_soft_placement=False 216 | config.gpu_options.allow_growth = True 217 | tb_logger = tb_log.Logger(pm.SUMMARY_DIR) 218 | log_config(tb_logger) 219 | 220 | with tf.Session(config=config) as sess: 221 | policy_net = network.PolicyNetwork(sess, "policy_net", pm.TRAINING_MODE, logger) 222 | if pm.VALUE_NET: 223 | value_net = network.ValueNetwork(sess, "value_net", pm.TRAINING_MODE, logger) 224 | logger.info("Create the policy network, with "+str(policy_net.get_num_weights())+" parameters") 225 | 226 | sess.run(tf.global_variables_initializer()) 227 | tb_logger.add_graph(sess.graph) 228 | tb_logger.flush() 229 | policy_tf_saver = tf.train.Saver(max_to_keep=pm.MAX_NUM_CHECKPOINTS, var_list=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='policy_net')) 230 | if pm.POLICY_NN_MODEL is not None: 231 | policy_tf_saver.restore(sess, pm.POLICY_NN_MODEL) 232 | logger.info("Policy model "+pm.POLICY_NN_MODEL+" is restored.") 233 | 234 | if pm.VALUE_NET: 235 | value_tf_saver = tf.train.Saver(max_to_keep=pm.MAX_NUM_CHECKPOINTS, var_list=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='value_net')) 236 | if pm.VALUE_NN_MODEL is not None: 237 | value_tf_saver.restore(sess, pm.VALUE_NN_MODEL) 238 | logger.info("Value model " + pm.VALUE_NN_MODEL + " is restored.") 239 | 240 | step = 1 241 | start_t = time.time() 242 | 243 | if pm.VAL_ON_MASTER: 244 | validation_traces = [] # validation traces 245 | tags_prefix = ["DRF: ", "SRTF: ", "FIFO: ", "Tetris: ", "Optimus: "] 246 | for i in range(pm.VAL_DATASET): 247 | validation_traces.append(trace.Trace(None).get_trace()) 248 | stats = comparison.compare(copy.deepcopy(validation_traces), logger) # deep copy to avoid changes to validation_traces 249 | if not pm.SKIP_FIRST_VAL: 250 | stats.append(test(policy_net, copy.deepcopy(validation_traces), logger, step=0, tb_logger=tb_logger)) 251 | tags_prefix.append("Init_NN: ") 252 | 253 | f = open(LOG_DIR + "baselines.txt", 'w') 254 | for i in range(len(stats)): 
255 | jct, makespan, reward = stats[i] 256 | value = "JCT: " + str(jct) + " Makespan: " + str(makespan) + " Reward: " + str(reward) + "\n" 257 | f.write(value) 258 | tb_logger.add_text(tag=tags_prefix[i], value=value, step=step) 259 | f.close() 260 | tb_logger.flush() 261 | logger.info("Finish validation for heuristics and initialized NN.") 262 | 263 | updated_agents = [] # updated agents in async, will change each time after centeral agent get gradients 264 | for i in range(pm.NUM_AGENTS): 265 | updated_agents.append(i) 266 | 267 | while step <= pm.TOT_NUM_STEPS: 268 | # send updated parameters to agents 269 | policy_weights = policy_net.get_weights() 270 | if pm.VALUE_NET: 271 | value_weights = value_net.get_weights() 272 | for i in updated_agents: 273 | assert net_weights_qs[i].qsize() == 0 274 | net_weights_qs[i].put((policy_weights, value_weights)) 275 | else:# only put weights for the updated agents 276 | for i in updated_agents: 277 | assert net_weights_qs[i].qsize() == 0 278 | net_weights_qs[i].put(policy_weights) 279 | updated_agents[:] = [] 280 | # display speed 281 | if step % pm.DISP_INTERVAL == 0: 282 | elaps_t = time.time() - start_t 283 | speed = step / elaps_t 284 | logger.info("Central agent: Step " + str( 285 | step) + " Speed " + '%.3f' % speed + " batches/sec" + " Time " + '%.3f' % elaps_t + " seconds") 286 | 287 | 288 | # statistics 289 | if pm.TRAINING_MODE == "RL": 290 | policy_net.anneal_entropy_weight(step) 291 | tb_logger.add_scalar(tag="Entropy Weight", value=policy_net.entropy_weight, step=step) 292 | if pm.EPSILON_GREEDY: 293 | eps = 2 / (1 + np.exp(step / pm.ANNEALING_TEMPERATURE)) * 0.6 294 | tb_logger.add_scalar(tag="Epsilon Greedy", value=eps, step=step) 295 | 296 | collect_stats(stats_qs, tb_logger, step) 297 | if not pm.FIX_LEARNING_RATE: 298 | if step in pm.ADJUST_LR_STEPS: 299 | policy_net.lr /= 2 300 | if pm.VALUE_NET: 301 | value_net.lr /= 2 302 | logger.info("Learning rate is decreased to " + str(policy_net.lr) + " at step " + str(step)) 303 | if step < pm.STEP_TRAIN_CRITIC_NET: # set policy net lr to 0 to train critic net only 304 | policy_net.lr = 0.0 305 | 306 | if step % pm.DISP_INTERVAL == 0: 307 | tb_logger.add_scalar(tag="Learning rate", value=policy_net.lr, step=step) 308 | 309 | # save model 310 | if step % pm.CHECKPOINT_INTERVAL == 0: 311 | name_prefix = "" 312 | if pm.TRAINING_MODE == "SL": 313 | name_prefix += "sl_" 314 | else: 315 | name_prefix += "rl_" 316 | if pm.PS_WORKER: 317 | name_prefix += "ps_worker_" 318 | else: 319 | name_prefix += "worker_" 320 | 321 | model_name = pm.MODEL_DIR + "policy_" + name_prefix + str(step) + ".ckpt" 322 | path = policy_tf_saver.save(sess, model_name) 323 | logger.info("Policy model saved: " + path) 324 | if pm.VALUE_NET and pm.SAVE_VALUE_MODEL: 325 | model_name = pm.MODEL_DIR + "value_" + name_prefix + str(step) + ".ckpt" 326 | path = value_tf_saver.save(sess, model_name) 327 | logger.info("Value model saved: " + path) 328 | 329 | # validation 330 | if pm.VAL_ON_MASTER and step % pm.VAL_INTERVAL == 0: 331 | test(policy_net, copy.deepcopy(validation_traces), logger, step, tb_logger) 332 | 333 | # poll and update parameters 334 | # only calc gradients once one queue is not empty 335 | while True: 336 | for i in range(0, pm.NUM_AGENTS): 337 | if net_gradients_qs[i].qsize() == 1: 338 | updated_agents.append(i) 339 | if pm.VALUE_NET: 340 | policy_gradients, value_gradients = net_gradients_qs[i].get() 341 | value_net.apply_gradients(value_gradients) 342 | assert len(value_weights) == len(value_gradients) 
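# Note on the exchange protocol implied here: each agent pushes its computed
# gradients into net_gradients_qs[i]; the central agent applies them, records
# the agent id in updated_agents, and sends fresh weights back through
# net_weights_qs[i] at the top of the outer loop -- a (policy_weights,
# value_weights) pair when pm.VALUE_NET is set, otherwise the policy weights
# alone. Training statistics travel separately through stats_qs as tagged
# tuples (e.g. ("step:sl", ...), ("step:policy", ...), ("trace:sched_result", ...))
# and are consumed by collect_stats() above.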
343 | else: 344 | policy_gradients = net_gradients_qs[i].get() 345 | policy_net.apply_gradients(policy_gradients) 346 | assert len(policy_weights) == len(policy_gradients) 347 | if len(updated_agents) > 0: 348 | break 349 | # break when obtaining at least one agent's push 350 | # poll_ids = set([i for i in range(pm.NUM_AGENTS)]) 351 | # avg_policy_grads = [] 352 | # avg_value_grads = [] 353 | # while True: 354 | # for i in poll_ids.copy(): 355 | # try: 356 | # if pm.VALUE_NET: 357 | # policy_gradients, value_gradients = net_gradients_qs[i].get(False) 358 | # else: 359 | # policy_gradients = net_gradients_qs[i].get(False) 360 | # poll_ids.remove(i) 361 | # if len(avg_policy_grads) == 0: 362 | # avg_policy_grads = policy_gradients 363 | # else: 364 | # for j in range(len(avg_policy_grads)): 365 | # avg_policy_grads[j] += policy_gradients[j] 366 | # if pm.VALUE_NET: 367 | # if len(avg_value_grads) == 0: 368 | # avg_value_grads = value_gradients 369 | # else: 370 | # for j in range(len(avg_value_grads)): 371 | # avg_value_grads[j] += value_gradients[j] 372 | # except: 373 | # continue 374 | # if len(poll_ids) == 0: 375 | # break 376 | # for i in range(0, len(avg_policy_grads)): 377 | # avg_policy_grads[i] = avg_policy_grads[i] / pm.NUM_AGENTS 378 | # policy_net.apply_gradients(avg_policy_grads) 379 | # 380 | # if pm.VALUE_NET: 381 | # for i in range(0, len(avg_value_grads)): 382 | # avg_value_grads[i] = avg_value_grads[i] / pm.NUM_AGENTS 383 | # value_net.apply_gradients(avg_value_grads) 384 | 385 | # visualize gradients and weights 386 | if step % pm.VISUAL_GW_INTERVAL == 0 and pm.EXPERIMENT_NAME is None: 387 | assert len(policy_weights) == len(policy_gradients) 388 | for i in range(0,len(policy_weights),10): 389 | tb_logger.add_histogram(tag="Policy weights " + str(i), value=policy_weights[i], step=step) 390 | tb_logger.add_histogram(tag="Policy gradients " + str(i), value=policy_gradients[i], step=step) 391 | if pm.VALUE_NET: 392 | assert len(value_weights) == len(value_gradients) 393 | for i in range(0,len(value_weights),10): 394 | tb_logger.add_histogram(tag="Value weights " + str(i), value=value_weights[i], step=step) 395 | tb_logger.add_histogram(tag="Value gradients " + str(i), value=value_gradients[i], step=step) 396 | 397 | step += 1 398 | 399 | logger.info("Training ends...") 400 | if pm.VALUE_NET: 401 | for i in range(pm.NUM_AGENTS): 402 | net_weights_qs[i].put(("exit", "exit")) 403 | else: 404 | for i in range(pm.NUM_AGENTS): 405 | net_weights_qs[i].put("exit") 406 | # os.system("sudo pkill -9 python") 407 | exit(0) 408 | 409 | 410 | def sl_agent(net_weights_q, net_gradients_q, stats_q, id): 411 | logger = log.getLogger(name="agent_"+str(id), level=pm.LOG_MODE) 412 | logger.info("Start supervised learning, agent " + str(id) + " ...") 413 | 414 | if not pm.RANDOMNESS: 415 | np.random.seed(pm.np_seed+id+1) 416 | 417 | config = tf.ConfigProto() 418 | config.gpu_options.allow_growth = True 419 | with tf.Session(config=config) as sess, tf.device("/gpu:"+str(id%2)): 420 | policy_net = network.PolicyNetwork(sess, "policy_net", pm.TRAINING_MODE, logger) 421 | sess.run(tf.global_variables_initializer()) # to avoid batch normalization error 422 | 423 | global_step = 1 424 | avg_jct = [] 425 | avg_makespan = [] 426 | avg_reward = [] 427 | if not pm.VAL_ON_MASTER: 428 | validation_traces = [] # validation traces 429 | for i in range(pm.VAL_DATASET): 430 | validation_traces.append(trace.Trace(None).get_trace()) 431 | # generate training traces 432 | traces = [] 433 | for episode in 
range(pm.TRAIN_EPOCH_SIZE): 434 | job_trace = trace.Trace(None).get_trace() 435 | traces.append(job_trace) 436 | mem_store = memory.Memory(maxlen=pm.REPLAY_MEMORY_SIZE) 437 | logger.info("Filling experience buffer...") 438 | for epoch in range(pm.TOT_TRAIN_EPOCHS): 439 | for episode in range(pm.TRAIN_EPOCH_SIZE): 440 | tic = time.time() 441 | job_trace = copy.deepcopy(traces[episode]) 442 | if pm.HEURISTIC == "DRF": 443 | env = drf_env.DRF_Env("DRF", job_trace, logger) 444 | elif pm.HEURISTIC == "FIFO": 445 | env = fifo_env.FIFO_Env("FIFO", job_trace, logger) 446 | elif pm.HEURISTIC == "SRTF": 447 | env = srtf_env.SRTF_Env("SRTF", job_trace, logger) 448 | elif pm.HEURISTIC == "Tetris": 449 | env = tetris_env.Tetris_Env("Tetris", job_trace, logger) 450 | 451 | while not env.end: 452 | if pm.LOG_MODE == "DEBUG": 453 | time.sleep(0.01) 454 | data = env.step() 455 | logger.debug("ts length:" + str(len(data))) 456 | 457 | for (input, label) in data: 458 | mem_store.store(input, 0, label, 0) 459 | 460 | if mem_store.full(): 461 | # prepare a training batch 462 | _, trajectories, _ = mem_store.sample(pm.MINI_BATCH_SIZE) 463 | input_batch = [traj.state for traj in trajectories] 464 | label_batch = [traj.action for traj in trajectories] 465 | 466 | # if global_step % 10 == 0: 467 | # print "input", input_batch[0] 468 | # print "label", label_batch[0] 469 | 470 | # pull latest weights before training 471 | weights = net_weights_q.get() 472 | if isinstance(weights, basestring) and weights == "exit": 473 | logger.info("Agent " + str(id) + " exits.") 474 | exit(0) 475 | policy_net.set_weights(weights) 476 | 477 | # superversed learning to calculate gradients 478 | entropy, loss, policy_grads = policy_net.get_sl_gradients(np.stack(input_batch),np.vstack(label_batch)) 479 | for i in range(len(policy_grads)): 480 | assert np.any(np.isnan(policy_grads[i])) == False 481 | 482 | # send gradients to the central agent 483 | net_gradients_q.put(policy_grads) 484 | 485 | # validation 486 | if not pm.VAL_ON_MASTER and global_step % pm.VAL_INTERVAL == 0: 487 | val_tic = time.time() 488 | val_loss = validate.val_loss(policy_net, validation_traces, logger, global_step) 489 | jct, makespan, reward = validate.val_jmr(policy_net, validation_traces, logger, global_step) 490 | stats_q.put(("val", val_loss, jct, makespan, reward)) 491 | val_toc = time.time() 492 | logger.info("Agent " + str(id) + " Validation at step " + str(global_step) + " Time: " + '%.3f'%(val_toc-val_tic)) 493 | stats_q.put(("step:sl", entropy, loss)) 494 | 495 | global_step += 1 496 | 497 | num_jobs, jct, makespan, reward = env.get_results() 498 | avg_jct.append(jct) 499 | avg_makespan.append(makespan) 500 | avg_reward.append(reward) 501 | if global_step%pm.DISP_INTERVAL == 0: 502 | logger.info("Agent\t AVG JCT\t Makespan\t Reward") 503 | logger.info(str(id) + " \t \t " + '%.3f' %(sum(avg_jct)/len(avg_jct)) + " \t\t" + " " + '%.3f' %(1.0*sum(avg_makespan)/len(avg_makespan)) \ 504 | + " \t" + " " + '%.3f' %(sum(avg_reward)/len(avg_reward))) 505 | 506 | 507 | 508 | def rl_agent(net_weights_q, net_gradients_q, stats_q, id): 509 | logger = log.getLogger(name="agent_"+str(id), level=pm.LOG_MODE,mode="w",fh=True,ch=True,prefix="Agent " +str(id)) 510 | logger.info("Start reinforcement learning, agent " + str(id) + " ...") 511 | 512 | if not pm.RANDOMNESS: 513 | np.random.seed(pm.np_seed+id+1) 514 | 515 | config = tf.ConfigProto() 516 | config.gpu_options.allow_growth = True 517 | with tf.Session(config=config) as sess, tf.device("/gpu:"+str(id%2)): 518 | 
policy_net = network.PolicyNetwork(sess, "policy_net", pm.TRAINING_MODE, logger) 519 | if pm.VALUE_NET: 520 | value_net = network.ValueNetwork(sess, "value_net", pm.TRAINING_MODE, logger) 521 | sess.run(tf.global_variables_initializer()) # to avoid batch normalization error 522 | if pm.VALUE_NET: 523 | policy_weights, value_weights = net_weights_q.get() 524 | value_net.set_weights(value_weights) 525 | else: 526 | policy_weights = net_weights_q.get() 527 | policy_net.set_weights(policy_weights) # initialization from master 528 | first_time = True 529 | 530 | global_step = 1 531 | if not pm.VAL_ON_MASTER: 532 | validation_traces = [] 533 | for i in range(pm.VAL_DATASET): 534 | validation_traces.append(trace.Trace(None).get_trace()) 535 | if pm.PRIORITY_REPLAY: 536 | mem_store = prioritized_memory.Memory(maxlen=pm.REPLAY_MEMORY_SIZE) 537 | else: 538 | mem_store = memory.Memory(maxlen=pm.REPLAY_MEMORY_SIZE) 539 | logger.info("Filling experience buffer...") 540 | 541 | # generate training data 542 | traces = [] 543 | for episode in range(pm.TRAIN_EPOCH_SIZE): 544 | job_trace = trace.Trace(None).get_trace() 545 | traces.append(job_trace) 546 | 547 | if pm.EPSILON_GREEDY: 548 | if pm.VARYING_EPSILON: 549 | temperature = pm.ANNEALING_TEMPERATURE * (1 + float(id)/pm.NUM_AGENTS) 550 | else: 551 | temperature = pm.ANNEALING_TEMPERATURE 552 | 553 | for epoch in range(pm.TOT_TRAIN_EPOCHS): 554 | for episode in range(pm.TRAIN_EPOCH_SIZE): 555 | tic = time.time() 556 | env = rl_env.RL_Env("RL", copy.deepcopy(traces[episode]), logger) 557 | 558 | states = [] 559 | masked_outputs = [] 560 | actions = [] 561 | rewards = [] 562 | ts = 0 563 | while not env.end: 564 | if pm.LOG_MODE == "DEBUG": 565 | time.sleep(0.01) 566 | state = env.observe() 567 | output = policy_net.predict(np.reshape(state, (1, pm.STATE_DIM[0], pm.STATE_DIM[1]))) 568 | if pm.EPSILON_GREEDY: # greedy epsilon 569 | env.epsilon = 2 / (1 + np.exp(global_step / temperature)) 570 | masked_output, action, reward, move_on, valid_state = env.step(output) 571 | 572 | if valid_state: # do not save state when move on except skip_ts, but need to save reward!!! 
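# When pm.LT_REWARD_IN_TS is enabled, the per-step rewards appended below are
# later folded into discounted returns in place (the reversed loop further
# down). A standalone sketch of that computation; the function name is
# illustrative and gamma stands in for pm.DISCOUNT_FACTOR:
def _discounted_returns_sketch(rewards, gamma):
    returns = list(rewards)                     # copy so the input list is untouched
    for i in reversed(range(len(returns) - 1)):
        returns[i] += gamma * returns[i + 1]    # R_i = r_i + gamma * R_{i+1}
    return returns
# e.g. _discounted_returns_sketch([1.0, 0.0, 2.0], 0.9) -> [2.62, 1.8, 2.0] (up to float rounding)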
573 | states.append(state) 574 | masked_outputs.append(masked_output) 575 | actions.append(action) 576 | rewards.append(reward) 577 | if move_on: 578 | ts += 1 579 | # ts_reward = reward 580 | if ts%pm.LT_REWARD_NUM_TS == 0 and len(states) > 0: # states can be [] due to no jobs in the ts 581 | # lt_reward = sum(rewards) 582 | # ts_rewards = [0 for _ in range(pm.LT_REWARD_NUM_TS)] 583 | # ts_rewards[-1] = lt_reward 584 | # for i in reversed(range(0, len(ts_rewards) - 1)): 585 | # ts_rewards[i] += ts_rewards[i + 1] * pm.DISCOUNT_FACTOR 586 | 587 | if pm.LT_REWARD_IN_TS: 588 | for i in reversed(range(0,len(rewards)-1)): 589 | rewards[i] += rewards[i+1]*pm.DISCOUNT_FACTOR 590 | elif pm.TS_REWARD_PLUS_JOB_REWARD: 591 | rewards = env.get_job_reward() 592 | assert len(rewards) == len(states) 593 | else: 594 | rewards = [reward for _ in range(len(states))] 595 | 596 | # randomly fill samples to memory 597 | if pm.RANDOM_FILL_MEMORY: 598 | indexes = np.random.choice(len(states), size=pm.MINI_BATCH_SIZE, replace=False) 599 | for i in indexes: 600 | mem_store.store(states[i], masked_outputs[i], actions[i], rewards[i]) 601 | else: 602 | for i in range(len(states)): 603 | mem_store.store(states[i], masked_outputs[i], actions[i], rewards[i]) 604 | 605 | if mem_store.full() and ts%pm.NUM_TS_PER_UPDATE == 0: 606 | # print "start training RL" 607 | # prepare a training batch 608 | mem_indexes, trajectories, IS_weights = mem_store.sample(pm.MINI_BATCH_SIZE) 609 | states_batch = [traj.state for traj in trajectories] 610 | outputs_batch = [traj.output for traj in trajectories] 611 | actions_batch = [traj.action for traj in trajectories] 612 | rewards_batch = [traj.reward for traj in trajectories] 613 | 614 | # pull latest weights before training 615 | if not first_time: # avoid pulling twice at the first update 616 | if pm.VALUE_NET: 617 | policy_weights, value_weights = net_weights_q.get() 618 | if isinstance(policy_weights, basestring) and policy_weights == "exit": 619 | logger.info("Agent " + str(id) + " exits.") 620 | exit(0) 621 | policy_net.set_weights(policy_weights) 622 | value_net.set_weights(value_weights) 623 | else: 624 | policy_weights = net_weights_q.get() 625 | if isinstance(policy_weights, basestring) and policy_weights == "exit": 626 | logger.info("Agent " + str(id) + " exits.") 627 | exit(0) 628 | policy_net.set_weights(policy_weights) 629 | else: 630 | first_time = False 631 | 632 | # set entropy weight, both agent and central agent need to be set 633 | policy_net.anneal_entropy_weight(global_step) 634 | 635 | # reinforcement learning to calculate gradients 636 | if pm.VALUE_NET: 637 | value_output = value_net.predict(np.stack(states_batch)) 638 | td_loss = np.vstack(rewards_batch) - value_output 639 | adjusted_td_loss = td_loss * np.vstack(IS_weights) 640 | policy_entropy, policy_loss, policy_grads = policy_net.get_rl_gradients(np.stack(states_batch), \ 641 | np.vstack(outputs_batch), np.vstack(actions_batch), adjusted_td_loss) 642 | value_loss, value_grads = value_net.get_rl_gradients(np.stack(states_batch), value_output, np.vstack(rewards_batch)) 643 | else: 644 | if pm.PRIORITY_MEMORY_SORT_REWARD and pm.MEAN_REWARD_BASELINE: 645 | td_loss = np.vstack(rewards_batch) - mem_store.avg_reward() 646 | else: 647 | td_loss = np.vstack(rewards_batch) - 0 648 | adjusted_td_loss = td_loss * np.vstack(IS_weights) 649 | policy_entropy, policy_loss, policy_grads = policy_net.get_rl_gradients(np.stack(states_batch), np.vstack(outputs_batch), np.vstack(actions_batch), adjusted_td_loss) 650 | 651 | for 
aa in range(len(actions_batch)): 652 | if actions_batch[aa][-1] == 1: 653 | # print "rewards:", rewards_batch[aa], "td_loss:", td_loss[aa] 654 | logger.debug("rewards:" + str(rewards_batch[aa]) + "td_loss:" + str(td_loss[aa])) 655 | 656 | for i in range(len(policy_grads)): 657 | try: 658 | assert np.any(np.isnan(policy_grads[i])) == False 659 | # print np.mean(np.abs(policy_grads[i])) # 10^-5 to 10^-2 660 | except Exception as e: 661 | logger.error("Error: " + str(e)) 662 | logger.error("Gradients: " + str(policy_grads[i])) 663 | logger.error("Input type: " + str(states_batch[:,0])) 664 | logger.error("Masked Output: " + str(outputs_batch)) 665 | logger.error("Action: " + str(actions_batch)) 666 | logger.error("TD Loss: " + str(td_loss)) 667 | logger.error("Policy Loss: " + str(policy_loss)) 668 | logger.error("Policy Entropy: " + str(policy_entropy)) 669 | exit(1) # another option is to continue 670 | if pm.VALUE_NET: 671 | for i in range(len(value_grads)): 672 | try: 673 | assert np.any(np.isnan(value_grads[i])) == False 674 | except Exception as e: 675 | logger.error("Error: " + str(e) + " " + str(policy_grads[i])) 676 | exit(1) 677 | 678 | # send gradients to the central agent 679 | if pm.VALUE_NET: 680 | net_gradients_q.put((policy_grads, value_grads)) 681 | else: 682 | net_gradients_q.put(policy_grads) 683 | if pm.PRIORITY_REPLAY: 684 | mem_store.update(mem_indexes, abs(td_loss)) 685 | # validation 686 | if not pm.VAL_ON_MASTER and global_step % pm.VAL_INTERVAL == 0: 687 | val_loss = validate.val_loss(policy_net, validation_traces, logger, global_step) 688 | jct, makespan, reward = validate.val_jmr(policy_net, validation_traces, logger, 689 | global_step) 690 | stats_q.put(("val", val_loss, jct, makespan, reward)) 691 | 692 | # statistics 693 | if pm.VALUE_NET: 694 | stats_q.put(("step:policy+value", policy_entropy, policy_loss, value_loss, sum(td_loss)/len(td_loss), sum(rewards_batch)/len(rewards_batch), output)) 695 | else: 696 | stats_q.put(("step:policy", policy_entropy, policy_loss, sum(td_loss)/len(td_loss), sum(rewards_batch)/len(rewards_batch), output)) 697 | global_step += 1 698 | 699 | # clear 700 | states = [] 701 | masked_outputs = [] 702 | actions = [] 703 | rewards = [] 704 | 705 | # collect statistics after training one trace 706 | num_jobs, jct, makespan, reward = env.get_results() 707 | stats_q.put(("trace:sched_result", jct, makespan, reward)) 708 | if (epoch*pm.TRAIN_EPOCH_SIZE+episode)%pm.DISP_INTERVAL == 0: 709 | if (epoch*pm.TRAIN_EPOCH_SIZE+episode)%50 == 0: 710 | stats_q.put(("trace:job_stats", episode, env.get_jobstats())) 711 | toc = time.time() 712 | logger.info("--------------------------------------------------------------") 713 | logger.info("Agent " + str(id) + " Epoch " + str(epoch) + " Trace " + str(episode) + " Step " + str(global_step)) 714 | logger.info("# of Jobs\t AVG JCT\t Makespan\t Reward\t Time") 715 | logger.info(str(num_jobs) + " \t" + " \t" + " " + '%.3f' %jct + " \t\t" + " " + '%.3f' %makespan \ 716 | + "\t\t" + " " + '%.3f' %reward + "\t" + " " + '%.3f' % (toc - tic)) 717 | 718 | 719 | def main(): 720 | os.system("rm -f *.log") 721 | os.system("sudo pkill -9 tensorboard; sleep 3") 722 | 723 | net_weights_qs = [multiprocessing.Queue(1) for i in range(pm.NUM_AGENTS)] 724 | net_gradients_qs = [multiprocessing.Queue(1) for i in range(pm.NUM_AGENTS)] 725 | stats_qs = [multiprocessing.Queue() for i in range(pm.NUM_AGENTS)] 726 | 727 | os.system("mkdir -p " + pm.MODEL_DIR + "; mkdir -p " + pm.SUMMARY_DIR) 728 | if pm.EXPERIMENT_NAME is None: 
729 | cmd = "cd " + pm.SUMMARY_DIR + " && rm -rf *; tensorboard --logdir=./" 730 | board = multiprocessing.Process(target=lambda: os.system(cmd), args=()) 731 | board.start() 732 | time.sleep(3) # let tensorboard start first since it will clear the dir 733 | 734 | # central_agent(net_weights_qs, net_gradients_qs, stats_qs) 735 | master = multiprocessing.Process(target=central_agent, args=(net_weights_qs, net_gradients_qs, stats_qs,)) 736 | master.start() 737 | #agent(net_weights_qs[0], net_gradients_qs[0], stats_qs[0], 0) 738 | #exit() 739 | 740 | if pm.TRAINING_MODE == "SL": 741 | agents = [multiprocessing.Process(target=sl_agent, args=(net_weights_qs[i], net_gradients_qs[i], stats_qs[i],i,)) for i in range(pm.NUM_AGENTS)] 742 | elif pm.TRAINING_MODE == "RL": 743 | agents = [multiprocessing.Process(target=rl_agent, args=(net_weights_qs[i], net_gradients_qs[i], stats_qs[i], i,)) for i in range(pm.NUM_AGENTS)] 744 | for i in range(pm.NUM_AGENTS): 745 | agents[i].start() 746 | 747 | master.join() 748 | 749 | 750 | if __name__ == "__main__": 751 | main() -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import multiprocessing 4 | import tensorflow as tf 5 | import os 6 | import parameters as pm 7 | import trace 8 | import network 9 | import drf_env 10 | import fifo_env 11 | import srtf_env 12 | import tetris_env 13 | import optimus_env 14 | import rl_env 15 | import log 16 | import validate 17 | import collections 18 | import memory 19 | import prioritized_memory 20 | import tb_log 21 | import copy 22 | import comparison 23 | 24 | 25 | def collect_stats(stats_qs, tb_logger, step): 26 | policy_entropys = [] 27 | policy_losses = [] 28 | value_losses = [] 29 | td_losses = [] 30 | step_rewards = [] 31 | jcts = [] 32 | makespans = [] 33 | rewards = [] 34 | val_losses = [] 35 | val_jcts = [] 36 | val_makespans = [] 37 | val_rewards = [] 38 | for id in range(pm.NUM_AGENTS): 39 | while not stats_qs[id].empty(): 40 | stats = stats_qs[id].get() 41 | tag_prefix = "SAgent " + str(id) + " " 42 | if stats[0] == "step:sl": 43 | _, entropy, loss = stats 44 | policy_entropys.append(entropy) 45 | policy_losses.append(loss) 46 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 47 | tb_logger.add_scalar(tag=tag_prefix+"SL Loss", value=loss, step=step) 48 | tb_logger.add_scalar(tag=tag_prefix + "SL Entropy", value=entropy, step=step) 49 | elif stats[0] == "val": 50 | _, val_loss, jct, makespan, reward = stats 51 | val_losses.append(val_loss) 52 | val_jcts.append(jct) 53 | val_makespans.append(makespan) 54 | val_rewards.append(reward) 55 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 56 | tb_logger.add_scalar(tag=tag_prefix+"Val Loss", value=val_loss, step=step) 57 | tb_logger.add_scalar(tag=tag_prefix+"Val JCT", value=jct, step=step) 58 | tb_logger.add_scalar(tag=tag_prefix+"Val Makespan", value=makespan, step=step) 59 | tb_logger.add_scalar(tag=tag_prefix+"Val Reward", value=reward, step=step) 60 | elif stats[0] == "step:policy": 61 | _, entropy, loss, td_loss, step_reward, output = stats 62 | policy_entropys.append(entropy) 63 | policy_losses.append(loss) 64 | td_losses.append(td_loss) 65 | step_rewards.append(step_reward) 66 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 67 | tb_logger.add_scalar(tag=tag_prefix + "Policy Entropy", value=entropy, step=step) 68 | tb_logger.add_scalar(tag=tag_prefix+"Policy 
Loss", value=loss, step=step) 69 | tb_logger.add_scalar(tag=tag_prefix + "TD Loss", value=td_loss, step=step) 70 | tb_logger.add_scalar(tag=tag_prefix+"Step Reward", value=step_reward, step=step) 71 | tb_logger.add_histogram(tag=tag_prefix+"Output", value=output, step=step) 72 | elif stats[0] == "step:policy+value": 73 | _, entropy, policy_loss, value_loss, td_loss, step_reward, output = stats 74 | policy_entropys.append(entropy) 75 | policy_losses.append(policy_loss) 76 | value_losses.append(value_loss) 77 | td_losses.append(td_loss) 78 | step_rewards.append(step_reward) 79 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 80 | tb_logger.add_scalar(tag=tag_prefix + "Policy Entropy", value=entropy, step=step) 81 | tb_logger.add_scalar(tag=tag_prefix+"Policy Loss", value=policy_loss, step=step) 82 | tb_logger.add_scalar(tag=tag_prefix + "Value Loss", value=value_loss, step=step) 83 | tb_logger.add_scalar(tag=tag_prefix + "TD Loss", value=td_loss, step=step) 84 | tb_logger.add_scalar(tag=tag_prefix + "Step Reward", value=step_reward, step=step) 85 | tb_logger.add_histogram(tag=tag_prefix + "Output", value=output, step=step) 86 | elif stats[0] == "trace:sched_result": 87 | _, jct, makespan, reward = stats 88 | jcts.append(jct) 89 | makespans.append(makespan) 90 | rewards.append(reward) 91 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 92 | tb_logger.add_scalar(tag=tag_prefix + "Avg JCT", value=jct, step=step) 93 | tb_logger.add_scalar(tag=tag_prefix + "Makespan", value=makespan, step=step) 94 | tb_logger.add_scalar(tag=tag_prefix + "Reward", value=reward, step=step) 95 | elif stats[0] == "trace:job_stats": 96 | _, episode, jobstats = stats 97 | if id < pm.NUM_RECORD_AGENTS and pm.EXPERIMENT_NAME is None: 98 | job_stats_tag_prefix = tag_prefix + "Trace " + str(episode) + " Step " + str(step) + " " 99 | for i in range(len(jobstats["arrival"])): 100 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Arrival", value=jobstats["arrival"][i], step=i) 101 | for i in range(len(jobstats["ts_completed"])): 102 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Ts_completed", value=jobstats["ts_completed"][i], step=i) 103 | for i in range(len(jobstats["tot_completed"])): 104 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Tot_completed", value=jobstats["tot_completed"][i], step=i) 105 | for i in range(len(jobstats["uncompleted"])): 106 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Uncompleted", value=jobstats["uncompleted"][i], step=i) 107 | for i in range(len(jobstats["running"])): 108 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Running", value=jobstats["running"][i], step=i) 109 | for i in range(len(jobstats["total"])): 110 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Total jobs", value=jobstats["total"][i], step=i) 111 | for i in range(len(jobstats["backlog"])): 112 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "Backlog", value=jobstats["backlog"][i], step=i) 113 | for i in range(len(jobstats["cpu_util"])): 114 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "CPU_Util", value=jobstats["cpu_util"][i], step=i) 115 | for i in range(len(jobstats["gpu_util"])): 116 | tb_logger.add_scalar(tag=job_stats_tag_prefix + "GPU_Util", value=jobstats["gpu_util"][i], step=i) 117 | tb_logger.add_histogram(tag=job_stats_tag_prefix + "JCT", value=jobstats["duration"], step=step) 118 | 119 | tag_prefix = "Central " 120 | if len(policy_entropys) > 0: 121 | tb_logger.add_scalar(tag=tag_prefix + "Policy Entropy", value=sum(policy_entropys) / len(policy_entropys), 
step=step) 122 | if len(policy_losses) > 0: 123 | tb_logger.add_scalar(tag=tag_prefix + "Policy Loss", value=sum(policy_losses) / len(policy_losses), step=step) 124 | if len(value_losses) > 0: 125 | tb_logger.add_scalar(tag=tag_prefix + "Value Loss", value=sum(value_losses) / len(value_losses), step=step) 126 | if len(td_losses) > 0: 127 | tb_logger.add_scalar(tag=tag_prefix + "TD Loss / Advantage", value=sum(td_losses) / len(td_losses), step=step) 128 | if len(step_rewards) > 0: 129 | tb_logger.add_scalar(tag=tag_prefix + "Batch Reward", value=sum(step_rewards) / len(step_rewards), step=step) 130 | if len(jcts) > 0: 131 | tb_logger.add_scalar(tag=tag_prefix + "JCT", value=sum(jcts) / len(jcts), step=step) 132 | # log results 133 | if pm.TRAINING_MODE == "SL": 134 | f = open(LOG_DIR + "sl_train_jct.txt", 'a') 135 | else: 136 | f = open(LOG_DIR + "rl_train_jct.txt", 'a') 137 | f.write("step " + str(step) + ": " + str(sum(jcts) / len(jcts)) + "\n") 138 | f.close() 139 | if len(makespans) > 0: 140 | tb_logger.add_scalar(tag=tag_prefix + "Makespan", value=sum(makespans) / len(makespans), step=step) 141 | # log results 142 | if pm.TRAINING_MODE == "SL": 143 | f = open(LOG_DIR + "sl_train_makespan.txt", 'a') 144 | else: 145 | f = open(LOG_DIR + "rl_train_makespan.txt", 'a') 146 | f.write("step " + str(step) + ": " + str(sum(makespans) / len(makespans)) + "\n") 147 | f.close() 148 | if len(rewards) > 0: 149 | tb_logger.add_scalar(tag=tag_prefix + "Reward", value=sum(rewards) / len(rewards), step=step) 150 | if len(val_losses) > 0: 151 | tb_logger.add_scalar(tag=tag_prefix + "Val Loss", value=sum(val_losses) / len(val_losses), step=step) 152 | if len(val_jcts) > 0: 153 | tb_logger.add_scalar(tag=tag_prefix + "Val JCT", value=sum(val_jcts) / len(val_jcts), step=step) 154 | if len(val_makespans) > 0: 155 | tb_logger.add_scalar(tag=tag_prefix + "Val Makespan", value=sum(val_makespans) / len(val_makespans), step=step) 156 | if len(val_rewards) > 0: 157 | tb_logger.add_scalar(tag=tag_prefix + "Val Reward", value=sum(val_rewards) / len(val_rewards), step=step) 158 | tb_logger.flush() 159 | 160 | 161 | def test(policy_net, validation_traces, logger, step, tb_logger): 162 | val_tic = time.time() 163 | tag_prefix = "Central " 164 | try: 165 | if pm.TRAINING_MODE == "SL": 166 | val_loss = validate.val_loss(policy_net, copy.deepcopy(validation_traces), logger, step) 167 | tb_logger.add_scalar(tag=tag_prefix + "Val Loss", value=val_loss, step=step) 168 | jct, makespan, reward = validate.val_jmr(policy_net, copy.deepcopy(validation_traces), logger, step, tb_logger) 169 | tb_logger.add_scalar(tag=tag_prefix + "Val JCT", value=jct, step=step) 170 | tb_logger.add_scalar(tag=tag_prefix + "Val Makespan", value=makespan, step=step) 171 | tb_logger.add_scalar(tag=tag_prefix + "Val Reward", value=reward, step=step) 172 | tb_logger.flush() 173 | val_toc = time.time() 174 | logger.info("Central Agent:" + " Validation at step " + str(step) + " Time: " + '%.3f' % (val_toc - val_tic)) 175 | 176 | # log results 177 | if pm.TRAINING_MODE == "SL": 178 | f = open(LOG_DIR + "sl_validation.txt", 'a') 179 | else: 180 | f = open(LOG_DIR + "rl_validation.txt", 'a') 181 | f.write("step " + str(step) + ": " + str(jct) + " " + str(makespan) + " " + str(reward) + "\n") 182 | f.close() 183 | 184 | return (jct, makespan, reward) 185 | except Exception as e: 186 | logger.error("Error when validation! 
" + str(e)) 187 | tb_logger.add_text(tag="validation error", value=str(e), step=step) 188 | 189 | 190 | def log_config(tb_logger): 191 | # log all configurations in parameters and backup py 192 | global LOG_DIR 193 | if pm.EXPERIMENT_NAME is None: 194 | LOG_DIR = "./backup/" 195 | else: 196 | LOG_DIR = "./" + pm.EXPERIMENT_NAME + "/" 197 | 198 | # os.system("rm -rf " + LOG_DIR) 199 | os.system("mkdir -p " + LOG_DIR + "; cp *.py *.txt " + LOG_DIR) 200 | 201 | pm_md = globals().get('pm', None) 202 | train_config = dict() 203 | if pm_md: 204 | train_config = {key: value for key, value in pm_md.__dict__.iteritems() if not (key.startswith('__') or key.startswith('_'))} 205 | train_config_str = "" 206 | for key, value in train_config.iteritems(): 207 | train_config_str += "{:<30}{:<100}".format(key, value) + "\n\n" 208 | 209 | tb_logger.add_text(tag="Config", value=train_config_str, step=0) 210 | tb_logger.flush() 211 | 212 | if pm.TRAINING_MODE == "SL": 213 | f = open(pm.MODEL_DIR + "sl_model.config", "w") 214 | else: 215 | f = open(pm.MODEL_DIR + "rl_model.config", "w") 216 | f.write(train_config_str) 217 | f.close() 218 | 219 | f = open(LOG_DIR + "config.md", 'w') 220 | f.write(train_config_str) 221 | f.close() 222 | 223 | 224 | def central_agent(net_weights_qs, net_gradients_qs, stats_qs): 225 | logger = log.getLogger(name="central_agent", level=pm.LOG_MODE) 226 | logger.info("Start central agent...") 227 | 228 | if not pm.RANDOMNESS: 229 | np.random.seed(pm.np_seed) 230 | tf.set_random_seed(pm.tf_seed) 231 | 232 | config = tf.ConfigProto() 233 | config.allow_soft_placement=False 234 | config.gpu_options.allow_growth = True 235 | tb_logger = tb_log.Logger(pm.SUMMARY_DIR) 236 | log_config(tb_logger) 237 | 238 | with tf.Session(config=config) as sess: 239 | policy_net = network.PolicyNetwork(sess, "policy_net", pm.TRAINING_MODE, logger) 240 | if pm.VALUE_NET: 241 | value_net = network.ValueNetwork(sess, "value_net", pm.TRAINING_MODE, logger) 242 | logger.info("Create the policy network, with "+str(policy_net.get_num_weights())+" parameters") 243 | 244 | sess.run(tf.global_variables_initializer()) 245 | tb_logger.add_graph(sess.graph) 246 | tb_logger.flush() 247 | policy_tf_saver = tf.train.Saver(max_to_keep=pm.MAX_NUM_CHECKPOINTS, var_list=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='policy_net')) 248 | if pm.POLICY_NN_MODEL is not None: 249 | policy_tf_saver.restore(sess, pm.POLICY_NN_MODEL) 250 | logger.info("Policy model "+pm.POLICY_NN_MODEL+" is restored.") 251 | 252 | if pm.VALUE_NET: 253 | value_tf_saver = tf.train.Saver(max_to_keep=pm.MAX_NUM_CHECKPOINTS, var_list=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='value_net')) 254 | if pm.VALUE_NN_MODEL is not None: 255 | value_tf_saver.restore(sess, pm.VALUE_NN_MODEL) 256 | logger.info("Value model " + pm.VALUE_NN_MODEL + " is restored.") 257 | 258 | step = 1 259 | start_t = time.time() 260 | 261 | if pm.VAL_ON_MASTER: 262 | validation_traces = [] # validation traces 263 | tags_prefix = ["DRF: ", "SRTF: ", "FIFO: ", "Tetris: ", "Optimus: "] 264 | for i in range(pm.VAL_DATASET): 265 | validation_traces.append(trace.Trace(None).get_trace()) 266 | stats = comparison.compare(copy.deepcopy(validation_traces), logger) # deep copy to avoid changes to validation_traces 267 | if not pm.SKIP_FIRST_VAL: 268 | stats.append(test(policy_net, copy.deepcopy(validation_traces), logger, step=0, tb_logger=tb_logger)) 269 | tags_prefix.append("Init_NN: ") 270 | 271 | f = open(LOG_DIR + "baselines.txt", 'w') 272 | for i in 
range(len(stats)): 273 | jct, makespan, reward = stats[i] 274 | value = tags_prefix[i] + " JCT: " + str(jct) + " Makespan: " + str(makespan) + " Reward: " + str(reward) + "\n" 275 | f.write(value) 276 | tb_logger.add_text(tag=tags_prefix[i], value=value, step=step) 277 | f.close() 278 | tb_logger.flush() 279 | logger.info("Finish validation for heuristics and initialized NN.") 280 | 281 | while step <= pm.TOT_NUM_STEPS: 282 | # send updated parameters to agents 283 | policy_weights = policy_net.get_weights() 284 | if pm.VALUE_NET: 285 | value_weights = value_net.get_weights() 286 | for i in range(pm.NUM_AGENTS): 287 | net_weights_qs[i].put((policy_weights, value_weights)) 288 | else: 289 | for i in range(pm.NUM_AGENTS): 290 | net_weights_qs[i].put(policy_weights) 291 | 292 | # display speed 293 | if step % pm.DISP_INTERVAL == 0: 294 | elaps_t = time.time() - start_t 295 | speed = step / elaps_t 296 | logger.info("Central agent: Step " + str( 297 | step) + " Speed " + '%.3f' % speed + " batches/sec" + " Time " + '%.3f' % elaps_t + " seconds") 298 | 299 | 300 | # statistics 301 | if pm.TRAINING_MODE == "RL": 302 | policy_net.anneal_entropy_weight(step) 303 | tb_logger.add_scalar(tag="Entropy Weight", value=policy_net.entropy_weight, step=step) 304 | if pm.EPSILON_GREEDY: 305 | eps = 2 / (1 + np.exp(step / pm.ANNEALING_TEMPERATURE)) * 0.6 306 | tb_logger.add_scalar(tag="Epsilon Greedy", value=eps, step=step) 307 | 308 | collect_stats(stats_qs, tb_logger, step) 309 | if not pm.FIX_LEARNING_RATE: 310 | if step in pm.ADJUST_LR_STEPS: 311 | policy_net.lr /= 2 312 | if pm.VALUE_NET: 313 | value_net.lr /= 2 314 | logger.info("Learning rate is decreased to " + str(policy_net.lr) + " at step " + str(step)) 315 | if step < pm.STEP_TRAIN_CRITIC_NET: # set policy net lr to 0 to train critic net only 316 | policy_net.lr = 0.0 317 | 318 | if step % pm.DISP_INTERVAL == 0: 319 | tb_logger.add_scalar(tag="Learning rate", value=policy_net.lr, step=step) 320 | 321 | # save model 322 | if step % pm.CHECKPOINT_INTERVAL == 0: 323 | name_prefix = "" 324 | if pm.TRAINING_MODE == "SL": 325 | name_prefix += "sl_" 326 | else: 327 | name_prefix += "rl_" 328 | if pm.PS_WORKER: 329 | name_prefix += "ps_worker_" 330 | else: 331 | name_prefix += "worker_" 332 | 333 | model_name = pm.MODEL_DIR + "policy_" + name_prefix + str(step) + ".ckpt" 334 | path = policy_tf_saver.save(sess, model_name) 335 | logger.info("Policy model saved: " + path) 336 | if pm.VALUE_NET and pm.SAVE_VALUE_MODEL: 337 | model_name = pm.MODEL_DIR + "value_" + name_prefix + str(step) + ".ckpt" 338 | path = value_tf_saver.save(sess, model_name) 339 | logger.info("Value model saved: " + path) 340 | 341 | # validation 342 | if pm.VAL_ON_MASTER and step % pm.VAL_INTERVAL == 0: 343 | test(policy_net, copy.deepcopy(validation_traces), logger, step, tb_logger) 344 | 345 | # poll and update parameters 346 | poll_ids = set([i for i in range(pm.NUM_AGENTS)]) 347 | avg_policy_grads = [] 348 | avg_value_grads = [] 349 | while True: 350 | for i in poll_ids.copy(): 351 | try: 352 | if pm.VALUE_NET: 353 | policy_gradients, value_gradients = net_gradients_qs[i].get(False) 354 | else: 355 | policy_gradients = net_gradients_qs[i].get(False) 356 | poll_ids.remove(i) 357 | if len(avg_policy_grads) == 0: 358 | avg_policy_grads = policy_gradients 359 | else: 360 | for j in range(len(avg_policy_grads)): 361 | avg_policy_grads[j] += policy_gradients[j] 362 | if pm.VALUE_NET: 363 | if len(avg_value_grads) == 0: 364 | avg_value_grads = value_gradients 365 | else: 366 | for j in 
range(len(avg_value_grads)): 367 | avg_value_grads[j] += value_gradients[j] 368 | except: 369 | continue 370 | if len(poll_ids) == 0: 371 | break 372 | for i in range(0, len(avg_policy_grads)): 373 | avg_policy_grads[i] = avg_policy_grads[i] / pm.NUM_AGENTS 374 | policy_net.apply_gradients(avg_policy_grads) 375 | 376 | if pm.VALUE_NET: 377 | for i in range(0, len(avg_value_grads)): 378 | avg_value_grads[i] = avg_value_grads[i] / pm.NUM_AGENTS 379 | value_net.apply_gradients(avg_value_grads) 380 | 381 | # visualize gradients and weights 382 | if step % pm.VISUAL_GW_INTERVAL == 0 and pm.EXPERIMENT_NAME is None: 383 | assert len(policy_weights) == len(avg_policy_grads) 384 | for i in range(0,len(policy_weights),10): 385 | tb_logger.add_histogram(tag="Policy weights " + str(i), value=policy_weights[i], step=step) 386 | tb_logger.add_histogram(tag="Policy gradients " + str(i), value=avg_policy_grads[i], step=step) 387 | if pm.VALUE_NET: 388 | assert len(value_weights) == len(avg_value_grads) 389 | for i in range(0,len(value_weights),10): 390 | tb_logger.add_histogram(tag="Value weights " + str(i), value=value_weights[i], step=step) 391 | tb_logger.add_histogram(tag="Value gradients " + str(i), value=avg_value_grads[i], step=step) 392 | 393 | step += 1 394 | 395 | logger.info("Training ends...") 396 | if pm.VALUE_NET: 397 | for i in range(pm.NUM_AGENTS): 398 | net_weights_qs[i].put(("exit", "exit")) 399 | else: 400 | for i in range(pm.NUM_AGENTS): 401 | net_weights_qs[i].put("exit") 402 | # os.system("sudo pkill -9 python") 403 | exit(0) 404 | 405 | 406 | def sl_agent(net_weights_q, net_gradients_q, stats_q, id): 407 | logger = log.getLogger(name="agent_"+str(id), level=pm.LOG_MODE) 408 | logger.info("Start supervised learning, agent " + str(id) + " ...") 409 | 410 | if not pm.RANDOMNESS: 411 | np.random.seed(pm.np_seed+id+1) 412 | 413 | config = tf.ConfigProto() 414 | config.gpu_options.allow_growth = True 415 | with tf.Session(config=config) as sess, tf.device("/gpu:"+str(id%2)): 416 | policy_net = network.PolicyNetwork(sess, "policy_net", pm.TRAINING_MODE, logger) 417 | sess.run(tf.global_variables_initializer()) # to avoid batch normalization error 418 | 419 | global_step = 1 420 | avg_jct = [] 421 | avg_makespan = [] 422 | avg_reward = [] 423 | if not pm.VAL_ON_MASTER: 424 | validation_traces = [] # validation traces 425 | for i in range(pm.VAL_DATASET): 426 | validation_traces.append(trace.Trace(None).get_trace()) 427 | # generate training traces 428 | traces = [] 429 | for episode in range(pm.TRAIN_EPOCH_SIZE): 430 | job_trace = trace.Trace(None).get_trace() 431 | traces.append(job_trace) 432 | mem_store = memory.Memory(maxlen=pm.REPLAY_MEMORY_SIZE) 433 | logger.info("Filling experience buffer...") 434 | for epoch in range(pm.TOT_TRAIN_EPOCHS): 435 | for episode in range(pm.TRAIN_EPOCH_SIZE): 436 | tic = time.time() 437 | job_trace = copy.deepcopy(traces[episode]) 438 | if pm.HEURISTIC == "DRF": 439 | env = drf_env.DRF_Env("DRF", job_trace, logger) 440 | elif pm.HEURISTIC == "FIFO": 441 | env = fifo_env.FIFO_Env("FIFO", job_trace, logger) 442 | elif pm.HEURISTIC == "SRTF": 443 | env = srtf_env.SRTF_Env("SRTF", job_trace, logger) 444 | elif pm.HEURISTIC == "Tetris": 445 | env = tetris_env.Tetris_Env("Tetris", job_trace, logger) 446 | elif pm.HEURISTIC == "Optimus": 447 | env = optimus_env.Optimus_Env("Optimus", job_trace, logger) 448 | 449 | while not env.end: 450 | if pm.LOG_MODE == "DEBUG": 451 | time.sleep(0.01) 452 | data = env.step() 453 | logger.debug("ts length:" + str(len(data))) 
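# Note on the loop below: in SL mode each env.step() returns a list of (input, label) pairs collected from the chosen heuristic scheduler (DRF/FIFO/SRTF/Tetris/Optimus), where input is the observed cluster/job state and label is the heuristic's scheduling decision for that state. mem_store.store(input, 0, label, 0) appears to reuse the RL memory layout (state, output, action, reward), so the output and reward slots are filled with placeholder zeros here and only state/action are read back when sampling a training batch.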
454 | 455 | for (input, label) in data: 456 | mem_store.store(input, 0, label, 0) 457 | 458 | if mem_store.full(): 459 | # prepare a training batch 460 | _, trajectories, _ = mem_store.sample(pm.MINI_BATCH_SIZE) 461 | input_batch = [traj.state for traj in trajectories] 462 | label_batch = [traj.action for traj in trajectories] 463 | 464 | # if global_step % 10 == 0: 465 | # print "input", input_batch[0] 466 | # print "label", label_batch[0] 467 | 468 | # pull latest weights before training 469 | weights = net_weights_q.get() 470 | if isinstance(weights, basestring) and weights == "exit": 471 | logger.info("Agent " + str(id) + " exits.") 472 | exit(0) 473 | policy_net.set_weights(weights) 474 | 475 | # superversed learning to calculate gradients 476 | entropy, loss, policy_grads = policy_net.get_sl_gradients(np.stack(input_batch),np.vstack(label_batch)) 477 | for i in range(len(policy_grads)): 478 | assert np.any(np.isnan(policy_grads[i])) == False 479 | 480 | # send gradients to the central agent 481 | net_gradients_q.put(policy_grads) 482 | 483 | # validation 484 | if not pm.VAL_ON_MASTER and global_step % pm.VAL_INTERVAL == 0: 485 | val_tic = time.time() 486 | val_loss = validate.val_loss(policy_net, validation_traces, logger, global_step) 487 | jct, makespan, reward = validate.val_jmr(policy_net, validation_traces, logger, global_step) 488 | stats_q.put(("val", val_loss, jct, makespan, reward)) 489 | val_toc = time.time() 490 | logger.info("Agent " + str(id) + " Validation at step " + str(global_step) + " Time: " + '%.3f'%(val_toc-val_tic)) 491 | stats_q.put(("step:sl", entropy, loss)) 492 | 493 | global_step += 1 494 | 495 | num_jobs, jct, makespan, reward = env.get_results() 496 | avg_jct.append(jct) 497 | avg_makespan.append(makespan) 498 | avg_reward.append(reward) 499 | if global_step%pm.DISP_INTERVAL == 0: 500 | logger.info("Agent\t AVG JCT\t Makespan\t Reward") 501 | logger.info(str(id) + " \t \t " + '%.3f' %(sum(avg_jct)/len(avg_jct)) + " \t\t" + " " + '%.3f' %(1.0*sum(avg_makespan)/len(avg_makespan)) \ 502 | + " \t" + " " + '%.3f' %(sum(avg_reward)/len(avg_reward))) 503 | 504 | 505 | 506 | def rl_agent(net_weights_q, net_gradients_q, stats_q, id): 507 | logger = log.getLogger(name="agent_"+str(id), level=pm.LOG_MODE,mode="w",fh=True,ch=True,prefix="Agent " +str(id)) 508 | logger.info("Start reinforcement learning, agent " + str(id) + " ...") 509 | 510 | if not pm.RANDOMNESS: 511 | np.random.seed(pm.np_seed+id+1) 512 | 513 | config = tf.ConfigProto() 514 | config.gpu_options.allow_growth = True 515 | with tf.Session(config=config) as sess, tf.device("/gpu:"+str(id%2)): 516 | policy_net = network.PolicyNetwork(sess, "policy_net", pm.TRAINING_MODE, logger) 517 | if pm.VALUE_NET: 518 | value_net = network.ValueNetwork(sess, "value_net", pm.TRAINING_MODE, logger) 519 | sess.run(tf.global_variables_initializer()) # to avoid batch normalization error 520 | if pm.VALUE_NET: 521 | policy_weights, value_weights = net_weights_q.get() 522 | value_net.set_weights(value_weights) 523 | else: 524 | policy_weights = net_weights_q.get() 525 | policy_net.set_weights(policy_weights) # initialization from master 526 | first_time = True 527 | 528 | global_step = 1 529 | if not pm.VAL_ON_MASTER: 530 | validation_traces = [] 531 | for i in range(pm.VAL_DATASET): 532 | validation_traces.append(trace.Trace(None).get_trace()) 533 | if pm.PRIORITY_REPLAY: 534 | mem_store = prioritized_memory.Memory(maxlen=pm.REPLAY_MEMORY_SIZE) 535 | else: 536 | mem_store = memory.Memory(maxlen=pm.REPLAY_MEMORY_SIZE) 537 
| logger.info("Filling experience buffer...") 538 | 539 | # generate training data 540 | traces = [] 541 | for episode in range(pm.TRAIN_EPOCH_SIZE): 542 | job_trace = trace.Trace(None).get_trace() 543 | traces.append(job_trace) 544 | 545 | if pm.EPSILON_GREEDY: 546 | if pm.VARYING_EPSILON: 547 | temperature = pm.ANNEALING_TEMPERATURE * (1 + float(id)/pm.NUM_AGENTS) 548 | else: 549 | temperature = pm.ANNEALING_TEMPERATURE 550 | gates = [True, True, True] 551 | for epoch in range(pm.TOT_TRAIN_EPOCHS): 552 | for episode in range(pm.TRAIN_EPOCH_SIZE): 553 | if pm.CHANGING_JOB_TYPES: 554 | if global_step >= 0 and gates[0]: 555 | gates[0] = False 556 | traces = [] 557 | for episode in range(pm.TRAIN_EPOCH_SIZE): 558 | job_trace = trace.Trace(None).get_trace(4) 559 | traces.append(job_trace) 560 | logger.info("Changing job types 4") 561 | elif global_step >= 1000 and gates[1]: 562 | gates[1] = False 563 | traces = [] 564 | for episode in range(pm.TRAIN_EPOCH_SIZE): 565 | job_trace = trace.Trace(None).get_trace(6) 566 | traces.append(job_trace) 567 | logger.info("Changing job types 6") 568 | elif global_step >= 2000 and gates[2]: 569 | gates[2] = False 570 | traces = [] 571 | for episode in range(pm.TRAIN_EPOCH_SIZE): 572 | job_trace = trace.Trace(None).get_trace(8) 573 | traces.append(job_trace) 574 | logger.info("Changing job types 8") 575 | tic = time.time() 576 | env = rl_env.RL_Env("RL", copy.deepcopy(traces[episode]), logger) 577 | states = [] 578 | masked_outputs = [] 579 | actions = [] 580 | rewards = [] 581 | ts = 0 582 | while not env.end: 583 | if pm.LOG_MODE == "DEBUG": 584 | time.sleep(0.01) 585 | state = env.observe() 586 | output = policy_net.predict(np.reshape(state, (1, pm.STATE_DIM[0], pm.STATE_DIM[1]))) 587 | if pm.EPSILON_GREEDY: # greedy epsilon 588 | env.epsilon = 2 / (1 + np.exp(global_step / temperature)) 589 | masked_output, action, reward, move_on, valid_state = env.step(output) 590 | 591 | if valid_state: # do not save state when move on except skip_ts, but need to save reward!!! 
592 | states.append(state) 593 | masked_outputs.append(masked_output) 594 | actions.append(action) 595 | rewards.append(reward) 596 | if move_on: 597 | ts += 1 598 | # ts_reward = reward 599 | if ts%pm.LT_REWARD_NUM_TS == 0 and len(states) > 0: # states can be [] due to no jobs in the ts 600 | # lt_reward = sum(rewards) 601 | # ts_rewards = [0 for _ in range(pm.LT_REWARD_NUM_TS)] 602 | # ts_rewards[-1] = lt_reward 603 | # for i in reversed(range(0, len(ts_rewards) - 1)): 604 | # ts_rewards[i] += ts_rewards[i + 1] * pm.DISCOUNT_FACTOR 605 | 606 | if pm.LT_REWARD_IN_TS: 607 | for i in reversed(range(0,len(rewards)-1)): 608 | rewards[i] += rewards[i+1]*pm.DISCOUNT_FACTOR 609 | elif pm.TS_REWARD_PLUS_JOB_REWARD: 610 | rewards = env.get_job_reward() 611 | assert len(rewards) == len(states) 612 | else: 613 | rewards = [reward for _ in range(len(states))] 614 | 615 | # randomly fill samples to memory 616 | if pm.RANDOM_FILL_MEMORY: 617 | indexes = np.random.choice(len(states), size=pm.MINI_BATCH_SIZE, replace=False) 618 | for i in indexes: 619 | mem_store.store(states[i], masked_outputs[i], actions[i], rewards[i]) 620 | else: 621 | for i in range(len(states)): 622 | mem_store.store(states[i], masked_outputs[i], actions[i], rewards[i]) 623 | 624 | if mem_store.full() and ts%pm.NUM_TS_PER_UPDATE == 0: 625 | # prepare a training batch 626 | mem_indexes, trajectories, IS_weights = mem_store.sample(pm.MINI_BATCH_SIZE) 627 | states_batch = [traj.state for traj in trajectories] 628 | outputs_batch = [traj.output for traj in trajectories] 629 | actions_batch = [traj.action for traj in trajectories] 630 | rewards_batch = [traj.reward for traj in trajectories] 631 | 632 | # pull latest weights before training 633 | if not first_time: # avoid pulling twice at the first update 634 | if pm.VALUE_NET: 635 | policy_weights, value_weights = net_weights_q.get() 636 | if isinstance(policy_weights, basestring) and policy_weights == "exit": 637 | logger.info("Agent " + str(id) + " exits.") 638 | exit(0) 639 | policy_net.set_weights(policy_weights) 640 | value_net.set_weights(value_weights) 641 | else: 642 | policy_weights = net_weights_q.get() 643 | if isinstance(policy_weights, basestring) and policy_weights == "exit": 644 | logger.info("Agent " + str(id) + " exits.") 645 | exit(0) 646 | policy_net.set_weights(policy_weights) 647 | else: 648 | first_time = False 649 | 650 | # set entropy weight, both agent and central agent need to be set 651 | policy_net.anneal_entropy_weight(global_step) 652 | 653 | # reinforcement learning to calculate gradients 654 | if pm.VALUE_NET: 655 | value_output = value_net.predict(np.stack(states_batch)) 656 | td_loss = np.vstack(rewards_batch) - value_output 657 | adjusted_td_loss = td_loss * np.vstack(IS_weights) 658 | policy_entropy, policy_loss, policy_grads = policy_net.get_rl_gradients(np.stack(states_batch), \ 659 | np.vstack(outputs_batch), np.vstack(actions_batch), adjusted_td_loss) 660 | value_loss, value_grads = value_net.get_rl_gradients(np.stack(states_batch), value_output, np.vstack(rewards_batch)) 661 | else: 662 | if pm.PRIORITY_MEMORY_SORT_REWARD and pm.MEAN_REWARD_BASELINE: 663 | td_loss = np.vstack(rewards_batch) - mem_store.avg_reward() 664 | else: 665 | td_loss = np.vstack(rewards_batch) - 0 666 | adjusted_td_loss = td_loss * np.vstack(IS_weights) 667 | policy_entropy, policy_loss, policy_grads = policy_net.get_rl_gradients(np.stack(states_batch), np.vstack(outputs_batch), np.vstack(actions_batch), adjusted_td_loss) 668 | 669 | for aa in range(len(actions_batch)): 
670 | if actions_batch[aa][-1] == 1: 671 | # print "rewards:", rewards_batch[aa], "td_loss:", td_loss[aa] 672 | logger.debug("rewards:" + str(rewards_batch[aa]) + "td_loss:" + str(td_loss[aa])) 673 | 674 | for i in range(len(policy_grads)): 675 | try: 676 | assert np.any(np.isnan(policy_grads[i])) == False 677 | # print np.mean(np.abs(policy_grads[i])) # 10^-5 to 10^-2 678 | except Exception as e: 679 | logger.error("Error: " + str(e)) 680 | logger.error("Gradients: " + str(policy_grads[i])) 681 | logger.error("Input type: " + str(states_batch[:,0])) 682 | logger.error("Masked Output: " + str(outputs_batch)) 683 | logger.error("Action: " + str(actions_batch)) 684 | logger.error("TD Loss: " + str(td_loss)) 685 | logger.error("Policy Loss: " + str(policy_loss)) 686 | logger.error("Policy Entropy: " + str(policy_entropy)) 687 | exit(1) # another option is to continue 688 | if pm.VALUE_NET: 689 | for i in range(len(value_grads)): 690 | try: 691 | assert np.any(np.isnan(value_grads[i])) == False 692 | except Exception as e: 693 | logger.error("Error: " + str(e) + " " + str(policy_grads[i])) 694 | exit(1) 695 | 696 | # send gradients to the central agent 697 | if pm.VALUE_NET: 698 | net_gradients_q.put((policy_grads, value_grads)) 699 | else: 700 | net_gradients_q.put(policy_grads) 701 | if pm.PRIORITY_REPLAY: 702 | mem_store.update(mem_indexes, abs(td_loss)) 703 | # validation 704 | if not pm.VAL_ON_MASTER and global_step % pm.VAL_INTERVAL == 0: 705 | val_loss = validate.val_loss(policy_net, validation_traces, logger, global_step) 706 | jct, makespan, reward = validate.val_jmr(policy_net, validation_traces, logger, 707 | global_step) 708 | stats_q.put(("val", val_loss, jct, makespan, reward)) 709 | 710 | # statistics 711 | if pm.VALUE_NET: 712 | stats_q.put(("step:policy+value", policy_entropy, policy_loss, value_loss, sum(td_loss)/len(td_loss), sum(rewards_batch)/len(rewards_batch), output)) 713 | else: 714 | stats_q.put(("step:policy", policy_entropy, policy_loss, sum(td_loss)/len(td_loss), sum(rewards_batch)/len(rewards_batch), output)) 715 | global_step += 1 716 | 717 | # clear 718 | states = [] 719 | masked_outputs = [] 720 | actions = [] 721 | rewards = [] 722 | 723 | # collect statistics after training one trace 724 | num_jobs, jct, makespan, reward = env.get_results() 725 | stats_q.put(("trace:sched_result", jct, makespan, reward)) 726 | if (epoch*pm.TRAIN_EPOCH_SIZE+episode)%pm.DISP_INTERVAL == 0: 727 | if (epoch*pm.TRAIN_EPOCH_SIZE+episode)%50 == 0: 728 | stats_q.put(("trace:job_stats", episode, env.get_jobstats())) 729 | toc = time.time() 730 | logger.info("--------------------------------------------------------------") 731 | logger.info("Agent " + str(id) + " Epoch " + str(epoch) + " Trace " + str(episode) + " Step " + str(global_step)) 732 | logger.info("# of Jobs\t AVG JCT\t Makespan\t Reward\t Time") 733 | logger.info(str(num_jobs) + " \t" + " \t" + " " + '%.3f' %jct + " \t\t" + " " + '%.3f' %makespan \ 734 | + "\t\t" + " " + '%.3f' %reward + "\t" + " " + '%.3f' % (toc - tic)) 735 | 736 | 737 | def main(): 738 | os.system("rm -f *.log") 739 | os.system("sudo pkill -9 tensorboard; sleep 3") 740 | 741 | net_weights_qs = [multiprocessing.Queue(1) for i in range(pm.NUM_AGENTS)] 742 | net_gradients_qs = [multiprocessing.Queue(1) for i in range(pm.NUM_AGENTS)] 743 | stats_qs = [multiprocessing.Queue() for i in range(pm.NUM_AGENTS)] 744 | 745 | os.system("mkdir -p " + pm.MODEL_DIR + "; mkdir -p " + pm.SUMMARY_DIR) 746 | if pm.EXPERIMENT_NAME is None: 747 | cmd = "cd " + pm.SUMMARY_DIR 
+ " && rm -rf *; tensorboard --logdir=./" 748 | board = multiprocessing.Process(target=lambda: os.system(cmd), args=()) 749 | board.start() 750 | time.sleep(3) # let tensorboard start first since it will clear the dir 751 | 752 | # central_agent(net_weights_qs, net_gradients_qs, stats_qs) 753 | master = multiprocessing.Process(target=central_agent, args=(net_weights_qs, net_gradients_qs, stats_qs,)) 754 | master.start() 755 | #agent(net_weights_qs[0], net_gradients_qs[0], stats_qs[0], 0) 756 | #exit() 757 | 758 | if pm.TRAINING_MODE == "SL": 759 | agents = [multiprocessing.Process(target=sl_agent, args=(net_weights_qs[i], net_gradients_qs[i], stats_qs[i],i,)) for i in range(pm.NUM_AGENTS)] 760 | elif pm.TRAINING_MODE == "RL": 761 | agents = [multiprocessing.Process(target=rl_agent, args=(net_weights_qs[i], net_gradients_qs[i], stats_qs[i], i,)) for i in range(pm.NUM_AGENTS)] 762 | for i in range(pm.NUM_AGENTS): 763 | agents[i].start() 764 | 765 | master.join() 766 | 767 | 768 | if __name__ == "__main__": 769 | main() --------------------------------------------------------------------------------