├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── agent.py ├── dag_dataset.py ├── dag_generator.py ├── graph.py ├── ilp_solver.py ├── logger.py ├── node.py ├── policy.py ├── preprocess.py ├── rl.py └── sl.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep-Reinforcement-Learning-Based Scheduler for High-Level Synthesis
2 | 
3 | This is the implementation of the Deep-Reinforcement-Learning-Based Scheduler for High-Level Synthesis (HLS). This work was published at ICCAD'19.
4 | 
5 | ```
6 | @inproceedings{DBLP:conf/iccad/ChenS19,
7 |   author    = {Hongzheng Chen and
8 |                Minghua Shen},
9 |   editor    = {David Z. Pan},
10 |   title     = {A Deep-Reinforcement-Learning-Based Scheduler for {FPGA} {HLS}},
11 |   booktitle = {Proceedings of the International Conference on Computer-Aided Design,
12 |                {ICCAD} 2019, Westminster, CO, USA, November 4-7, 2019},
13 |   pages     = {1--8},
14 |   publisher = {{ACM}},
15 |   year      = {2019},
16 |   url       = {https://doi.org/10.1109/ICCAD45719.2019.8942126},
17 |   doi       = {10.1109/ICCAD45719.2019.8942126},
18 | }
19 | ```
20 | 
21 | To run the program, please follow the instructions below.
22 | 
23 | ```bash
24 | # Generate DAGs for supervised learning
25 | $ python3 dag_generator.py
26 | 
27 | # Supervised learning
28 | $ python3 sl.py
29 | 
30 | # Reinforcement learning
31 | # Use --use_network to pass in a pre-trained SL network
32 | $ python3 rl.py
33 | ```
34 | 
35 | Prepare the test DAGs in the `DAG` folder and name them `dag_X.dot` (where `X` is a number not used by the DAGs in the training set).
36 | 
37 | ```bash
38 | # Test the Xth DAG
39 | $ python3 rl.py --test X
40 | ```
41 | 
42 | Other parameter settings can be found in the source code.
43 | 
44 | 
45 | ## Requirements
46 | * Python 3.6
47 | * PyTorch v0.4
48 | * Visdom v0.1
49 | * PuLP v1.6.8
50 | * NumPy v1.14
51 | * Matplotlib v2.2.2
--------------------------------------------------------------------------------
/agent.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright 2018 Hongzheng Chen
3 | E-mail: chenhzh37@mail2.sysu.edu.cn
4 | 
5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis.
6 | 
7 | This file contains the Agent class.
8 | '''
9 | 
10 | import torch
11 | import torch.nn as nn
12 | import torch.optim as optim
13 | import torch.nn.functional as F
14 | from torch.distributions import Categorical
15 | import numpy as np
16 | from policy import Policy
17 | 
18 | class Agent(object):
19 |     def __init__(self, state_size, use_network="", device="cuda",lr=5e-4):
20 |         super(Agent, self).__init__()
21 |         self.device = device
22 |         if use_network == "":
23 |             net = Policy(state_size[0]).to(self.device)
24 |             print("Build a new network!")
25 |         else:
26 |             try:
27 |                 net = torch.load("./Networks/" + use_network).to(self.device)
28 |                 net.classifier = nn.Sequential(*list(net.classifier.children())[:-1]) # drop the final layer of the loaded classifier
29 |                 print("Loaded %s." % use_network)
30 |             except:
31 |                 net = Policy(state_size[0]).to(self.device)
32 |                 print("No such network named %s. Rebuild a new network!"
% use_network) 33 | self.policy = net 34 | # self.policy = net.eval() # avoid dropout 35 | self.optimizer = optim.Adam(self.policy.parameters(),lr=lr) 36 | 37 | def get_sl_action(self, state): 38 | output = self.policy(state) # bs(1)*50 39 | # randomly select 40 | action = torch.topk(output,1) 41 | action = action[1] # op 42 | criterion = nn.NLLLoss() 43 | nllloss = criterion(output,torch.Tensor([action]).type(torch.LongTensor).to(self.device).resize_((1,))) 44 | return nllloss, action 45 | 46 | def get_action(self, state, legal_move): 47 | output = self.policy.forward_without_softmax(state) # bs(1)*50 48 | legal_move_dict = legal_move[1] 49 | legal_move = torch.tensor(legal_move[0]).long().to(self.device) 50 | legal_prob = torch.index_select(output,1,legal_move) 51 | # randomly select 52 | if len(legal_prob.shape) == 2 and legal_prob.shape[1] != 1: 53 | m = Categorical(F.softmax(legal_prob,dim=1)) 54 | index = m.sample().item() 55 | else: 56 | index = 0 57 | action = legal_move_dict[index] 58 | criterion = nn.NLLLoss() 59 | nllloss = criterion(F.log_softmax(legal_prob,dim=1),torch.Tensor([index]).type(torch.LongTensor).to(self.device).resize_((1,))) 60 | del output 61 | return nllloss, action # log_prob, action 62 | 63 | def get_deterministic_action(self, state, legal_move): 64 | output = self.policy(state) # bs(1)*50 65 | legal_move_dict = legal_move[1] 66 | legal_move = torch.tensor(legal_move[0]).long().to(self.device) 67 | legal_prob = torch.index_select(output,1,legal_move) 68 | action = torch.topk(legal_prob,1) 69 | action = action[1] # op 70 | if len(legal_prob.shape) == 2 and legal_prob.shape[1] != 1: 71 | action = legal_move_dict[action.item()] 72 | else: 73 | action = legal_move_dict[0] 74 | log_prob = output[0][action] # requires_grad 75 | return log_prob, action 76 | 77 | def update_weight(self, all_log_probs, all_rewards, baseline=False): 78 | gamma = 0.99 79 | eps = np.finfo(np.float32).eps.item() 80 | tot_loss = [] 81 | res_rewards, avg_reward = [], [] 82 | # baseline `1/N\sum_{i=1}^N r(\tau)` 83 | for log_prob, temp_rewards in zip(all_log_probs,all_rewards): 84 | # a full trace \tau 85 | R = 0 86 | rewards = [] 87 | for r in temp_rewards[::-1]: 88 | R = r + gamma * R 89 | rewards.insert(0, R) 90 | avg_reward.append(rewards[0]) # r(\tau) 91 | res_rewards.append(rewards) 92 | if baseline: 93 | avg_reward = np.array(avg_reward).mean() 94 | else: 95 | avg_reward = 0 96 | for log_prob, rewards in zip(all_log_probs,res_rewards): 97 | rewards = torch.tensor(rewards).to(self.device) 98 | rewards = rewards - avg_reward # minus baseline 99 | loss = torch.Tensor([0]).float().to(self.device) 100 | for step, (nllloss, reward) in enumerate(zip(log_prob,rewards)): 101 | # if prob is very small (say 0.01) then -log(prob) is extremely large 102 | # reward needs to be small to make loss small 103 | loss += nllloss * reward # minus! 
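            # `nllloss` above is -log pi(a_t|s_t) for the sampled action, so `loss`
            # accumulates -sum_t log pi(a_t|s_t) * G_t over this trace, where G_t is the
            # discounted (and optionally baselined) return. Minimizing this surrogate with
            # Adam is the REINFORCE policy-gradient update, i.e. gradient ascent on the
            # expected return.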
104 | tot_loss.append(loss) 105 | # tot_loss.append(torch.dot(torch.tensor(log_prob).to(self.device),rewards)) 106 | # backpropagate 107 | self.optimizer.zero_grad() 108 | # loss = torch.stack(tot_loss, dim=0).sum() / len(tot_loss) 109 | tot_loss = torch.cat(tot_loss).mean() # sum() 110 | tot_loss.backward() 111 | self.optimizer.step() 112 | res = tot_loss.item() 113 | del tot_loss 114 | return res -------------------------------------------------------------------------------- /dag_dataset.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the DagDataset class. 8 | ''' 9 | 10 | import numpy as np 11 | from torch.utils.data import Dataset 12 | 13 | class DagDataset(Dataset): 14 | def __init__(self,state_action_pair): 15 | super(DagDataset, self).__init__() 16 | self.state_action_pair = np.array(state_action_pair) 17 | 18 | def __len__(self): 19 | return len(self.state_action_pair) 20 | 21 | def __getitem__(self, idx): 22 | eps = np.finfo(np.float32).eps.item() 23 | state = np.array(self.state_action_pair[idx][0]).astype(np.float64) 24 | state = (state - state.mean(axis = 0)) / (state.std(axis = 0) + eps) 25 | return (state,np.array(self.state_action_pair[idx][1])) -------------------------------------------------------------------------------- /dag_generator.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the random DAG generator. 8 | ''' 9 | from graph import Graph 10 | import random 11 | 12 | NUM_GRAPH = 5000 13 | 14 | class DAGGen(object): 15 | def __init__(self, num, tot_node=50, min_per_layer=1, max_per_layer=5, link_rate=0.5, mul_rate=0.3): 16 | res = "digraph {\n" 17 | res += " node [fontcolor=black]\n" 18 | res += " property [mul=%d,lf=%.1f]\n" % (random.randint(2,5),random.uniform(1.0,2.0)) 19 | nowNode = 0 20 | edges = [] 21 | pre_layer = [] 22 | while nowNode < tot_node: 23 | newNode = random.randint(min_per_layer, max_per_layer) 24 | if nowNode + newNode > tot_node: 25 | newNode = tot_node - nowNode 26 | cur_layer = [] 27 | for i in range(nowNode,nowNode + newNode): 28 | cur_layer.append(i) 29 | for j in pre_layer: 30 | for k in cur_layer: 31 | if random.random() < link_rate: 32 | edges.append((j,k)) 33 | pre_layer = cur_layer[:] 34 | nowNode += newNode 35 | for i in range(tot_node): 36 | if random.random() < mul_rate: 37 | typename = "mul" 38 | else: 39 | typename = "add" 40 | res += " %d [ label = %s ];\n" % (i, typename) 41 | for (step,edge) in enumerate(edges): 42 | res += " %d -> %d [ name = %d ];\n" % (edge[0],edge[1],step) 43 | res += "}\n" 44 | output = open("./DAG/dag_" + str(num) + ".dot","w") 45 | output.write(res) 46 | output.close() 47 | 48 | for i in range(1,NUM_GRAPH+1): 49 | DAGGen(i,tot_node=random.randint(10,50),mul_rate=random.uniform(0.3,0.5)) 50 | if i % 100 == 0: 51 | print("Generated %d / %d DAGs." 
% (i,NUM_GRAPH)) -------------------------------------------------------------------------------- /graph.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the definition and implementation of Graph class. 8 | ''' 9 | 10 | import re, sys 11 | import numpy as np 12 | from node import Node 13 | 14 | class Graph(object): 15 | def __init__(self, mode, mul=2): 16 | self.mode = mode 17 | self.mul_delay = mul 18 | self._LC = 1 19 | self.vertex = 0 20 | self.edge = 0 21 | self.adjlist = [] 22 | self.depth = 0 23 | self.order = [] 24 | self.revOrder = [] 25 | self.totLatency = 0 26 | self.numScheduledOp = 0 27 | # state[0]: Current schedule 28 | # state[1]: Current possible move 29 | # state[2]: All possible move 30 | self.state = [] 31 | # reward and punishment 32 | self.reward = dict() 33 | self.reward["penalty"] = 0 34 | self.reward["small"] = 0 35 | self.reward["nothing"] = 0 36 | 37 | def setLatencyFactor(self,lc): 38 | self._LC = lc 39 | 40 | def setConstrainedL(self,conL): 41 | self.CONSTRAINED_L = conL 42 | 43 | def getConstrainedL(self): 44 | return self.CONSTRAINED_L+1 45 | 46 | def getMulDelay(self): 47 | return self.mul_delay 48 | 49 | def getLf(self): 50 | return self._LC 51 | 52 | def setMAXRESOURCE(self,r): 53 | self.maxNr = {"MUL":r[0], "ALU":r[1]} 54 | print("Constrained resources: MUL: %d ALU: %d" % (self.maxNr["MUL"],self.maxNr["ALU"])) 55 | 56 | def initialize(self): 57 | self.dfs() # obtain CONSTRAINED_L 58 | self.currNr = {"MUL":0, "ALU":0} 59 | self.bestNr = {"MUL":0x3f3f3f, "ALU":0x3f3f3f} 60 | self.nrt = {"MUL":np.array([0]*(self.CONSTRAINED_L+1)), "ALU":np.array([0]*(self.CONSTRAINED_L+1))} 61 | 62 | def read(self,infile): 63 | # print("Begin parsing...") 64 | for line in infile: 65 | if not ("label" in line or "name" in line): 66 | if "property" in line: 67 | res = re.split("=|,|\\].*",line) 68 | self.mul_delay = int(res[1]) 69 | self.setLatencyFactor(float(res[3])) 70 | else: 71 | continue 72 | elif "label" in line: 73 | res = re.split(" *\\[ *label *= *| *\\];| +",line) 74 | op, op_type = res[1], res[2] 75 | self.add_vertex(op,op_type) 76 | else: 77 | res = re.split(" *\\[ *name *= *| *\\];| *-> *| +",line) 78 | src, des = res[1], res[2] 79 | self.add_edge(src,des) 80 | # print("Finish parsing!") 81 | 82 | def mapR(self,type_,mode=0): 83 | if (type_ == "mul" or type_ == "MUL" or type_ == "div" or type_ == "DIV"): 84 | return ("MUL" if mode == 0 else 0) 85 | else: 86 | return ("ALU" if mode == 0 else 1) 87 | 88 | def add_vertex(self,name_,type_): 89 | delay = 1 90 | if self.mapR(type_) == "MUL": 91 | delay = self.mul_delay 92 | v = Node(self.vertex,name_,type_,delay) 93 | self.vertex += 1 94 | self.adjlist.append(v) 95 | 96 | def add_edge(self,src,des): 97 | for i in range(len(self.adjlist)): 98 | if self.adjlist[i].name == src: 99 | for j in range(len(self.adjlist)): 100 | if self.adjlist[j].name == des: 101 | self.adjlist[i].succ.append(j) 102 | self.adjlist[j].pred.append(i) 103 | self.edge += 1 104 | break 105 | 106 | def dfsASAP(self,num): 107 | if self.mark[num]: 108 | return 109 | if len(self.adjlist[num].pred) == 0: 110 | self.adjlist[num].setASAP(-1,0) 111 | else: 112 | for j in self.adjlist[num].pred: 113 | self.dfsASAP(j) 114 | self.adjlist[num].setASAP(j,self.adjlist[j].getASAP() + self.adjlist[j].delay) 115 | 
self.depth = max(self.adjlist[num].getASAP() + self.adjlist[num].delay - 1, self.depth) 116 | if self.mode == "TCS": 117 | self.setConstrainedL(int((self.depth)*self._LC)) 118 | else: 119 | self.setConstrainedL(self.CONSTRAINED_L) 120 | self.mark[num] = True 121 | self.order.append(self.adjlist[num]) 122 | 123 | def dfsALAP(self,num): 124 | if self.mark[num]: 125 | return 126 | if len(self.adjlist[num].succ) == 0: 127 | # CONSTRAINED_L is used here, dfsASAP must be done first 128 | self.adjlist[num].setALAP(-1, self.CONSTRAINED_L - self.adjlist[num].delay + 1) 129 | else: 130 | for j in self.adjlist[num].succ: 131 | self.dfsALAP(j) 132 | self.adjlist[num].setALAP(j,self.adjlist[j].getALAP() - self.adjlist[num].delay) 133 | self.mark[num] = True 134 | self.revOrder.append(self.adjlist[num]) 135 | 136 | def dfs(self): 137 | # print("Begin DFS...") 138 | self.mark = np.zeros(self.vertex,dtype=bool) 139 | for i in range(len(self.adjlist)): 140 | if len(self.adjlist[i].succ) == 0: 141 | self.dfsASAP(i) 142 | self.mark = np.zeros(self.vertex,dtype=bool) 143 | for i in range(len(self.adjlist)): 144 | if len(self.adjlist[i].pred) == 0: 145 | self.dfsALAP(i) 146 | # print("Finish DFS.") 147 | # print("Constrained Latency is %d" % (self.CONSTRAINED_L+1)) 148 | 149 | def initial_schedule(self): 150 | # clear previous state 151 | self.totLatency = 0 152 | self.numScheduledOp = 0 153 | self.currNr = {"MUL":0, "ALU":0} 154 | self.bestNr = {"MUL":0x3f3f3f, "ALU":0x3f3f3f} 155 | self.nrt = {"MUL":np.array([0]*(self.CONSTRAINED_L+1)), "ALU":np.array([0]*(self.CONSTRAINED_L+1))} 156 | for i in range(len(self.adjlist)): 157 | self.adjlist[i].initial() 158 | # reschedule 159 | self.state = np.zeros((3,self.vertex,self.CONSTRAINED_L+1)) 160 | for i in range(self.vertex): 161 | self.state[1:3,i,self.adjlist[i].getASAP():self.adjlist[i].getALAP() + self.adjlist[i].delay] = 1 162 | for i in range(self.vertex): 163 | self.schedule_node(i,self.adjlist[i].getASAP(),0) 164 | 165 | def schedule_node(self,op,step,mode=1): 166 | if not self.test_val(op,step): 167 | return False, self.reward["penalty"] 168 | reward = 0 169 | tempR = self.mapR(self.adjlist[op].type) 170 | tempNum = self.mapR(self.adjlist[op].type,1) 171 | # remove old state 172 | oldOpNr = 0 173 | for d in range(self.adjlist[op].delay): 174 | oldOpNr += self.nrt[tempR][self.adjlist[op].cstep + d] 175 | if mode == 1: 176 | self.numScheduledOp += 1 177 | for d in range(self.adjlist[op].delay): 178 | # since the op initially placed here, so it should be at least WA 179 | self.state[0,op,self.adjlist[op].cstep + d] = 0 180 | self.nrt[tempR][self.adjlist[op].cstep + d] -= 1 181 | # current operation 182 | self.adjlist[op].schedule(step) 183 | delay = self.adjlist[op].delay 184 | for d in range(delay): 185 | self.nrt[tempR][step + d] += 1 186 | self.state[0,op,step:step+delay] = 1 187 | self.state[1,op,step:step+delay] = 0 188 | self.state[1,op,self.adjlist[op].getASAP():step] = 1 189 | self.state[1,op,step+delay:self.adjlist[op].getALAP()+delay] = 1 190 | # other influenced operations 191 | for vpred in self.adjlist[op].pred: 192 | tempALAP = self.adjlist[vpred].getALAP() 193 | d = self.adjlist[vpred].delay 194 | self.adjlist[vpred].setALAP(op,step - d) 195 | currALAP = self.adjlist[vpred].getALAP() 196 | self.state[1,vpred,min(tempALAP,currALAP)+d:max(tempALAP,currALAP)+d] = 0 if currALAP < tempALAP else 1 197 | if currALAP > tempALAP: 198 | reward += self.reward["small"] 199 | for vsucc in self.adjlist[op].succ: 200 | tempASAP = self.adjlist[vsucc].getASAP() 
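                # Scheduling `op` at `step` means this successor can start no earlier than
                # step + op.delay, so its ASAP bound and the possible-move window kept in
                # state[1] are updated below (mirroring the predecessor/ALAP update above).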
201 | self.adjlist[vsucc].setASAP(op,step + self.adjlist[op].delay) 202 | currASAP = self.adjlist[vsucc].getASAP() 203 | self.state[1,vsucc,min(tempASAP,currASAP):max(tempASAP,currASAP)] = 0 if currASAP > tempASAP else 1 204 | if currASAP < tempASAP: 205 | reward += self.reward["small"] 206 | self.totLatency = max(self.totLatency, step + self.adjlist[op].delay) # step start from 0 207 | oldNr = self.currNr[tempR] 208 | self.currNr[tempR] = self.nrt[tempR].max() 209 | if mode != 0: 210 | if self.currNr["MUL"] != 0 and self.currNr["ALU"] != 0 and self.currNr["MUL"] + self.currNr["ALU"] <= self.bestNr["MUL"] + self.bestNr["ALU"]: 211 | self.bestNr["MUL"], self.bestNr["ALU"] = self.currNr["MUL"], self.currNr["ALU"] 212 | newOpNr = 0 213 | for d in range(self.adjlist[op].delay): 214 | newOpNr += self.nrt[tempR][self.adjlist[op].cstep + d] 215 | # early stop 216 | cnt = 0 217 | legal_move = self.getAllLegalMove()[0] 218 | for legal_op in legal_move: 219 | legal_op = self.adjlist[legal_op] 220 | typeR = self.mapR(legal_op.type) 221 | if (self.nrt[typeR][legal_op.cstep+1:legal_op.cstep+1+legal_op.delay] + 1 222 | > self.currNr[typeR]).any(): 223 | cnt += 1 224 | if cnt >= len(legal_move): 225 | return False, self.reward["nothing"] 226 | # final reward 227 | if self.mode == "RCS": 228 | reward += 10 / self.totLatency 229 | else: 230 | reward += oldNr - self.currNr[tempR] 231 | # reward += (oldOpNr - newOpNr)/5 232 | return True, reward 233 | 234 | # mode 0: without recursion 235 | # mode 1: recursion 236 | def test_val(self,op,step,mode=0): 237 | if op < 0 or op >= self.vertex: 238 | return False 239 | tempR = self.mapR(self.adjlist[op].type) 240 | # Constraints 241 | if self.mode == "RCS": 242 | if self.nrt[tempR][step] + 1 > self.maxNr[tempR]: 243 | return False 244 | else: 245 | if step + self.adjlist[op].delay - 1 > self.CONSTRAINED_L: 246 | return False 247 | if mode == 1: 248 | return True 249 | if self.adjlist[op].getASAP() > step or self.adjlist[op].getALAP() < step: 250 | return False 251 | for vsucc in self.adjlist[op].succ: 252 | vsucc = self.adjlist[vsucc] 253 | if vsucc.cstep > -1 and step + self.adjlist[op].delay - 1 >= vsucc.cstep: 254 | return False 255 | for vpred in self.adjlist[op].pred: 256 | vpred = self.adjlist[vpred] 257 | if vpred.cstep > -1 and vpred.cstep + vpred.delay > step: 258 | return False 259 | return True 260 | 261 | def schedule_node_recursion(self,op,step): # only support top-down 262 | if not self.test_val(op,step,1): 263 | return False, self.reward["penalty"] 264 | delay = self.adjlist[op].delay 265 | if not self.state[2,op,step:step+delay].all(): 266 | return False, self.reward["penalty"] 267 | elif self.state[1,op,step:step+delay].all(): # the final operation that needn't move 268 | return self.schedule_node(op,step) 269 | if step < self.adjlist[op].cstep: 270 | return True, 0 271 | tot_reward = 0 272 | for vsucc in self.adjlist[op].succ: # move the operations backward 273 | if self.adjlist[vsucc].cstep < step + delay: 274 | fes, reward = self.schedule_node_recursion(vsucc,step+delay) 275 | if fes == False: 276 | return fes, reward 277 | else: 278 | tot_reward += reward 279 | fes, reward = self.schedule_node(op,step) 280 | if fes == False: 281 | return fes, reward 282 | else: 283 | tot_reward += reward 284 | return fes, tot_reward 285 | 286 | def test_final(self): 287 | flag = True 288 | for v in self.adjlist: 289 | for vsucc in v.succ: 290 | vsucc = self.adjlist[vsucc] 291 | if v.cstep + v.delay - 1 >= vsucc.cstep: 292 | flag = False 293 | print("Schedule 
conflicts with Node %d(%s) and Node %d(%s)." % (v.num,v.name,vsucc.num,vsucc.name)) 294 | return flag 295 | return flag 296 | 297 | def get_state(self): 298 | return self.state 299 | 300 | def get_partial_state(self,size,pos=(0,0)): 301 | res = np.zeros((3,size[0],size[1])) 302 | x = min(self.state.shape[1]-pos[0],size[0]) 303 | y = min(self.state.shape[2]-pos[1],size[1]) 304 | res[:,0:x,0:y] = np.copy(self.state)[:,pos[0]:x+pos[0],pos[1]:y+pos[1]] 305 | return res 306 | 307 | def getNrt(self): 308 | return self.nrt 309 | 310 | def getAllLegalMove(self): 311 | res = [] 312 | res_dict = dict() 313 | cnt = 0 314 | for (op,row) in enumerate(self.get_state()[1,:,:]): 315 | if (row[self.adjlist[op].cstep:] == 1).any(): # backward! 316 | res.append(op) 317 | res_dict[cnt] = op 318 | cnt += 1 319 | return (res,res_dict) 320 | 321 | def getLegalMove(self,pos=(0,0)): 322 | res = [] 323 | res_dict = dict() 324 | cnt = 0 325 | for (op,row) in enumerate(self.get_state()[1,:,:]): 326 | if pos[0] <= op < pos[0] + 50: # 50! 327 | if (row[max(pos[1],self.adjlist[op].cstep):] == 1).any(): # backward! 328 | res.append(op-pos[0]) 329 | res_dict[cnt] = op - pos[0] 330 | cnt += 1 331 | return (res,res_dict) 332 | 333 | def output_adjlist(self): 334 | print("Adjacent List:") 335 | for v in self.adjlist: 336 | print("Node %d(%s):" % (v.num,v.name),end=" ") 337 | for op in v.succ: 338 | print(op+1,end=" ") 339 | print() 340 | 341 | def output_axap(self): 342 | print("AXAP:") 343 | for v in self.adjlist: 344 | print("Node %d(%s): [%d, %d]" % (v.num,v.name,v.getASAP(),v.getALAP())) 345 | 346 | def output(self): 347 | print("# of operations: %d" % self.vertex) 348 | print("Latency factor: %f, CONSTRAINED_L: %d, Mul_delay: %d" % (self._LC,self.CONSTRAINED_L+1,self.mul_delay)) 349 | print("Best # of resources: MUL: %d, ALU: %d" % (self.bestNr["MUL"], self.bestNr["ALU"])) 350 | print("Current # of resources: MUL: %d, ALU: %d" % (self.currNr["MUL"], self.currNr["ALU"])) 351 | print("Latency: %d" % self.totLatency) 352 | print("Schedule: ") 353 | for v in self.adjlist: 354 | print("Node %d(%s): %d" % (v.num,v.name,v.cstep)) -------------------------------------------------------------------------------- /ilp_solver.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the ILP solver for HLS scheduling. 
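The time-constrained scheduling problem is formulated with PuLP: a binary variable
x[i][t] selects the control step of operation i inside its [ASAP, ALAP] window, the
objective minimizes the total number of MUL and ALU units, and assignment, resource,
and precedence constraints tie the two together (see the constraint generation below).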
8 | ''' 9 | 10 | import pulp 11 | from graph import Graph 12 | 13 | class ILPSolver(object): 14 | def __init__(self, file_num, mul_delay=2, lf=1.0): 15 | self.schedule = dict() 16 | g = Graph("TCS",mul_delay) 17 | g.setLatencyFactor(lf) 18 | with open("./DAG/dag_%d.dot" % file_num) as infile: 19 | g.read(infile) 20 | g.initialize() 21 | # print("Begin generating ILP formulas for time-constrained scheduling problem...") 22 | prob = pulp.LpProblem("Time-Constrained Scheduling Problem",pulp.LpMinimize) 23 | M1 = pulp.LpVariable("MUL",lowBound=1,upBound=None,cat=pulp.LpInteger) 24 | M2 = pulp.LpVariable("ALU",lowBound=1,upBound=None,cat=pulp.LpInteger) 25 | prob += M1 + M2, "Minimize the number of FUs" 26 | # Time frame constraints 27 | x = pulp.LpVariable.dicts("x",(range(len(g.adjlist)),range(g.getConstrainedL())),lowBound=0,upBound=1,cat=pulp.LpInteger) 28 | for (i,node) in enumerate(g.adjlist): 29 | prob += pulp.lpSum([x[i][t] for t in range(node.getASAP(),node.getALAP()+1)]) == 1, "" 30 | # print("Time frame constraints generated.") 31 | # Resource constraints 32 | rowR = [] 33 | for i in range(g.getConstrainedL()): 34 | rowR.append({"ALU":[],"MUL":[]}) # be careful of share memory 35 | for (i,node) in enumerate(g.adjlist): 36 | for t in range(node.getASAP(),node.getALAP()+node.delay): 37 | rowR[t][g.mapR(node.type)].append(i) 38 | for t in range(g.getConstrainedL()): 39 | for typeR in ["ALU","MUL"]: 40 | if len(rowR[t][typeR]) < 2: 41 | continue 42 | else: 43 | prob += pulp.lpSum([x[i][td] for i in rowR[t][typeR] 44 | for td in range(max(t-g.adjlist[i].delay+1,0),t+1)]) - (M1 if typeR == "MUL" else M2)<= 0, "" 45 | # print("Resource constraints generated.") 46 | # Precedence constraints 47 | for (i,node) in enumerate(g.adjlist): 48 | for vsucc in node.succ: 49 | prob += (pulp.lpSum([(t+1)*x[i][t] for t in range(node.getASAP(),node.getALAP()+1)]) 50 | - pulp.lpSum([(t+1)*x[vsucc][t] for t in range(g.adjlist[vsucc].getASAP(),g.adjlist[vsucc].getALAP()+1)]) 51 | <= (-1)*node.delay), "" 52 | # print("Precedence constraints generated.") 53 | # print("Finish ILP generation.") 54 | prob.writeLP("./ILP_formulation/dag_%d.lp" % (file_num)) 55 | prob.solve() 56 | # print("MUL = %d" % prob.variablesDict()["MUL"].varValue) 57 | # print("ALU = %d" % prob.variablesDict()["ALU"].varValue) 58 | out_file = open("./Sol/dag_%d.sol" % file_num,"w") 59 | for v in sorted(prob.variables(),key=lambda x: int(x.name.split("_")[1]) if len(x.name.split("_")) != 1 else 0): 60 | if v.name[0] == "x" and v.varValue == 1: 61 | op = v.name.split("_")[1] 62 | cstep = v.name.split("_")[-1] 63 | self.schedule[int(op)] = int(cstep) 64 | out_file.write("%s, %s\n" % (op,cstep)) 65 | out_file.close() 66 | # print("Status:", pulp.LpStatus[prob.status]) 67 | 68 | def getOptSchedule(self): 69 | return self.schedule -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains logger used for recording the training process. 
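Each run creates ./Log/<name>_<NN>.log, where the consecutive run number NN is derived
from the existing files in ./Log; the number is also returned so that rl.py and sl.py
can tag saved networks and loss curves with it.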
8 | ''' 9 | 10 | import re, os, logging 11 | 12 | class LogHandler(object): 13 | def __init__(self, name, level=logging.INFO): 14 | self.name = name 15 | self.logger = logging.getLogger(__name__) 16 | self.logger.setLevel(level) 17 | try: 18 | self.file_num = int(re.split("[_.]",sorted(os.listdir("./Log"),key=lambda x: x.split("_")[1])[-2])[1]) + 1 19 | except: 20 | self.file_num = 1 21 | self.file_name = "./Log/%s_%02d.log" % (self.name,self.file_num) 22 | self.handler = logging.FileHandler(self.file_name) 23 | self.formatter = logging.Formatter('%(asctime)s - %(message)s') # - %(name)s - %(levelname)s 24 | self.handler.setFormatter(self.formatter) 25 | self.logger.addHandler(self.handler) 26 | 27 | def getLogger(self): 28 | return self.file_num, self.logger -------------------------------------------------------------------------------- /node.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the definition and implementation of Graph class. 8 | ''' 9 | 10 | from copy import deepcopy 11 | 12 | class OSPair(object): 13 | def __init__(self, op, step): 14 | super(OSPair, self).__init__() 15 | self.op = op 16 | self.step = step 17 | 18 | def __repr__(self): 19 | return "Op: %d, Step: %d" % (self.op + 1,self.step + 1) 20 | 21 | class Node(object): 22 | def __init__(self, num_, name_, type_, delay_): 23 | super(Node, self).__init__() 24 | self.num = num_ 25 | self.name = name_ 26 | self.type = type_ 27 | self.delay = delay_ 28 | self.pred = [] 29 | self.succ = [] 30 | self.asap = [] # the -1-th 31 | self.alap = [] # the 1-st 32 | self.iasap = [] 33 | self.ialap = [] 34 | self.cstep = -1 35 | 36 | def initial(self): 37 | self.asap = [] 38 | self.alap = [] 39 | for p in self.iasap: 40 | self.asap.append(deepcopy(p)) # very important 41 | for p in self.ialap: 42 | self.alap.append(deepcopy(p)) 43 | self.asap.sort(key=lambda x:x.step,reverse=True) 44 | self.alap.sort(key=lambda x:x.step) 45 | self.cstep = -1 46 | 47 | def schedule(self, step): 48 | self.cstep = step 49 | 50 | def setASAP(self,op,asap_): 51 | flag = False 52 | for i in range(len(self.asap)): 53 | if self.asap[i].op == op: 54 | self.asap[i].step = asap_ 55 | flag = True 56 | if not flag: 57 | self.asap.append(OSPair(op,asap_)) 58 | self.iasap.append(OSPair(op,asap_)) # different copies 59 | self.asap.sort(key=lambda x:x.step,reverse=True) 60 | 61 | def setALAP(self,op,alap_): 62 | flag = False 63 | for i in range(len(self.alap)): 64 | if self.alap[i].op == op: 65 | self.alap[i].step = alap_ 66 | flag = True 67 | if not flag: 68 | self.alap.append(OSPair(op,alap_)) 69 | self.ialap.append(OSPair(op,alap_)) 70 | self.alap.sort(key=lambda x:x.step) 71 | 72 | def getASAP(self): 73 | return self.asap[0].step 74 | 75 | def getALAP(self): 76 | return self.alap[0].step -------------------------------------------------------------------------------- /policy.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the architecture of the policy network. 8 | The policy network is modify from the VGG network. 
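It takes the 3-channel 50x50 scheduling state as input and outputs a log-probability
distribution (log_softmax) over the 50 candidate operations of the current window.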
9 | @article{vgg, 10 | author = {Karen Simonyan and Andrew Zisserman}, 11 | title = {Very Deep Convolutional Networks for Large-Scale Image Recognition}, 12 | journal = {CoRR}, 13 | year = {2014}, 14 | url = {http://arxiv.org/abs/1409.1556}, 15 | } 16 | ''' 17 | 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | 21 | class Policy(nn.Module): 22 | # modify from VGG11 23 | def __init__(self, output_size, batch_norm=False): 24 | super(Policy, self).__init__() 25 | # minibatch*in_channels*iH*iW 26 | # bs*output_size 27 | self.features = nn.Sequential( 28 | # 3*50*50 29 | nn.Conv2d(3,64,kernel_size=3,padding=1), # default stride = 1 30 | nn.ReLU(True), 31 | # 64*50*50 32 | nn.Conv2d(64,64,kernel_size=3,padding=1), 33 | nn.ReLU(True), 34 | # 64*50*50 35 | nn.MaxPool2d(kernel_size=2,stride=2), # default stride = kernel size 36 | # 64*25*25 37 | nn.Conv2d(64,128,kernel_size=3,padding=1), 38 | nn.ReLU(True), 39 | # 128*25*25 40 | nn.Conv2d(128,128,kernel_size=3,padding=1), 41 | nn.ReLU(True), 42 | # 128*25*25 43 | nn.MaxPool2d(kernel_size=2,stride=2), 44 | # 128*12*12 45 | nn.Conv2d(128,256,kernel_size=3,padding=1), 46 | nn.ReLU(True), 47 | # 256*12*12 48 | nn.Conv2d(256,256,kernel_size=3,padding=1), 49 | nn.ReLU(True), 50 | # 256*12*12 51 | nn.MaxPool2d(kernel_size=2,stride=2) 52 | # 256*6*6 53 | ) 54 | self.classifier = nn.Sequential( 55 | nn.Linear(256 * 6 * 6, 2048), 56 | nn.ReLU(True), 57 | nn.Dropout(), 58 | nn.Linear(2048, 2048), 59 | nn.ReLU(True), 60 | nn.Dropout(), 61 | nn.Linear(2048, output_size), 62 | ) 63 | self._initialize_weights() 64 | 65 | def _initialize_weights(self): 66 | for m in self.modules(): 67 | if isinstance(m, nn.Conv2d): 68 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 69 | if m.bias is not None: 70 | nn.init.constant_(m.bias, 0) 71 | elif isinstance(m, nn.BatchNorm2d): 72 | nn.init.constant_(m.weight, 1) 73 | nn.init.constant_(m.bias, 0) 74 | elif isinstance(m, nn.Linear): 75 | nn.init.normal_(m.weight, 0, 0.01) 76 | nn.init.constant_(m.bias, 0) 77 | 78 | def forward(self, x): 79 | x = self.features(x) 80 | x = x.view(x.size(0), -1) 81 | x = self.classifier(x) 82 | x = F.log_softmax(x,dim=1) 83 | return x 84 | 85 | def forward_without_softmax(self, x): 86 | x = self.features(x) 87 | x = x.view(x.size(0), -1) 88 | x = self.classifier(x) 89 | return x -------------------------------------------------------------------------------- /preprocess.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the preprocess part. 8 | ''' 9 | 10 | from graph import Graph 11 | from ilp_solver import ILPSolver 12 | 13 | def preprocess(tot_files): 14 | state_action_pair = [] 15 | # logger.info("Begin generating data...") 16 | for file_num in range(1,tot_files+1): 17 | generateData(file_num,state_action_pair) 18 | if file_num % 10 == 0: 19 | print("Generated %d / %d." % (file_num,tot_files)) 20 | # logger.info("Generated %d / %d." 
% (file_num,tot_files)) 21 | # logger.info("Finish generating data.") 22 | return state_action_pair 23 | 24 | def generateData(file_num,state_action_pair,state_size=(50,50)): 25 | graph = Graph("TCS") 26 | graph.read(open("./DAG/dag_%d.dot" % file_num,"r")) 27 | graph.initialize() 28 | graph.initial_schedule() 29 | if graph.getConstrainedL() > 50: 30 | print("File %d exceeds 50 latency." % file_num) 31 | return 32 | try: 33 | sol = open("./Sol/dag_%d.sol" % file_num,"r") 34 | ops = dict() 35 | for line in sol: 36 | op, cstep = map(int,line.split(", ")) 37 | ops[op] = cstep 38 | except: 39 | ilp = ILPSolver(file_num,graph.getMulDelay(),graph.getLf()) 40 | ops = ilp.getOptSchedule() 41 | for node in graph.revOrder: 42 | if ops[node.num] == node.cstep: 43 | continue 44 | for t in range(node.cstep+1,ops[node.num]+1): 45 | state_action_pair.append((graph.get_partial_state(state_size),node.num)) 46 | graph.schedule_node(node.num,t) 47 | # logger.debug("Node %d schedules on cstep %d" % (node.num,t)) -------------------------------------------------------------------------------- /rl.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the reinforcement learning (RL) part of the training pipeline. 8 | ''' 9 | 10 | import time, sys, os, argparse 11 | import random 12 | import torch 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | 16 | from logger import LogHandler 17 | from graph import Graph 18 | from policy import Policy 19 | from agent import Agent 20 | from dag_dataset import DagDataset 21 | 22 | parser = argparse.ArgumentParser(description="Deep-RL-Based HLS Scheduler (Reinforcement learning)") 23 | parser.add_argument("--mode", type=str, default="TCS", help="Scheduling mode: TCS or RCS (default TCS)") 24 | parser.add_argument("--lc", type=float, default=1, help="Latency factor used for TCS (default: 1)") 25 | parser.add_argument("--mul_delay", type=int, default=2, help="MUL delay (default: 2)") 26 | parser.add_argument("--episodes", type=int, default=1000, help="Max iteration episodes (default: 1000)") 27 | parser.add_argument("--input_graphs", type=int, default=3000, help="Number of input graphs? (default: 3000)") 28 | parser.add_argument("--batch_size", type=int, default=32, help="Batch size? (default: 32)") 29 | parser.add_argument("--timesteps", type=int, default=2500, help="Max timestep in one simulation (default: 2500)") 30 | parser.add_argument("--learning_rate", type=float, default=1e-3, help="Learning rate? (default: 1e-3)") 31 | parser.add_argument("--use_cuda", type=int, default=1, help="Use cuda? (default: True, the 1st GPU)") 32 | parser.add_argument("--use_network", type=str, default="", help="Use previous network? Input the name of the network. (default: None)") 33 | parser.add_argument("--test", type=int, default=-1, help="Test file num? (default: -1)") 34 | parser.add_argument("--stride", type=int, default=3, help="Stride of the kernel? 
(default: 3)") 35 | args = parser.parse_args() 36 | 37 | best_reward = 0 38 | 39 | STATE_SIZE = (50,50) 40 | device = torch.device(("cuda:%d" % (args.use_cuda-1)) if args.use_cuda != 0 else "cpu") 41 | agent = Agent(STATE_SIZE,use_network=args.use_network,device=device,lr=args.learning_rate) 42 | 43 | if args.test == -1: 44 | logger_num, logger = LogHandler("rl").getLogger() 45 | logger.info("Deep-RL-Based HLS Scheduler (Reinforcement Learning)") 46 | print("Logger num: %d" % logger_num) 47 | file_name = "_rl_" + time.strftime("%Y%m%d_") + str(logger_num) 48 | logger.info(agent.policy.features) 49 | logger.info(agent.policy.classifier) 50 | logger.info("NLLLoss + Adam") 51 | logger.info("Batch size: %d, Learning rate: %f" % (args.batch_size,args.learning_rate)) 52 | logger.info(Graph("TCS").reward) 53 | 54 | def train(episode): # Monte Carol REINFORCE 55 | global best_reward 56 | res_loss, res_reward = [], [] 57 | for i_graph in range(args.input_graphs//args.batch_size): 58 | all_log_probs, all_rewards = [], [] 59 | # simulate batch_size graphs 60 | for minibatch in range(args.batch_size): 61 | log_probs, rewards = [], [] 62 | graph = Graph(args.mode) # "TCS" 63 | graph.read(open("./DAG/dag_%d.dot" % (i_graph*args.batch_size+minibatch+1),"r")) 64 | graph.initialize() 65 | graph.initial_schedule() 66 | # one full trace \tau 67 | for timestep in range(args.timesteps): 68 | state = torch.Tensor(graph.get_partial_state(STATE_SIZE)).float().to(device) 69 | state = state.resize_((1,state.size()[0],state.size()[1],state.size()[2])) 70 | legalMove = graph.getLegalMove() 71 | if len(legalMove[0]) == 0: 72 | break 73 | log_prob, action = agent.get_action(state,legalMove) 74 | fes, reward = graph.schedule_node(action, graph.vertex if action >= graph.vertex else graph.adjlist[action].cstep + 1) 75 | log_probs.append(log_prob) 76 | rewards.append(reward) 77 | if fes == False: 78 | break 79 | all_log_probs.append(log_probs) 80 | all_rewards.append(np.array(rewards).astype(np.float)) 81 | # update policy 82 | loss = agent.update_weight(all_log_probs,all_rewards,baseline=False) # be careful that the rewards are not aligned 83 | avg_reward = np.array([x.sum() for x in all_rewards]).mean() 84 | res_loss.append(loss) 85 | res_reward.append(avg_reward) 86 | if i_graph % 10 == 0: 87 | print("Train - Episode %d, Batch: %d, Loss: %f, Reward: %f" % (episode,i_graph,loss,avg_reward)) 88 | logger.info("Train - Episode %d, Batch: %d, Loss: %f, Reward: %f" % (episode,i_graph,loss,avg_reward)) 89 | if best_reward < avg_reward: 90 | best_reward = avg_reward 91 | torch.save(agent.policy,"./Networks/policy" + file_name + "_best.pkl") 92 | del all_log_probs[:] 93 | del all_rewards[:] 94 | return (np.array(res_loss).mean(), np.array(res_reward).mean()) 95 | 96 | def test(file_num): 97 | print("Begin testing...") 98 | nrt, nrta, step = [], [], [] 99 | graph = Graph(args.mode,args.mul_delay) # "TCS" 100 | graph.setLatencyFactor(args.lc) 101 | graph.read(open("./DAG/dag_%d.dot" % file_num,"r")) 102 | graph.initialize() 103 | graph.initial_schedule() 104 | print("ASAP # of resources: MUL: %d, ALU: %d" % (graph.currNr["MUL"],graph.currNr["ALU"])) 105 | step.append(0) 106 | nrt.append(graph.currNr["MUL"]) 107 | nrta.append(graph.currNr["ALU"]) 108 | flag_in = False 109 | timestep = 0 110 | cnt_loop = 0 111 | stride = args.stride 112 | pos_num = [0] 113 | while pos_num[-1] + STATE_SIZE[0] <= graph.vertex: 114 | pos_num.append(pos_num[-1] + stride) 115 | print(pos_num) 116 | while timestep < args.timesteps: 117 | for i in pos_num: 
118 |             state = torch.Tensor(graph.get_partial_state(STATE_SIZE,pos=(i,0))).float().to(device)
119 |             state = state.resize_((1,state.size()[0],state.size()[1],state.size()[2]))
120 |             legalMove = graph.getLegalMove(pos=(i,0))
121 |             if cnt_loop >= len(pos_num):
122 |                 print("Early stop! No legal actions!")
123 |                 flag_in = True
124 |                 break
125 |             if len(legalMove[0]) == 0:
126 |                 cnt_loop += 1
127 |                 continue
128 |             cnt_loop = 0
129 |             # log_prob, action = agent.get_sl_action(state)
130 |             log_prob, action = agent.get_deterministic_action(state, legalMove)
131 |             action += i
132 |             fes, reward = graph.schedule_node(action, graph.vertex if action >= graph.vertex else graph.adjlist[action].cstep + 1)
133 |             if fes == False:
134 |                 if action >= graph.vertex:
135 |                     print("Timestep %d: op %d (exceed), not available!" % (timestep+1,action))
136 |                 else:
137 |                     print("Timestep %d: op %d move to %d, early stop!" % (timestep+1,action,graph.adjlist[action].cstep + 1))
138 |                 flag_in = True
139 |                 break
140 |             else:
141 |                 print("Timestep %d: op %d move to %d, reward: %f" % (timestep+1,action,graph.adjlist[action].cstep,reward))
142 |             step.append(timestep+1)
143 |             nrt.append(graph.currNr["MUL"])
144 |             nrta.append(graph.currNr["ALU"])
145 |             timestep += 1
146 |         if flag_in:
147 |             break
148 |     print("Finish testing.")
149 |     print(graph.test_final())
150 |     print(graph.get_state())
151 |     graph.output()
152 |     fig = plt.figure()
153 |     ax = fig.add_subplot(111)
154 |     l1 = ax.plot(step,nrt,label="MUL")
155 |     l2 = ax.plot(step,nrta,label="ALU")
156 |     ax.set_xlabel("Step")
157 |     ax.set_ylabel("# of ops")
158 |     # ax.set_title("%s" % input())
159 |     ax.legend(loc=1)
160 |     fig.savefig("./fig_test_%d.pdf" % file_num,format="pdf")
161 |     plt.show()
162 |     return (nrt[0],nrta[0],graph.bestNr["MUL"],graph.bestNr["ALU"])
163 | 
164 | def visualization(results):
165 |     res_r = np.array([x[1] for x in results]) # train() returns (avg_loss, avg_reward)
166 |     res_l = np.array([x[0] for x in results])
167 |     np.save("./Loss/" + "reward" + file_name + ".npy",res_r)
168 |     np.save("./Loss/" + "loss" + file_name + ".npy",res_l)
169 |     fig = plt.figure()
170 |     ax1 = fig.add_subplot(111)
171 |     lns1 = ax1.plot(range(len(res_r)),res_r,label="Reward",color="b")
172 |     ax2 = ax1.twinx() # second y-axis sharing the same x-axis
173 |     lns2 = ax2.plot(range(len(res_l)),res_l,label="Loss",color="r")
174 |     lns = lns1 + lns2
175 |     labs = [l.get_label() for l in lns]
176 |     ax1.legend(lns, labs, loc=0)
177 |     fig.savefig("./Loss/" + "fig" + file_name + ".jpg")
178 | 
179 | if args.test != -1:
180 |     agent.policy.eval()
181 |     res = []
182 |     # for i in range(10001,10021):
183 |     #     res.append(test(i))
184 |     res.append(test(args.test)) # test the DAG given by --test
185 |     for x in res:
186 |         print("%d %d %d %d %d %d" % (x[0],x[1],x[0]+x[1],x[2],x[3],x[2]+x[3]))
187 |     sys.exit()
188 | 
189 | logger.info("Begin training...")
190 | startTime = time.time()
191 | results = []
192 | for episode in range(1,args.episodes+1):
193 |     results.append(train(episode))
194 |     visualization(results)
195 |     logger.info("Train Episode %d: Avg. Loss: %f, Avg. Reward: %f" % (episode,results[-1][0],results[-1][1]))
196 |     print("Train Episode %d: Avg. Loss: %f, Avg. Reward: %f" % (episode,results[-1][0],results[-1][1]))
197 |     torch.save(agent.policy,"./Networks/policy" + file_name +".pkl")
198 |     usedTime = time.time() - startTime
199 |     print("Finish %d / %d. Total time used: %f min. Rest of time: %f min."
200 | % (episode,args.episodes,usedTime/60,usedTime/60*args.episodes/episode-usedTime/60)) 201 | logger.info("Finish training.") -------------------------------------------------------------------------------- /sl.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the supervised learning (SL) part of the training pipeline. 8 | ''' 9 | 10 | import time, sys, os, argparse 11 | import random 12 | import numpy as np 13 | import visdom 14 | import matplotlib.pyplot as plt 15 | from logger import LogHandler 16 | 17 | import torch 18 | import torch.nn as nn 19 | from torch.utils.data import DataLoader 20 | 21 | from graph import Graph 22 | from preprocess import preprocess 23 | from policy import Policy 24 | from dag_dataset import DagDataset 25 | 26 | parser = argparse.ArgumentParser(description="Deep-RL-Based HLS Scheduler (Supervised Learning)") 27 | parser.add_argument("--use_cuda", type=int, default=1, help="Use cuda? (default: True, the 1st GPU)") 28 | parser.add_argument("--input_graphs", type=int, default=3500, help="Number of input graphs? (default: 3500)") 29 | parser.add_argument("--batch_size", type=int, default=128, help="Batch size? (default: 128)") 30 | parser.add_argument("--learning_rate", type=float, default=5e-4, help="Learning rate? (default: 5e-4)") 31 | parser.add_argument("--epoch", type=int, default=10000, help="Number of epoch? (default: 10000)") 32 | parser.add_argument("--use_network", type=str, default="", help="Use previous network? Input the name of the network. (default: None)") 33 | args = parser.parse_args() 34 | 35 | logger_num, logger = LogHandler("sl").getLogger() 36 | logger.info("Deep-RL-Based HLS Scheduler (Supervised Learning)") 37 | print("Logger num: %d" % logger_num) 38 | device = torch.device(("cuda:%d" % (args.use_cuda-1)) if args.use_cuda != 0 else "cpu") 39 | file_name = "_sl_" + time.strftime("%Y%m%d_") + str(logger_num) 40 | 41 | STATE_SIZE = (50,50) 42 | 43 | if args.use_network == "": 44 | net = Policy(STATE_SIZE[0]).to(device) 45 | print("Build a new network!") 46 | else: 47 | try: 48 | net = torch.load("./Networks/" + args.use_network).to(device) 49 | print("Loaded %s." % args.use_network) 50 | logger.info("Pretrained network: %s (%s)" % (args.use_network,"gpu" if args.use_cuda else "cpu")) 51 | except: 52 | print("No such network named %s. Rebuild a new network!" 
% args.use_network) 53 | net = Policy(STATE_SIZE[0]).to(device) 54 | network_file = "./Networks/policy" + file_name + ".pkl" 55 | logger.info("New network: %s (%s)" % (network_file,"gpu" if args.use_cuda else "cpu")) 56 | criterion = nn.NLLLoss() 57 | optimizer = torch.optim.Adam(net.parameters(),lr=args.learning_rate) 58 | logger.info(net.features) 59 | logger.info(net.classifier) 60 | logger.info("NLLLoss (Negative Log likelihood loss) + Adam") 61 | logger.info("Batch size: %d, Learning rate: %f" % (args.batch_size,args.learning_rate)) 62 | 63 | best_accuracy = 0 64 | viz = visdom.Visdom() 65 | cur_batch_win, epoch_loss_win = None, None 66 | 67 | def train(epoch): 68 | global cur_batch_win 69 | net.train() 70 | total_correct = 0 71 | loss_list, batch_list = [], [] 72 | for i, (state, action) in enumerate(data_train_loader): 73 | state = torch.Tensor(state.float()).to(device) 74 | action = torch.Tensor(action.float()).type(torch.LongTensor).to(device) 75 | optimizer.zero_grad() 76 | output = net(state) 77 | # bs*50 <- bs labels 78 | loss = criterion(output,action) 79 | loss_list.append(loss.item()) 80 | batch_list.append(i+1) 81 | predict = output.data.max(1)[1] 82 | total_correct += predict.eq(action.data.view_as(predict)).sum() 83 | if i % 10 == 0: 84 | logger.info("Train - Epoch %d, Batch: %d, Loss: %f" % (epoch,i,loss.item())) 85 | if viz.check_connection(): 86 | cur_batch_win = viz.line(X=torch.FloatTensor(batch_list), Y=torch.FloatTensor(loss_list), 87 | win=cur_batch_win, name='current_batch_loss', 88 | update=(None if cur_batch_win is None else 'replace'), 89 | opts={'title': 'Epoch Loss Trace','xlabel': 'Batch Number','ylabel': 'Loss','width': 1200,'height': 600}) 90 | loss.backward() 91 | optimizer.step() 92 | avg_loss = np.array(loss_list).sum() / len(data_train_loader) 93 | accuracy = float(total_correct) / len(data_train) 94 | logger.info("Train Epoch %d: Avg. Loss: %f, Accuracy: %f" % (epoch,avg_loss,accuracy)) 95 | print("Train Epoch %d: Avg. Loss: %f, Accuracy: %f" % (epoch,avg_loss,accuracy)) 96 | return avg_loss 97 | 98 | def test(epoch): 99 | global best_accuracy 100 | net.eval() 101 | total_correct = 0 102 | avg_loss = 0.0 103 | for i, (state, action) in enumerate(data_test_loader): 104 | state = torch.Tensor(state.float()).to(device) 105 | action = torch.Tensor(action.float()).type(torch.LongTensor).to(device) 106 | output = net(state) 107 | avg_loss += criterion(output, action).item() # sum() 108 | predict = output.data.max(1)[1] 109 | total_correct += predict.eq(action.data.view_as(predict)).sum() 110 | avg_loss /= (len(data_test_loader)) 111 | accuracy = float(total_correct) / len(data_test) 112 | logger.info("Test Epoch %d: Avg. Loss: %f, Accuracy: %f" % (epoch,avg_loss,accuracy)) 113 | print("Test Epoch %d: Avg. 
Loss: %f, Accuracy: %f" % (epoch,avg_loss,accuracy))
114 |     if best_accuracy < accuracy:
115 |         best_accuracy = accuracy
116 |         torch.save(net,network_file[:-4]+"_best.pkl")
117 |     return avg_loss
118 | 
119 | def visualization(epoch,train_loss,test_loss):
120 |     fig = plt.figure()
121 |     ax = fig.add_subplot(111)
122 |     ax.plot([i for i in range(1,epoch+1)],np.array(train_loss),label="train")
123 |     ax.plot([i for i in range(1,epoch+1)],np.array(test_loss),label="test")
124 |     ax.set_xlabel("Epoch")
125 |     ax.set_ylabel("Loss")
126 |     ax.legend()
127 |     fig.savefig("./Loss/fig" + file_name + ".jpg")
128 |     plt.cla()
129 |     plt.close()
130 |     np.save("./Loss/train_loss" + file_name + ".npy",np.array(train_loss))
131 |     np.save("./Loss/test_loss" + file_name + ".npy",np.array(test_loss))
132 | 
133 | state_action_pair = preprocess(args.input_graphs)
134 | random.shuffle(state_action_pair) # important to break the correlation between consecutive samples
135 | cut = int(0.96*len(state_action_pair))
136 | data_train = DagDataset(state_action_pair[:cut])
137 | data_test = DagDataset(state_action_pair[cut:])
138 | data_train_loader = DataLoader(data_train,shuffle=True,batch_size=args.batch_size,num_workers=12)
139 | data_test_loader = DataLoader(data_test,shuffle=True,batch_size=args.batch_size,num_workers=12)
140 | print("# of train data: %d" % len(data_train))
141 | print("# of test data: %d" % len(data_test))
142 | logger.info("# of input graphs: %d" % args.input_graphs)
143 | logger.info("# of train data: %d" % len(data_train))
144 | logger.info("# of test data: %d" % len(data_test))
145 | startTime = time.time()
146 | logger.info("Begin training...")
147 | train_loss = []
148 | test_loss = []
149 | for epoch in range(1,args.epoch+1):
150 |     train_loss.append(train(epoch))
151 |     test_loss.append(test(epoch))
152 |     visualization(epoch,train_loss,test_loss)
153 |     torch.save(net,network_file)
154 |     usedTime = time.time() - startTime
155 |     print("Finish %d / %d. Total time used: %f min. Rest of time: %f min."
156 |           % (epoch,args.epoch,usedTime/60,usedTime/60*args.epoch/epoch-usedTime/60))
157 | logger.info("Finish training.")
--------------------------------------------------------------------------------