├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── agent.py ├── dag_dataset.py ├── dag_generator.py ├── graph.py ├── ilp_solver.py ├── logger.py ├── node.py ├── policy.py ├── preprocess.py ├── rl.py └── sl.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep-Reinforcement-Learning-Based Scheduler for High-Level Synthesis
2 | 
3 | This is the implementation of the Deep-Reinforcement-Learning-Based Scheduler for High-Level Synthesis (HLS). This work was published at ICCAD'19.
4 | 
5 | ```
6 | @inproceedings{DBLP:conf/iccad/ChenS19,
7 |   author    = {Hongzheng Chen and
8 |                Minghua Shen},
9 |   editor    = {David Z. Pan},
10 |   title     = {A Deep-Reinforcement-Learning-Based Scheduler for {FPGA} {HLS}},
11 |   booktitle = {Proceedings of the International Conference on Computer-Aided Design,
12 |                {ICCAD} 2019, Westminster, CO, USA, November 4-7, 2019},
13 |   pages     = {1--8},
14 |   publisher = {{ACM}},
15 |   year      = {2019},
16 |   url       = {https://doi.org/10.1109/ICCAD45719.2019.8942126},
17 |   doi       = {10.1109/ICCAD45719.2019.8942126},
18 | }
19 | ```
20 | 
21 | To run the program, please follow the instructions below.
22 | 
23 | ```bash
24 | # Generate DAGs for supervised learning
25 | $ python3 dag_generator.py
26 | 
27 | # Supervised learning
28 | $ python3 sl.py
29 | 
30 | # Reinforcement learning
31 | # Use --use_network to pass in a pre-trained SL network
32 | $ python3 rl.py
33 | ```
34 | 
35 | Prepare the test DAGs in the `DAG` folder and name them `dag_X.dot` (where `X` is a number not used by the DAGs in the training set).
36 | 
37 | ```bash
38 | # Test the Xth DAG
39 | $ python3 rl.py --test X
40 | ```
41 | 
42 | Other parameter settings can be found in the source code.
43 | 
44 | 
45 | ## Requirements
46 | * Python 3.6
47 | * PyTorch v0.4
48 | * Visdom v0.1
49 | * PuLP v1.6.8
50 | * NumPy v1.14
51 | * Matplotlib v2.2.2
--------------------------------------------------------------------------------
/agent.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright 2018 Hongzheng Chen
3 | E-mail: chenhzh37@mail2.sysu.edu.cn
4 | 
5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis.
6 | 
7 | This file contains the Agent class.
8 | '''
9 | 
10 | import torch
11 | import torch.nn as nn
12 | import torch.optim as optim
13 | import torch.nn.functional as F
14 | from torch.distributions import Categorical
15 | import numpy as np
16 | from policy import Policy
17 | 
18 | class Agent(object):
19 |     def __init__(self, state_size, use_network="", device="cuda",lr=5e-4):
20 |         super(Agent, self).__init__()
21 |         self.device = device
22 |         if use_network == "":
23 |             net = Policy(state_size[0]).to(self.device)
24 |             print("Build a new network!")
25 |         else:
26 |             try:
27 |                 net = torch.load("./Networks/" + use_network).to(self.device)
28 |                 net.classifier = nn.Sequential(*list(net.classifier.children())[:-1]) # drop the final layer of the loaded classifier
29 |                 print("Loaded %s." % use_network)
30 |             except:
31 |                 net = Policy(state_size[0]).to(self.device)
32 |                 print("No such network named %s. Rebuild a new network!"
% use_network) 33 | self.policy = net 34 | # self.policy = net.eval() # avoid dropout 35 | self.optimizer = optim.Adam(self.policy.parameters(),lr=lr) 36 | 37 | def get_sl_action(self, state): 38 | output = self.policy(state) # bs(1)*50 39 | # randomly select 40 | action = torch.topk(output,1) 41 | action = action[1] # op 42 | criterion = nn.NLLLoss() 43 | nllloss = criterion(output,torch.Tensor([action]).type(torch.LongTensor).to(self.device).resize_((1,))) 44 | return nllloss, action 45 | 46 | def get_action(self, state, legal_move): 47 | output = self.policy.forward_without_softmax(state) # bs(1)*50 48 | legal_move_dict = legal_move[1] 49 | legal_move = torch.tensor(legal_move[0]).long().to(self.device) 50 | legal_prob = torch.index_select(output,1,legal_move) 51 | # randomly select 52 | if len(legal_prob.shape) == 2 and legal_prob.shape[1] != 1: 53 | m = Categorical(F.softmax(legal_prob,dim=1)) 54 | index = m.sample().item() 55 | else: 56 | index = 0 57 | action = legal_move_dict[index] 58 | criterion = nn.NLLLoss() 59 | nllloss = criterion(F.log_softmax(legal_prob,dim=1),torch.Tensor([index]).type(torch.LongTensor).to(self.device).resize_((1,))) 60 | del output 61 | return nllloss, action # log_prob, action 62 | 63 | def get_deterministic_action(self, state, legal_move): 64 | output = self.policy(state) # bs(1)*50 65 | legal_move_dict = legal_move[1] 66 | legal_move = torch.tensor(legal_move[0]).long().to(self.device) 67 | legal_prob = torch.index_select(output,1,legal_move) 68 | action = torch.topk(legal_prob,1) 69 | action = action[1] # op 70 | if len(legal_prob.shape) == 2 and legal_prob.shape[1] != 1: 71 | action = legal_move_dict[action.item()] 72 | else: 73 | action = legal_move_dict[0] 74 | log_prob = output[0][action] # requires_grad 75 | return log_prob, action 76 | 77 | def update_weight(self, all_log_probs, all_rewards, baseline=False): 78 | gamma = 0.99 79 | eps = np.finfo(np.float32).eps.item() 80 | tot_loss = [] 81 | res_rewards, avg_reward = [], [] 82 | # baseline `1/N\sum_{i=1}^N r(\tau)` 83 | for log_prob, temp_rewards in zip(all_log_probs,all_rewards): 84 | # a full trace \tau 85 | R = 0 86 | rewards = [] 87 | for r in temp_rewards[::-1]: 88 | R = r + gamma * R 89 | rewards.insert(0, R) 90 | avg_reward.append(rewards[0]) # r(\tau) 91 | res_rewards.append(rewards) 92 | if baseline: 93 | avg_reward = np.array(avg_reward).mean() 94 | else: 95 | avg_reward = 0 96 | for log_prob, rewards in zip(all_log_probs,res_rewards): 97 | rewards = torch.tensor(rewards).to(self.device) 98 | rewards = rewards - avg_reward # minus baseline 99 | loss = torch.Tensor([0]).float().to(self.device) 100 | for step, (nllloss, reward) in enumerate(zip(log_prob,rewards)): 101 | # if prob is very small (say 0.01) then -log(prob) is extremely large 102 | # reward needs to be small to make loss small 103 | loss += nllloss * reward # minus! 
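            # `nllloss` above is -log pi(a_t|s_t) for the sampled action, so `loss`
            # accumulates -sum_t log pi(a_t|s_t) * G_t over this trace, where G_t is the
            # discounted (and optionally baselined) return. Minimizing this surrogate with
            # Adam is the REINFORCE policy-gradient update, i.e. gradient ascent on the
            # expected return.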
104 | tot_loss.append(loss) 105 | # tot_loss.append(torch.dot(torch.tensor(log_prob).to(self.device),rewards)) 106 | # backpropagate 107 | self.optimizer.zero_grad() 108 | # loss = torch.stack(tot_loss, dim=0).sum() / len(tot_loss) 109 | tot_loss = torch.cat(tot_loss).mean() # sum() 110 | tot_loss.backward() 111 | self.optimizer.step() 112 | res = tot_loss.item() 113 | del tot_loss 114 | return res -------------------------------------------------------------------------------- /dag_dataset.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the DagDataset class. 8 | ''' 9 | 10 | import numpy as np 11 | from torch.utils.data import Dataset 12 | 13 | class DagDataset(Dataset): 14 | def __init__(self,state_action_pair): 15 | super(DagDataset, self).__init__() 16 | self.state_action_pair = np.array(state_action_pair) 17 | 18 | def __len__(self): 19 | return len(self.state_action_pair) 20 | 21 | def __getitem__(self, idx): 22 | eps = np.finfo(np.float32).eps.item() 23 | state = np.array(self.state_action_pair[idx][0]).astype(np.float64) 24 | state = (state - state.mean(axis = 0)) / (state.std(axis = 0) + eps) 25 | return (state,np.array(self.state_action_pair[idx][1])) -------------------------------------------------------------------------------- /dag_generator.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the random DAG generator. 8 | ''' 9 | from graph import Graph 10 | import random 11 | 12 | NUM_GRAPH = 5000 13 | 14 | class DAGGen(object): 15 | def __init__(self, num, tot_node=50, min_per_layer=1, max_per_layer=5, link_rate=0.5, mul_rate=0.3): 16 | res = "digraph {\n" 17 | res += " node [fontcolor=black]\n" 18 | res += " property [mul=%d,lf=%.1f]\n" % (random.randint(2,5),random.uniform(1.0,2.0)) 19 | nowNode = 0 20 | edges = [] 21 | pre_layer = [] 22 | while nowNode < tot_node: 23 | newNode = random.randint(min_per_layer, max_per_layer) 24 | if nowNode + newNode > tot_node: 25 | newNode = tot_node - nowNode 26 | cur_layer = [] 27 | for i in range(nowNode,nowNode + newNode): 28 | cur_layer.append(i) 29 | for j in pre_layer: 30 | for k in cur_layer: 31 | if random.random() < link_rate: 32 | edges.append((j,k)) 33 | pre_layer = cur_layer[:] 34 | nowNode += newNode 35 | for i in range(tot_node): 36 | if random.random() < mul_rate: 37 | typename = "mul" 38 | else: 39 | typename = "add" 40 | res += " %d [ label = %s ];\n" % (i, typename) 41 | for (step,edge) in enumerate(edges): 42 | res += " %d -> %d [ name = %d ];\n" % (edge[0],edge[1],step) 43 | res += "}\n" 44 | output = open("./DAG/dag_" + str(num) + ".dot","w") 45 | output.write(res) 46 | output.close() 47 | 48 | for i in range(1,NUM_GRAPH+1): 49 | DAGGen(i,tot_node=random.randint(10,50),mul_rate=random.uniform(0.3,0.5)) 50 | if i % 100 == 0: 51 | print("Generated %d / %d DAGs." 
% (i,NUM_GRAPH)) -------------------------------------------------------------------------------- /graph.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the definition and implementation of Graph class. 8 | ''' 9 | 10 | import re, sys 11 | import numpy as np 12 | from node import Node 13 | 14 | class Graph(object): 15 | def __init__(self, mode, mul=2): 16 | self.mode = mode 17 | self.mul_delay = mul 18 | self._LC = 1 19 | self.vertex = 0 20 | self.edge = 0 21 | self.adjlist = [] 22 | self.depth = 0 23 | self.order = [] 24 | self.revOrder = [] 25 | self.totLatency = 0 26 | self.numScheduledOp = 0 27 | # state[0]: Current schedule 28 | # state[1]: Current possible move 29 | # state[2]: All possible move 30 | self.state = [] 31 | # reward and punishment 32 | self.reward = dict() 33 | self.reward["penalty"] = 0 34 | self.reward["small"] = 0 35 | self.reward["nothing"] = 0 36 | 37 | def setLatencyFactor(self,lc): 38 | self._LC = lc 39 | 40 | def setConstrainedL(self,conL): 41 | self.CONSTRAINED_L = conL 42 | 43 | def getConstrainedL(self): 44 | return self.CONSTRAINED_L+1 45 | 46 | def getMulDelay(self): 47 | return self.mul_delay 48 | 49 | def getLf(self): 50 | return self._LC 51 | 52 | def setMAXRESOURCE(self,r): 53 | self.maxNr = {"MUL":r[0], "ALU":r[1]} 54 | print("Constrained resources: MUL: %d ALU: %d" % (self.maxNr["MUL"],self.maxNr["ALU"])) 55 | 56 | def initialize(self): 57 | self.dfs() # obtain CONSTRAINED_L 58 | self.currNr = {"MUL":0, "ALU":0} 59 | self.bestNr = {"MUL":0x3f3f3f, "ALU":0x3f3f3f} 60 | self.nrt = {"MUL":np.array([0]*(self.CONSTRAINED_L+1)), "ALU":np.array([0]*(self.CONSTRAINED_L+1))} 61 | 62 | def read(self,infile): 63 | # print("Begin parsing...") 64 | for line in infile: 65 | if not ("label" in line or "name" in line): 66 | if "property" in line: 67 | res = re.split("=|,|\\].*",line) 68 | self.mul_delay = int(res[1]) 69 | self.setLatencyFactor(float(res[3])) 70 | else: 71 | continue 72 | elif "label" in line: 73 | res = re.split(" *\\[ *label *= *| *\\];| +",line) 74 | op, op_type = res[1], res[2] 75 | self.add_vertex(op,op_type) 76 | else: 77 | res = re.split(" *\\[ *name *= *| *\\];| *-> *| +",line) 78 | src, des = res[1], res[2] 79 | self.add_edge(src,des) 80 | # print("Finish parsing!") 81 | 82 | def mapR(self,type_,mode=0): 83 | if (type_ == "mul" or type_ == "MUL" or type_ == "div" or type_ == "DIV"): 84 | return ("MUL" if mode == 0 else 0) 85 | else: 86 | return ("ALU" if mode == 0 else 1) 87 | 88 | def add_vertex(self,name_,type_): 89 | delay = 1 90 | if self.mapR(type_) == "MUL": 91 | delay = self.mul_delay 92 | v = Node(self.vertex,name_,type_,delay) 93 | self.vertex += 1 94 | self.adjlist.append(v) 95 | 96 | def add_edge(self,src,des): 97 | for i in range(len(self.adjlist)): 98 | if self.adjlist[i].name == src: 99 | for j in range(len(self.adjlist)): 100 | if self.adjlist[j].name == des: 101 | self.adjlist[i].succ.append(j) 102 | self.adjlist[j].pred.append(i) 103 | self.edge += 1 104 | break 105 | 106 | def dfsASAP(self,num): 107 | if self.mark[num]: 108 | return 109 | if len(self.adjlist[num].pred) == 0: 110 | self.adjlist[num].setASAP(-1,0) 111 | else: 112 | for j in self.adjlist[num].pred: 113 | self.dfsASAP(j) 114 | self.adjlist[num].setASAP(j,self.adjlist[j].getASAP() + self.adjlist[j].delay) 115 | 
self.depth = max(self.adjlist[num].getASAP() + self.adjlist[num].delay - 1, self.depth) 116 | if self.mode == "TCS": 117 | self.setConstrainedL(int((self.depth)*self._LC)) 118 | else: 119 | self.setConstrainedL(self.CONSTRAINED_L) 120 | self.mark[num] = True 121 | self.order.append(self.adjlist[num]) 122 | 123 | def dfsALAP(self,num): 124 | if self.mark[num]: 125 | return 126 | if len(self.adjlist[num].succ) == 0: 127 | # CONSTRAINED_L is used here, dfsASAP must be done first 128 | self.adjlist[num].setALAP(-1, self.CONSTRAINED_L - self.adjlist[num].delay + 1) 129 | else: 130 | for j in self.adjlist[num].succ: 131 | self.dfsALAP(j) 132 | self.adjlist[num].setALAP(j,self.adjlist[j].getALAP() - self.adjlist[num].delay) 133 | self.mark[num] = True 134 | self.revOrder.append(self.adjlist[num]) 135 | 136 | def dfs(self): 137 | # print("Begin DFS...") 138 | self.mark = np.zeros(self.vertex,dtype=bool) 139 | for i in range(len(self.adjlist)): 140 | if len(self.adjlist[i].succ) == 0: 141 | self.dfsASAP(i) 142 | self.mark = np.zeros(self.vertex,dtype=bool) 143 | for i in range(len(self.adjlist)): 144 | if len(self.adjlist[i].pred) == 0: 145 | self.dfsALAP(i) 146 | # print("Finish DFS.") 147 | # print("Constrained Latency is %d" % (self.CONSTRAINED_L+1)) 148 | 149 | def initial_schedule(self): 150 | # clear previous state 151 | self.totLatency = 0 152 | self.numScheduledOp = 0 153 | self.currNr = {"MUL":0, "ALU":0} 154 | self.bestNr = {"MUL":0x3f3f3f, "ALU":0x3f3f3f} 155 | self.nrt = {"MUL":np.array([0]*(self.CONSTRAINED_L+1)), "ALU":np.array([0]*(self.CONSTRAINED_L+1))} 156 | for i in range(len(self.adjlist)): 157 | self.adjlist[i].initial() 158 | # reschedule 159 | self.state = np.zeros((3,self.vertex,self.CONSTRAINED_L+1)) 160 | for i in range(self.vertex): 161 | self.state[1:3,i,self.adjlist[i].getASAP():self.adjlist[i].getALAP() + self.adjlist[i].delay] = 1 162 | for i in range(self.vertex): 163 | self.schedule_node(i,self.adjlist[i].getASAP(),0) 164 | 165 | def schedule_node(self,op,step,mode=1): 166 | if not self.test_val(op,step): 167 | return False, self.reward["penalty"] 168 | reward = 0 169 | tempR = self.mapR(self.adjlist[op].type) 170 | tempNum = self.mapR(self.adjlist[op].type,1) 171 | # remove old state 172 | oldOpNr = 0 173 | for d in range(self.adjlist[op].delay): 174 | oldOpNr += self.nrt[tempR][self.adjlist[op].cstep + d] 175 | if mode == 1: 176 | self.numScheduledOp += 1 177 | for d in range(self.adjlist[op].delay): 178 | # since the op initially placed here, so it should be at least WA 179 | self.state[0,op,self.adjlist[op].cstep + d] = 0 180 | self.nrt[tempR][self.adjlist[op].cstep + d] -= 1 181 | # current operation 182 | self.adjlist[op].schedule(step) 183 | delay = self.adjlist[op].delay 184 | for d in range(delay): 185 | self.nrt[tempR][step + d] += 1 186 | self.state[0,op,step:step+delay] = 1 187 | self.state[1,op,step:step+delay] = 0 188 | self.state[1,op,self.adjlist[op].getASAP():step] = 1 189 | self.state[1,op,step+delay:self.adjlist[op].getALAP()+delay] = 1 190 | # other influenced operations 191 | for vpred in self.adjlist[op].pred: 192 | tempALAP = self.adjlist[vpred].getALAP() 193 | d = self.adjlist[vpred].delay 194 | self.adjlist[vpred].setALAP(op,step - d) 195 | currALAP = self.adjlist[vpred].getALAP() 196 | self.state[1,vpred,min(tempALAP,currALAP)+d:max(tempALAP,currALAP)+d] = 0 if currALAP < tempALAP else 1 197 | if currALAP > tempALAP: 198 | reward += self.reward["small"] 199 | for vsucc in self.adjlist[op].succ: 200 | tempASAP = self.adjlist[vsucc].getASAP() 
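                # Scheduling `op` at `step` means this successor can start no earlier than
                # step + op.delay, so its ASAP bound and the possible-move window kept in
                # state[1] are updated below (mirroring the predecessor/ALAP update above).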
201 | self.adjlist[vsucc].setASAP(op,step + self.adjlist[op].delay) 202 | currASAP = self.adjlist[vsucc].getASAP() 203 | self.state[1,vsucc,min(tempASAP,currASAP):max(tempASAP,currASAP)] = 0 if currASAP > tempASAP else 1 204 | if currASAP < tempASAP: 205 | reward += self.reward["small"] 206 | self.totLatency = max(self.totLatency, step + self.adjlist[op].delay) # step start from 0 207 | oldNr = self.currNr[tempR] 208 | self.currNr[tempR] = self.nrt[tempR].max() 209 | if mode != 0: 210 | if self.currNr["MUL"] != 0 and self.currNr["ALU"] != 0 and self.currNr["MUL"] + self.currNr["ALU"] <= self.bestNr["MUL"] + self.bestNr["ALU"]: 211 | self.bestNr["MUL"], self.bestNr["ALU"] = self.currNr["MUL"], self.currNr["ALU"] 212 | newOpNr = 0 213 | for d in range(self.adjlist[op].delay): 214 | newOpNr += self.nrt[tempR][self.adjlist[op].cstep + d] 215 | # early stop 216 | cnt = 0 217 | legal_move = self.getAllLegalMove()[0] 218 | for legal_op in legal_move: 219 | legal_op = self.adjlist[legal_op] 220 | typeR = self.mapR(legal_op.type) 221 | if (self.nrt[typeR][legal_op.cstep+1:legal_op.cstep+1+legal_op.delay] + 1 222 | > self.currNr[typeR]).any(): 223 | cnt += 1 224 | if cnt >= len(legal_move): 225 | return False, self.reward["nothing"] 226 | # final reward 227 | if self.mode == "RCS": 228 | reward += 10 / self.totLatency 229 | else: 230 | reward += oldNr - self.currNr[tempR] 231 | # reward += (oldOpNr - newOpNr)/5 232 | return True, reward 233 | 234 | # mode 0: without recursion 235 | # mode 1: recursion 236 | def test_val(self,op,step,mode=0): 237 | if op < 0 or op >= self.vertex: 238 | return False 239 | tempR = self.mapR(self.adjlist[op].type) 240 | # Constraints 241 | if self.mode == "RCS": 242 | if self.nrt[tempR][step] + 1 > self.maxNr[tempR]: 243 | return False 244 | else: 245 | if step + self.adjlist[op].delay - 1 > self.CONSTRAINED_L: 246 | return False 247 | if mode == 1: 248 | return True 249 | if self.adjlist[op].getASAP() > step or self.adjlist[op].getALAP() < step: 250 | return False 251 | for vsucc in self.adjlist[op].succ: 252 | vsucc = self.adjlist[vsucc] 253 | if vsucc.cstep > -1 and step + self.adjlist[op].delay - 1 >= vsucc.cstep: 254 | return False 255 | for vpred in self.adjlist[op].pred: 256 | vpred = self.adjlist[vpred] 257 | if vpred.cstep > -1 and vpred.cstep + vpred.delay > step: 258 | return False 259 | return True 260 | 261 | def schedule_node_recursion(self,op,step): # only support top-down 262 | if not self.test_val(op,step,1): 263 | return False, self.reward["penalty"] 264 | delay = self.adjlist[op].delay 265 | if not self.state[2,op,step:step+delay].all(): 266 | return False, self.reward["penalty"] 267 | elif self.state[1,op,step:step+delay].all(): # the final operation that needn't move 268 | return self.schedule_node(op,step) 269 | if step < self.adjlist[op].cstep: 270 | return True, 0 271 | tot_reward = 0 272 | for vsucc in self.adjlist[op].succ: # move the operations backward 273 | if self.adjlist[vsucc].cstep < step + delay: 274 | fes, reward = self.schedule_node_recursion(vsucc,step+delay) 275 | if fes == False: 276 | return fes, reward 277 | else: 278 | tot_reward += reward 279 | fes, reward = self.schedule_node(op,step) 280 | if fes == False: 281 | return fes, reward 282 | else: 283 | tot_reward += reward 284 | return fes, tot_reward 285 | 286 | def test_final(self): 287 | flag = True 288 | for v in self.adjlist: 289 | for vsucc in v.succ: 290 | vsucc = self.adjlist[vsucc] 291 | if v.cstep + v.delay - 1 >= vsucc.cstep: 292 | flag = False 293 | print("Schedule 
conflicts with Node %d(%s) and Node %d(%s)." % (v.num,v.name,vsucc.num,vsucc.name)) 294 | return flag 295 | return flag 296 | 297 | def get_state(self): 298 | return self.state 299 | 300 | def get_partial_state(self,size,pos=(0,0)): 301 | res = np.zeros((3,size[0],size[1])) 302 | x = min(self.state.shape[1]-pos[0],size[0]) 303 | y = min(self.state.shape[2]-pos[1],size[1]) 304 | res[:,0:x,0:y] = np.copy(self.state)[:,pos[0]:x+pos[0],pos[1]:y+pos[1]] 305 | return res 306 | 307 | def getNrt(self): 308 | return self.nrt 309 | 310 | def getAllLegalMove(self): 311 | res = [] 312 | res_dict = dict() 313 | cnt = 0 314 | for (op,row) in enumerate(self.get_state()[1,:,:]): 315 | if (row[self.adjlist[op].cstep:] == 1).any(): # backward! 316 | res.append(op) 317 | res_dict[cnt] = op 318 | cnt += 1 319 | return (res,res_dict) 320 | 321 | def getLegalMove(self,pos=(0,0)): 322 | res = [] 323 | res_dict = dict() 324 | cnt = 0 325 | for (op,row) in enumerate(self.get_state()[1,:,:]): 326 | if pos[0] <= op < pos[0] + 50: # 50! 327 | if (row[max(pos[1],self.adjlist[op].cstep):] == 1).any(): # backward! 328 | res.append(op-pos[0]) 329 | res_dict[cnt] = op - pos[0] 330 | cnt += 1 331 | return (res,res_dict) 332 | 333 | def output_adjlist(self): 334 | print("Adjacent List:") 335 | for v in self.adjlist: 336 | print("Node %d(%s):" % (v.num,v.name),end=" ") 337 | for op in v.succ: 338 | print(op+1,end=" ") 339 | print() 340 | 341 | def output_axap(self): 342 | print("AXAP:") 343 | for v in self.adjlist: 344 | print("Node %d(%s): [%d, %d]" % (v.num,v.name,v.getASAP(),v.getALAP())) 345 | 346 | def output(self): 347 | print("# of operations: %d" % self.vertex) 348 | print("Latency factor: %f, CONSTRAINED_L: %d, Mul_delay: %d" % (self._LC,self.CONSTRAINED_L+1,self.mul_delay)) 349 | print("Best # of resources: MUL: %d, ALU: %d" % (self.bestNr["MUL"], self.bestNr["ALU"])) 350 | print("Current # of resources: MUL: %d, ALU: %d" % (self.currNr["MUL"], self.currNr["ALU"])) 351 | print("Latency: %d" % self.totLatency) 352 | print("Schedule: ") 353 | for v in self.adjlist: 354 | print("Node %d(%s): %d" % (v.num,v.name,v.cstep)) -------------------------------------------------------------------------------- /ilp_solver.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the ILP solver for HLS scheduling. 
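The time-constrained scheduling problem is formulated with PuLP: a binary variable
x[i][t] selects the control step of operation i inside its [ASAP, ALAP] window, the
objective minimizes the total number of MUL and ALU units, and assignment, resource,
and precedence constraints tie the two together (see the constraint generation below).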
8 | ''' 9 | 10 | import pulp 11 | from graph import Graph 12 | 13 | class ILPSolver(object): 14 | def __init__(self, file_num, mul_delay=2, lf=1.0): 15 | self.schedule = dict() 16 | g = Graph("TCS",mul_delay) 17 | g.setLatencyFactor(lf) 18 | with open("./DAG/dag_%d.dot" % file_num) as infile: 19 | g.read(infile) 20 | g.initialize() 21 | # print("Begin generating ILP formulas for time-constrained scheduling problem...") 22 | prob = pulp.LpProblem("Time-Constrained Scheduling Problem",pulp.LpMinimize) 23 | M1 = pulp.LpVariable("MUL",lowBound=1,upBound=None,cat=pulp.LpInteger) 24 | M2 = pulp.LpVariable("ALU",lowBound=1,upBound=None,cat=pulp.LpInteger) 25 | prob += M1 + M2, "Minimize the number of FUs" 26 | # Time frame constraints 27 | x = pulp.LpVariable.dicts("x",(range(len(g.adjlist)),range(g.getConstrainedL())),lowBound=0,upBound=1,cat=pulp.LpInteger) 28 | for (i,node) in enumerate(g.adjlist): 29 | prob += pulp.lpSum([x[i][t] for t in range(node.getASAP(),node.getALAP()+1)]) == 1, "" 30 | # print("Time frame constraints generated.") 31 | # Resource constraints 32 | rowR = [] 33 | for i in range(g.getConstrainedL()): 34 | rowR.append({"ALU":[],"MUL":[]}) # be careful of share memory 35 | for (i,node) in enumerate(g.adjlist): 36 | for t in range(node.getASAP(),node.getALAP()+node.delay): 37 | rowR[t][g.mapR(node.type)].append(i) 38 | for t in range(g.getConstrainedL()): 39 | for typeR in ["ALU","MUL"]: 40 | if len(rowR[t][typeR]) < 2: 41 | continue 42 | else: 43 | prob += pulp.lpSum([x[i][td] for i in rowR[t][typeR] 44 | for td in range(max(t-g.adjlist[i].delay+1,0),t+1)]) - (M1 if typeR == "MUL" else M2)<= 0, "" 45 | # print("Resource constraints generated.") 46 | # Precedence constraints 47 | for (i,node) in enumerate(g.adjlist): 48 | for vsucc in node.succ: 49 | prob += (pulp.lpSum([(t+1)*x[i][t] for t in range(node.getASAP(),node.getALAP()+1)]) 50 | - pulp.lpSum([(t+1)*x[vsucc][t] for t in range(g.adjlist[vsucc].getASAP(),g.adjlist[vsucc].getALAP()+1)]) 51 | <= (-1)*node.delay), "" 52 | # print("Precedence constraints generated.") 53 | # print("Finish ILP generation.") 54 | prob.writeLP("./ILP_formulation/dag_%d.lp" % (file_num)) 55 | prob.solve() 56 | # print("MUL = %d" % prob.variablesDict()["MUL"].varValue) 57 | # print("ALU = %d" % prob.variablesDict()["ALU"].varValue) 58 | out_file = open("./Sol/dag_%d.sol" % file_num,"w") 59 | for v in sorted(prob.variables(),key=lambda x: int(x.name.split("_")[1]) if len(x.name.split("_")) != 1 else 0): 60 | if v.name[0] == "x" and v.varValue == 1: 61 | op = v.name.split("_")[1] 62 | cstep = v.name.split("_")[-1] 63 | self.schedule[int(op)] = int(cstep) 64 | out_file.write("%s, %s\n" % (op,cstep)) 65 | out_file.close() 66 | # print("Status:", pulp.LpStatus[prob.status]) 67 | 68 | def getOptSchedule(self): 69 | return self.schedule -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains logger used for recording the training process. 
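Each run creates ./Log/<name>_<NN>.log, where the consecutive run number NN is derived
from the existing files in ./Log; the number is also returned so that rl.py and sl.py
can tag saved networks and loss curves with it.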
8 | ''' 9 | 10 | import re, os, logging 11 | 12 | class LogHandler(object): 13 | def __init__(self, name, level=logging.INFO): 14 | self.name = name 15 | self.logger = logging.getLogger(__name__) 16 | self.logger.setLevel(level) 17 | try: 18 | self.file_num = int(re.split("[_.]",sorted(os.listdir("./Log"),key=lambda x: x.split("_")[1])[-2])[1]) + 1 19 | except: 20 | self.file_num = 1 21 | self.file_name = "./Log/%s_%02d.log" % (self.name,self.file_num) 22 | self.handler = logging.FileHandler(self.file_name) 23 | self.formatter = logging.Formatter('%(asctime)s - %(message)s') # - %(name)s - %(levelname)s 24 | self.handler.setFormatter(self.formatter) 25 | self.logger.addHandler(self.handler) 26 | 27 | def getLogger(self): 28 | return self.file_num, self.logger -------------------------------------------------------------------------------- /node.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the definition and implementation of Graph class. 8 | ''' 9 | 10 | from copy import deepcopy 11 | 12 | class OSPair(object): 13 | def __init__(self, op, step): 14 | super(OSPair, self).__init__() 15 | self.op = op 16 | self.step = step 17 | 18 | def __repr__(self): 19 | return "Op: %d, Step: %d" % (self.op + 1,self.step + 1) 20 | 21 | class Node(object): 22 | def __init__(self, num_, name_, type_, delay_): 23 | super(Node, self).__init__() 24 | self.num = num_ 25 | self.name = name_ 26 | self.type = type_ 27 | self.delay = delay_ 28 | self.pred = [] 29 | self.succ = [] 30 | self.asap = [] # the -1-th 31 | self.alap = [] # the 1-st 32 | self.iasap = [] 33 | self.ialap = [] 34 | self.cstep = -1 35 | 36 | def initial(self): 37 | self.asap = [] 38 | self.alap = [] 39 | for p in self.iasap: 40 | self.asap.append(deepcopy(p)) # very important 41 | for p in self.ialap: 42 | self.alap.append(deepcopy(p)) 43 | self.asap.sort(key=lambda x:x.step,reverse=True) 44 | self.alap.sort(key=lambda x:x.step) 45 | self.cstep = -1 46 | 47 | def schedule(self, step): 48 | self.cstep = step 49 | 50 | def setASAP(self,op,asap_): 51 | flag = False 52 | for i in range(len(self.asap)): 53 | if self.asap[i].op == op: 54 | self.asap[i].step = asap_ 55 | flag = True 56 | if not flag: 57 | self.asap.append(OSPair(op,asap_)) 58 | self.iasap.append(OSPair(op,asap_)) # different copies 59 | self.asap.sort(key=lambda x:x.step,reverse=True) 60 | 61 | def setALAP(self,op,alap_): 62 | flag = False 63 | for i in range(len(self.alap)): 64 | if self.alap[i].op == op: 65 | self.alap[i].step = alap_ 66 | flag = True 67 | if not flag: 68 | self.alap.append(OSPair(op,alap_)) 69 | self.ialap.append(OSPair(op,alap_)) 70 | self.alap.sort(key=lambda x:x.step) 71 | 72 | def getASAP(self): 73 | return self.asap[0].step 74 | 75 | def getALAP(self): 76 | return self.alap[0].step -------------------------------------------------------------------------------- /policy.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the architecture of the policy network. 8 | The policy network is modify from the VGG network. 
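It takes the 3-channel 50x50 scheduling state as input and outputs a log-probability
distribution (log_softmax) over the 50 candidate operations of the current window.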
9 | @article{vgg, 10 | author = {Karen Simonyan and Andrew Zisserman}, 11 | title = {Very Deep Convolutional Networks for Large-Scale Image Recognition}, 12 | journal = {CoRR}, 13 | year = {2014}, 14 | url = {http://arxiv.org/abs/1409.1556}, 15 | } 16 | ''' 17 | 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | 21 | class Policy(nn.Module): 22 | # modify from VGG11 23 | def __init__(self, output_size, batch_norm=False): 24 | super(Policy, self).__init__() 25 | # minibatch*in_channels*iH*iW 26 | # bs*output_size 27 | self.features = nn.Sequential( 28 | # 3*50*50 29 | nn.Conv2d(3,64,kernel_size=3,padding=1), # default stride = 1 30 | nn.ReLU(True), 31 | # 64*50*50 32 | nn.Conv2d(64,64,kernel_size=3,padding=1), 33 | nn.ReLU(True), 34 | # 64*50*50 35 | nn.MaxPool2d(kernel_size=2,stride=2), # default stride = kernel size 36 | # 64*25*25 37 | nn.Conv2d(64,128,kernel_size=3,padding=1), 38 | nn.ReLU(True), 39 | # 128*25*25 40 | nn.Conv2d(128,128,kernel_size=3,padding=1), 41 | nn.ReLU(True), 42 | # 128*25*25 43 | nn.MaxPool2d(kernel_size=2,stride=2), 44 | # 128*12*12 45 | nn.Conv2d(128,256,kernel_size=3,padding=1), 46 | nn.ReLU(True), 47 | # 256*12*12 48 | nn.Conv2d(256,256,kernel_size=3,padding=1), 49 | nn.ReLU(True), 50 | # 256*12*12 51 | nn.MaxPool2d(kernel_size=2,stride=2) 52 | # 256*6*6 53 | ) 54 | self.classifier = nn.Sequential( 55 | nn.Linear(256 * 6 * 6, 2048), 56 | nn.ReLU(True), 57 | nn.Dropout(), 58 | nn.Linear(2048, 2048), 59 | nn.ReLU(True), 60 | nn.Dropout(), 61 | nn.Linear(2048, output_size), 62 | ) 63 | self._initialize_weights() 64 | 65 | def _initialize_weights(self): 66 | for m in self.modules(): 67 | if isinstance(m, nn.Conv2d): 68 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 69 | if m.bias is not None: 70 | nn.init.constant_(m.bias, 0) 71 | elif isinstance(m, nn.BatchNorm2d): 72 | nn.init.constant_(m.weight, 1) 73 | nn.init.constant_(m.bias, 0) 74 | elif isinstance(m, nn.Linear): 75 | nn.init.normal_(m.weight, 0, 0.01) 76 | nn.init.constant_(m.bias, 0) 77 | 78 | def forward(self, x): 79 | x = self.features(x) 80 | x = x.view(x.size(0), -1) 81 | x = self.classifier(x) 82 | x = F.log_softmax(x,dim=1) 83 | return x 84 | 85 | def forward_without_softmax(self, x): 86 | x = self.features(x) 87 | x = x.view(x.size(0), -1) 88 | x = self.classifier(x) 89 | return x -------------------------------------------------------------------------------- /preprocess.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the preprocess part. 8 | ''' 9 | 10 | from graph import Graph 11 | from ilp_solver import ILPSolver 12 | 13 | def preprocess(tot_files): 14 | state_action_pair = [] 15 | # logger.info("Begin generating data...") 16 | for file_num in range(1,tot_files+1): 17 | generateData(file_num,state_action_pair) 18 | if file_num % 10 == 0: 19 | print("Generated %d / %d." % (file_num,tot_files)) 20 | # logger.info("Generated %d / %d." 
% (file_num,tot_files)) 21 | # logger.info("Finish generating data.") 22 | return state_action_pair 23 | 24 | def generateData(file_num,state_action_pair,state_size=(50,50)): 25 | graph = Graph("TCS") 26 | graph.read(open("./DAG/dag_%d.dot" % file_num,"r")) 27 | graph.initialize() 28 | graph.initial_schedule() 29 | if graph.getConstrainedL() > 50: 30 | print("File %d exceeds 50 latency." % file_num) 31 | return 32 | try: 33 | sol = open("./Sol/dag_%d.sol" % file_num,"r") 34 | ops = dict() 35 | for line in sol: 36 | op, cstep = map(int,line.split(", ")) 37 | ops[op] = cstep 38 | except: 39 | ilp = ILPSolver(file_num,graph.getMulDelay(),graph.getLf()) 40 | ops = ilp.getOptSchedule() 41 | for node in graph.revOrder: 42 | if ops[node.num] == node.cstep: 43 | continue 44 | for t in range(node.cstep+1,ops[node.num]+1): 45 | state_action_pair.append((graph.get_partial_state(state_size),node.num)) 46 | graph.schedule_node(node.num,t) 47 | # logger.debug("Node %d schedules on cstep %d" % (node.num,t)) -------------------------------------------------------------------------------- /rl.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the reinforcement learning (RL) part of the training pipeline. 8 | ''' 9 | 10 | import time, sys, os, argparse 11 | import random 12 | import torch 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | 16 | from logger import LogHandler 17 | from graph import Graph 18 | from policy import Policy 19 | from agent import Agent 20 | from dag_dataset import DagDataset 21 | 22 | parser = argparse.ArgumentParser(description="Deep-RL-Based HLS Scheduler (Reinforcement learning)") 23 | parser.add_argument("--mode", type=str, default="TCS", help="Scheduling mode: TCS or RCS (default TCS)") 24 | parser.add_argument("--lc", type=float, default=1, help="Latency factor used for TCS (default: 1)") 25 | parser.add_argument("--mul_delay", type=int, default=2, help="MUL delay (default: 2)") 26 | parser.add_argument("--episodes", type=int, default=1000, help="Max iteration episodes (default: 1000)") 27 | parser.add_argument("--input_graphs", type=int, default=3000, help="Number of input graphs? (default: 3000)") 28 | parser.add_argument("--batch_size", type=int, default=32, help="Batch size? (default: 32)") 29 | parser.add_argument("--timesteps", type=int, default=2500, help="Max timestep in one simulation (default: 2500)") 30 | parser.add_argument("--learning_rate", type=float, default=1e-3, help="Learning rate? (default: 1e-3)") 31 | parser.add_argument("--use_cuda", type=int, default=1, help="Use cuda? (default: True, the 1st GPU)") 32 | parser.add_argument("--use_network", type=str, default="", help="Use previous network? Input the name of the network. (default: None)") 33 | parser.add_argument("--test", type=int, default=-1, help="Test file num? (default: -1)") 34 | parser.add_argument("--stride", type=int, default=3, help="Stride of the kernel? 
(default: 3)") 35 | args = parser.parse_args() 36 | 37 | best_reward = 0 38 | 39 | STATE_SIZE = (50,50) 40 | device = torch.device(("cuda:%d" % (args.use_cuda-1)) if args.use_cuda != 0 else "cpu") 41 | agent = Agent(STATE_SIZE,use_network=args.use_network,device=device,lr=args.learning_rate) 42 | 43 | if args.test == -1: 44 | logger_num, logger = LogHandler("rl").getLogger() 45 | logger.info("Deep-RL-Based HLS Scheduler (Reinforcement Learning)") 46 | print("Logger num: %d" % logger_num) 47 | file_name = "_rl_" + time.strftime("%Y%m%d_") + str(logger_num) 48 | logger.info(agent.policy.features) 49 | logger.info(agent.policy.classifier) 50 | logger.info("NLLLoss + Adam") 51 | logger.info("Batch size: %d, Learning rate: %f" % (args.batch_size,args.learning_rate)) 52 | logger.info(Graph("TCS").reward) 53 | 54 | def train(episode): # Monte Carol REINFORCE 55 | global best_reward 56 | res_loss, res_reward = [], [] 57 | for i_graph in range(args.input_graphs//args.batch_size): 58 | all_log_probs, all_rewards = [], [] 59 | # simulate batch_size graphs 60 | for minibatch in range(args.batch_size): 61 | log_probs, rewards = [], [] 62 | graph = Graph(args.mode) # "TCS" 63 | graph.read(open("./DAG/dag_%d.dot" % (i_graph*args.batch_size+minibatch+1),"r")) 64 | graph.initialize() 65 | graph.initial_schedule() 66 | # one full trace \tau 67 | for timestep in range(args.timesteps): 68 | state = torch.Tensor(graph.get_partial_state(STATE_SIZE)).float().to(device) 69 | state = state.resize_((1,state.size()[0],state.size()[1],state.size()[2])) 70 | legalMove = graph.getLegalMove() 71 | if len(legalMove[0]) == 0: 72 | break 73 | log_prob, action = agent.get_action(state,legalMove) 74 | fes, reward = graph.schedule_node(action, graph.vertex if action >= graph.vertex else graph.adjlist[action].cstep + 1) 75 | log_probs.append(log_prob) 76 | rewards.append(reward) 77 | if fes == False: 78 | break 79 | all_log_probs.append(log_probs) 80 | all_rewards.append(np.array(rewards).astype(np.float)) 81 | # update policy 82 | loss = agent.update_weight(all_log_probs,all_rewards,baseline=False) # be careful that the rewards are not aligned 83 | avg_reward = np.array([x.sum() for x in all_rewards]).mean() 84 | res_loss.append(loss) 85 | res_reward.append(avg_reward) 86 | if i_graph % 10 == 0: 87 | print("Train - Episode %d, Batch: %d, Loss: %f, Reward: %f" % (episode,i_graph,loss,avg_reward)) 88 | logger.info("Train - Episode %d, Batch: %d, Loss: %f, Reward: %f" % (episode,i_graph,loss,avg_reward)) 89 | if best_reward < avg_reward: 90 | best_reward = avg_reward 91 | torch.save(agent.policy,"./Networks/policy" + file_name + "_best.pkl") 92 | del all_log_probs[:] 93 | del all_rewards[:] 94 | return (np.array(res_loss).mean(), np.array(res_reward).mean()) 95 | 96 | def test(file_num): 97 | print("Begin testing...") 98 | nrt, nrta, step = [], [], [] 99 | graph = Graph(args.mode,args.mul_delay) # "TCS" 100 | graph.setLatencyFactor(args.lc) 101 | graph.read(open("./DAG/dag_%d.dot" % file_num,"r")) 102 | graph.initialize() 103 | graph.initial_schedule() 104 | print("ASAP # of resources: MUL: %d, ALU: %d" % (graph.currNr["MUL"],graph.currNr["ALU"])) 105 | step.append(0) 106 | nrt.append(graph.currNr["MUL"]) 107 | nrta.append(graph.currNr["ALU"]) 108 | flag_in = False 109 | timestep = 0 110 | cnt_loop = 0 111 | stride = args.stride 112 | pos_num = [0] 113 | while pos_num[-1] + STATE_SIZE[0] <= graph.vertex: 114 | pos_num.append(pos_num[-1] + stride) 115 | print(pos_num) 116 | while timestep < args.timesteps: 117 | for i in pos_num: 
118 |             state = torch.Tensor(graph.get_partial_state(STATE_SIZE,pos=(i,0))).float().to(device)
119 |             state = state.resize_((1,state.size()[0],state.size()[1],state.size()[2]))
120 |             legalMove = graph.getLegalMove(pos=(i,0))
121 |             if cnt_loop >= len(pos_num):
122 |                 print("Early stop! No legal actions!")
123 |                 flag_in = True
124 |                 break
125 |             if len(legalMove[0]) == 0:
126 |                 cnt_loop += 1
127 |                 continue
128 |             cnt_loop = 0
129 |             # log_prob, action = agent.get_sl_action(state)
130 |             log_prob, action = agent.get_deterministic_action(state, legalMove)
131 |             action += i
132 |             fes, reward = graph.schedule_node(action, graph.vertex if action >= graph.vertex else graph.adjlist[action].cstep + 1)
133 |             if fes == False:
134 |                 if action >= graph.vertex:
135 |                     print("Timestep %d: op %d (exceed), not available!" % (timestep+1,action))
136 |                 else:
137 |                     print("Timestep %d: op %d move to %d, early stop!" % (timestep+1,action,graph.adjlist[action].cstep + 1))
138 |                 flag_in = True
139 |                 break
140 |             else:
141 |                 print("Timestep %d: op %d move to %d, reward: %f" % (timestep+1,action,graph.adjlist[action].cstep,reward))
142 |             step.append(timestep+1)
143 |             nrt.append(graph.currNr["MUL"])
144 |             nrta.append(graph.currNr["ALU"])
145 |             timestep += 1
146 |         if flag_in:
147 |             break
148 |     print("Finish testing.")
149 |     print(graph.test_final())
150 |     print(graph.get_state())
151 |     graph.output()
152 |     fig = plt.figure()
153 |     ax = fig.add_subplot(111)
154 |     l1 = ax.plot(step,nrt,label="MUL")
155 |     l2 = ax.plot(step,nrta,label="ALU")
156 |     ax.set_xlabel("Step")
157 |     ax.set_ylabel("# of ops")
158 |     # ax.set_title("%s" % input())
159 |     ax.legend(loc=1)
160 |     fig.savefig("./fig_test_%d.pdf" % file_num,format="pdf")
161 |     plt.show()
162 |     return (nrt[0],nrta[0],graph.bestNr["MUL"],graph.bestNr["ALU"])
163 | 
164 | def visualization(results):
165 |     res_r = np.array([x[1] for x in results]) # train() returns (avg_loss, avg_reward)
166 |     res_l = np.array([x[0] for x in results])
167 |     np.save("./Loss/" + "reward" + file_name + ".npy",res_r)
168 |     np.save("./Loss/" + "loss" + file_name + ".npy",res_l)
169 |     fig = plt.figure()
170 |     ax1 = fig.add_subplot(111)
171 |     lns1 = ax1.plot(range(len(res_r)),res_r,label="Reward",color="b")
172 |     ax2 = ax1.twinx() # second y-axis sharing the same x-axis
173 |     lns2 = ax2.plot(range(len(res_l)),res_l,label="Loss",color="r")
174 |     lns = lns1 + lns2
175 |     labs = [l.get_label() for l in lns]
176 |     ax1.legend(lns, labs, loc=0)
177 |     fig.savefig("./Loss/" + "fig" + file_name + ".jpg")
178 | 
179 | if args.test != -1:
180 |     agent.policy.eval()
181 |     res = []
182 |     # for i in range(10001,10021):
183 |     #     res.append(test(i))
184 |     res.append(test(args.test)) # test the DAG given by --test
185 |     for x in res:
186 |         print("%d %d %d %d %d %d" % (x[0],x[1],x[0]+x[1],x[2],x[3],x[2]+x[3]))
187 |     sys.exit()
188 | 
189 | logger.info("Begin training...")
190 | startTime = time.time()
191 | results = []
192 | for episode in range(1,args.episodes+1):
193 |     results.append(train(episode))
194 |     visualization(results)
195 |     logger.info("Train Episode %d: Avg. Loss: %f, Avg. Reward: %f" % (episode,results[-1][0],results[-1][1]))
196 |     print("Train Episode %d: Avg. Loss: %f, Avg. Reward: %f" % (episode,results[-1][0],results[-1][1]))
197 |     torch.save(agent.policy,"./Networks/policy" + file_name +".pkl")
198 |     usedTime = time.time() - startTime
199 |     print("Finish %d / %d. Total time used: %f min. Rest of time: %f min."
200 | % (episode,args.episodes,usedTime/60,usedTime/60*args.episodes/episode-usedTime/60)) 201 | logger.info("Finish training.") -------------------------------------------------------------------------------- /sl.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2018 Hongzheng Chen 3 | E-mail: chenhzh37@mail2.sysu.edu.cn 4 | 5 | This is the implementation of Deep-reinforcement-learning-based scheduler for High-Level Synthesis. 6 | 7 | This file contains the supervised learning (SL) part of the training pipeline. 8 | ''' 9 | 10 | import time, sys, os, argparse 11 | import random 12 | import numpy as np 13 | import visdom 14 | import matplotlib.pyplot as plt 15 | from logger import LogHandler 16 | 17 | import torch 18 | import torch.nn as nn 19 | from torch.utils.data import DataLoader 20 | 21 | from graph import Graph 22 | from preprocess import preprocess 23 | from policy import Policy 24 | from dag_dataset import DagDataset 25 | 26 | parser = argparse.ArgumentParser(description="Deep-RL-Based HLS Scheduler (Supervised Learning)") 27 | parser.add_argument("--use_cuda", type=int, default=1, help="Use cuda? (default: True, the 1st GPU)") 28 | parser.add_argument("--input_graphs", type=int, default=3500, help="Number of input graphs? (default: 3500)") 29 | parser.add_argument("--batch_size", type=int, default=128, help="Batch size? (default: 128)") 30 | parser.add_argument("--learning_rate", type=float, default=5e-4, help="Learning rate? (default: 5e-4)") 31 | parser.add_argument("--epoch", type=int, default=10000, help="Number of epoch? (default: 10000)") 32 | parser.add_argument("--use_network", type=str, default="", help="Use previous network? Input the name of the network. (default: None)") 33 | args = parser.parse_args() 34 | 35 | logger_num, logger = LogHandler("sl").getLogger() 36 | logger.info("Deep-RL-Based HLS Scheduler (Supervised Learning)") 37 | print("Logger num: %d" % logger_num) 38 | device = torch.device(("cuda:%d" % (args.use_cuda-1)) if args.use_cuda != 0 else "cpu") 39 | file_name = "_sl_" + time.strftime("%Y%m%d_") + str(logger_num) 40 | 41 | STATE_SIZE = (50,50) 42 | 43 | if args.use_network == "": 44 | net = Policy(STATE_SIZE[0]).to(device) 45 | print("Build a new network!") 46 | else: 47 | try: 48 | net = torch.load("./Networks/" + args.use_network).to(device) 49 | print("Loaded %s." % args.use_network) 50 | logger.info("Pretrained network: %s (%s)" % (args.use_network,"gpu" if args.use_cuda else "cpu")) 51 | except: 52 | print("No such network named %s. Rebuild a new network!" 
% args.use_network) 53 | net = Policy(STATE_SIZE[0]).to(device) 54 | network_file = "./Networks/policy" + file_name + ".pkl" 55 | logger.info("New network: %s (%s)" % (network_file,"gpu" if args.use_cuda else "cpu")) 56 | criterion = nn.NLLLoss() 57 | optimizer = torch.optim.Adam(net.parameters(),lr=args.learning_rate) 58 | logger.info(net.features) 59 | logger.info(net.classifier) 60 | logger.info("NLLLoss (Negative Log likelihood loss) + Adam") 61 | logger.info("Batch size: %d, Learning rate: %f" % (args.batch_size,args.learning_rate)) 62 | 63 | best_accuracy = 0 64 | viz = visdom.Visdom() 65 | cur_batch_win, epoch_loss_win = None, None 66 | 67 | def train(epoch): 68 | global cur_batch_win 69 | net.train() 70 | total_correct = 0 71 | loss_list, batch_list = [], [] 72 | for i, (state, action) in enumerate(data_train_loader): 73 | state = torch.Tensor(state.float()).to(device) 74 | action = torch.Tensor(action.float()).type(torch.LongTensor).to(device) 75 | optimizer.zero_grad() 76 | output = net(state) 77 | # bs*50 <- bs labels 78 | loss = criterion(output,action) 79 | loss_list.append(loss.item()) 80 | batch_list.append(i+1) 81 | predict = output.data.max(1)[1] 82 | total_correct += predict.eq(action.data.view_as(predict)).sum() 83 | if i % 10 == 0: 84 | logger.info("Train - Epoch %d, Batch: %d, Loss: %f" % (epoch,i,loss.item())) 85 | if viz.check_connection(): 86 | cur_batch_win = viz.line(X=torch.FloatTensor(batch_list), Y=torch.FloatTensor(loss_list), 87 | win=cur_batch_win, name='current_batch_loss', 88 | update=(None if cur_batch_win is None else 'replace'), 89 | opts={'title': 'Epoch Loss Trace','xlabel': 'Batch Number','ylabel': 'Loss','width': 1200,'height': 600}) 90 | loss.backward() 91 | optimizer.step() 92 | avg_loss = np.array(loss_list).sum() / len(data_train_loader) 93 | accuracy = float(total_correct) / len(data_train) 94 | logger.info("Train Epoch %d: Avg. Loss: %f, Accuracy: %f" % (epoch,avg_loss,accuracy)) 95 | print("Train Epoch %d: Avg. Loss: %f, Accuracy: %f" % (epoch,avg_loss,accuracy)) 96 | return avg_loss 97 | 98 | def test(epoch): 99 | global best_accuracy 100 | net.eval() 101 | total_correct = 0 102 | avg_loss = 0.0 103 | for i, (state, action) in enumerate(data_test_loader): 104 | state = torch.Tensor(state.float()).to(device) 105 | action = torch.Tensor(action.float()).type(torch.LongTensor).to(device) 106 | output = net(state) 107 | avg_loss += criterion(output, action).item() # sum() 108 | predict = output.data.max(1)[1] 109 | total_correct += predict.eq(action.data.view_as(predict)).sum() 110 | avg_loss /= (len(data_test_loader)) 111 | accuracy = float(total_correct) / len(data_test) 112 | logger.info("Test Epoch %d: Avg. Loss: %f, Accuracy: %f" % (epoch,avg_loss,accuracy)) 113 | print("Test Epoch %d: Avg. 
Loss: %f, Accuracy: %f" % (epoch,avg_loss,accuracy))
114 |     if best_accuracy < accuracy:
115 |         best_accuracy = accuracy
116 |         torch.save(net,network_file[:-4]+"_best.pkl")
117 |     return avg_loss
118 | 
119 | def visualization(epoch,train_loss,test_loss):
120 |     fig = plt.figure()
121 |     ax = fig.add_subplot(111)
122 |     ax.plot([i for i in range(1,epoch+1)],np.array(train_loss),label="train")
123 |     ax.plot([i for i in range(1,epoch+1)],np.array(test_loss),label="test")
124 |     ax.set_xlabel("Epoch")
125 |     ax.set_ylabel("Loss")
126 |     ax.legend()
127 |     fig.savefig("./Loss/fig" + file_name + ".jpg")
128 |     plt.cla()
129 |     plt.close()
130 |     np.save("./Loss/train_loss" + file_name + ".npy",np.array(train_loss))
131 |     np.save("./Loss/test_loss" + file_name + ".npy",np.array(test_loss))
132 | 
133 | state_action_pair = preprocess(args.input_graphs)
134 | random.shuffle(state_action_pair) # important to break the correlation between consecutive samples
135 | cut = int(0.96*len(state_action_pair))
136 | data_train = DagDataset(state_action_pair[:cut])
137 | data_test = DagDataset(state_action_pair[cut:])
138 | data_train_loader = DataLoader(data_train,shuffle=True,batch_size=args.batch_size,num_workers=12)
139 | data_test_loader = DataLoader(data_test,shuffle=True,batch_size=args.batch_size,num_workers=12)
140 | print("# of train data: %d" % len(data_train))
141 | print("# of test data: %d" % len(data_test))
142 | logger.info("# of input graphs: %d" % args.input_graphs)
143 | logger.info("# of train data: %d" % len(data_train))
144 | logger.info("# of test data: %d" % len(data_test))
145 | startTime = time.time()
146 | logger.info("Begin training...")
147 | train_loss = []
148 | test_loss = []
149 | for epoch in range(1,args.epoch+1):
150 |     train_loss.append(train(epoch))
151 |     test_loss.append(test(epoch))
152 |     visualization(epoch,train_loss,test_loss)
153 |     torch.save(net,network_file)
154 |     usedTime = time.time() - startTime
155 |     print("Finish %d / %d. Total time used: %f min. Rest of time: %f min."
156 |           % (epoch,args.epoch,usedTime/60,usedTime/60*args.epoch/epoch-usedTime/60))
157 | logger.info("Finish training.")
--------------------------------------------------------------------------------