├── README.md ├── agents.py ├── background.py ├── environment.py ├── model ├── DDDPG.py ├── DDPG.py ├── DDPGtest.py ├── PG.py ├── PGtest.py ├── exp_ddpg_Pendulum-v0 │ ├── 900_actor.pth │ └── 900_critic.pth └── exp_pg_CartPole-v0 │ └── 900.pth ├── test.py ├── train.py ├── trainPG.py ├── ui.py └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | # Reinforce-learning based algorithm for dynamic scheduling problem in steelmaking workshop 2 | 基于强化学习的炼钢动态调度求解技术和软件实现 3 | ------ 4 | 5 | ## 文件说明 6 | ### model文件夹 7 | model文件夹储存各种算法: 8 | > * `PG.py`: policy Gradient 算法 9 | > * `PGtest.py`: policy Gradient 算法测试, 测试对象 openAI CartPole-v0 10 | > * `DDPG.py`: deep deterministic policy gradient 算法 11 | > * `DDPGtets.py`: deep deterministic policy gradient 算法测试 12 | > * `DDDPG.py`: deep deterministic discrete policy gradient 算法 13 | 14 | ### 调度系统 15 | `background.py`: 定义调度系统的流程数量, 工艺种类等各种信息 16 | `agents.py`: 分布式多agent集群 17 | `environment.py`: 整个项目的环境 18 | `utils.py`: 自定义工具包 19 | `test.py`: 测试运行, 随机算法运行, 测试系统的可用性 20 | `ui.py`: 运行则会显示ui界面 21 | `train.py`: 神经网络训练, 算法DDDPG -------------------------------------------------------------------------------- /agents.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | agents.py 5 | 6 | 该程序定义agent群 7 | 分两种agent: 加工原材料的agent和加工半成品的agent 8 | """ 9 | 10 | import random 11 | #from model.DDDPG import * 12 | from model.PG import * 13 | 14 | # 进行决策的agent, 父类 15 | class VanillaAgent: 16 | # 参数: 属于第几流程, 设备数 17 | def __init__(self, processNum, machineNum): 18 | self.processNum = processNum 19 | self.machineNum = machineNum 20 | 21 | # 随机算法决策 22 | def SelectAction(self, state): 23 | pass 24 | 25 | 26 | ''' 27 | 最初处理原料的agent, 继承父类 28 | 29 | state格式: list, 长度为task的种数(定值)+machine的数量(定值) 30 | 第一部分元素的取值是剩下未完成的job数(0-N) 31 | 第二部分取值是machine状态, 0是空闲, 1是非空闲/占用(工作中或工作结束但材料没运走或损坏) 32 | 例如(3,2,0,1)表示task1剩下3个job, task2剩下2个job, task3剩下0个job, machine1正在占用 33 | 34 | action格式: list, 长度为machine数量(定值), 元素取值范围是0-task的种数, 表示不选择动作或者选择task中的一个进行加工, 0为不动作 35 | 例如(2,1,0)表示第一台设备运行一个task2的job,第二台设备运行一个task1的job,第三台设备不工作 36 | ''' 37 | class InitialAgent(VanillaAgent): 38 | # 新增加一个Task的种类数 39 | def __init__(self, processNum, machineNum, taskNum, action_min, exploration_noise, capacity, device): 40 | super().__init__(processNum, machineNum) 41 | self.taskNum = taskNum 42 | self.capacity = capacity 43 | 44 | # RL算法 45 | ''' 46 | state_dim = machineNum + taskNum # 状态维度 47 | action_dim = machineNum # 动作维度 48 | action_num = taskNum + 1 # 每个动作供选择的个数 49 | action_max = action_num 50 | 51 | state_dim = 3 # 状态维度 52 | action_dim = 1 # 动作维度 53 | action_num = 2 # 每个动作供选择的个数 54 | action_max = 1 55 | ''' 56 | 57 | state_dim = machineNum + 2 # 状态维度 58 | action_dim = 1 # 动作维度 59 | action_num = machineNum # 每个动作供选择的个数 60 | action_max = action_num 61 | 62 | #self.rl = DDDPG(state_dim, action_dim, action_num, action_min, action_max, exploration_noise, capacity, device) 63 | self.rl = PolicyGradient(state_dim, 400, action_num, device) 64 | 65 | # 随机算法决策 66 | def SelectActionRandom(self, state): 67 | stateFront = state[: self.taskNum] 68 | action = [] 69 | # 每个设备随机选择动作, 注意这里设备按编号顺序先后决策 70 | for each in range(self.machineNum): 71 | # 获得该设备的状态 72 | machineState = state[self.taskNum+each] 73 | # machine空闲且有可供选择的job时,有0.5概率动作,如果动作则挑一个不为0的task运行 74 | if machineState == 0 and sum(stateFront) != 0: 75 | # 动作 76 | if random.randint(0, 1) == 1: 77 | # 挑一个不为0的task运行 78 | noZeroList = [n for n in 
range(self.taskNum) if stateFront[n]>0 ] 79 | tempIndex = random.choice(noZeroList) 80 | action.append(tempIndex+1) 81 | # 每台设备决策后都会state更新 82 | stateFront[tempIndex] -= 1 83 | else: 84 | action.append(0) 85 | else: 86 | action.append(0) 87 | return action 88 | 89 | 90 | ''' 91 | 中途处理原料的agent, 继承父类 92 | 93 | state格式: list, 长度为上一个agent的machine数(定值)+本agent的machine数(定值) 94 | 第一部分元素取值: 上一个agent的machine的状态, 0是其他情况, taskNum是种类为taskNum的job运行完成 95 | 第二部分元素取值: 本agent的machine的状态, 0是空闲, 1是非空闲(工作中或工作结束但材料没运走或损坏) 96 | 97 | action格式: list, 长度为machine数量(定值) 98 | 元素取值范围: 0+(lastAgent的Machine下标+1), 99 | 表示选择lastAgent的Machine的材料进行加工(要求材料在lastAgent上已经完工), 0表示不动作 100 | ''' 101 | class ProcessAgent(VanillaAgent): 102 | # 新增加一个Task的种类数 103 | def __init__(self, processNum, machineNum, lastMachineNum, action_min, exploration_noise, capacity, device): 104 | super().__init__(processNum, machineNum) 105 | self.lastMachineNum = lastMachineNum 106 | self.capacity = capacity 107 | 108 | # RL算法 109 | ''' 110 | state_dim = machineNum + lastMachineNum # 状态维度 111 | action_dim = machineNum # 动作维度 112 | action_num = lastMachineNum + 1 # 每个动作供选择的个数 113 | action_max = action_num 114 | 115 | state_dim = 3 # 状态维度 116 | action_dim = 1 # 动作维度 117 | action_num = 2 # 每个动作供选择的个数 118 | action_max = 1 119 | ''' 120 | state_dim = machineNum + 2 # 状态维度 121 | action_dim = 1 # 动作维度 122 | action_num = machineNum + 1 # 每个动作供选择的个数 123 | action_max = action_num 124 | #self.rl = DDDPG(state_dim, action_dim, action_num, action_min, action_max, exploration_noise, capacity, device) 125 | self.rl = PolicyGradient(state_dim, 400, action_num, device) 126 | 127 | # 随机算法决策 128 | def SelectActionRandom(self, state): 129 | stateFront = state[: self.lastMachineNum] 130 | action = [] 131 | for each in range(self.machineNum): 132 | noZeroList = [] 133 | # 设备不空闲或上一个流程没有job运行完的时候不动作 134 | if state[self.lastMachineNum+each] != 0 or sum(stateFront) == 0: 135 | action.append(0) 136 | else: 137 | # 设备空闲时有0.5概率动作, 从上一个流程里面选已经完成的job继续运行 138 | if random.randint(0, 1) == 1: 139 | # agent3不能选task1 140 | if self.processNum == 3: 141 | noZeroList = [n for n in range(self.lastMachineNum) if stateFront[n]>1] 142 | # 列表为空, 即没得选的时候, action为0 143 | if len(noZeroList) == 0: 144 | action.append(0) 145 | else: 146 | tempIndex = random.choice(noZeroList) 147 | action.append(tempIndex+1) 148 | stateFront[tempIndex] = 0 149 | # agent4不能选task1和task2, 只能选task3 150 | elif self.processNum == 4: 151 | noZeroList = [n for n in range(self.lastMachineNum) if stateFront[n]==3] 152 | # 列表为空, 即没得选的时候, action为0 153 | if len(noZeroList) == 0: 154 | action.append(0) 155 | else: 156 | tempIndex = random.choice(noZeroList) 157 | action.append(tempIndex+1) 158 | stateFront[tempIndex] = 0 159 | # 其它agent随便选 160 | else: 161 | noZeroList = [n for n in range(self.lastMachineNum) if stateFront[n]>0] 162 | tempIndex = random.choice(noZeroList) 163 | action.append(tempIndex+1) 164 | stateFront[tempIndex] = 0 165 | else: 166 | action.append(0) 167 | return action 168 | 169 | 170 | ''' 171 | 最后处理原料的agent, 继承父类 172 | 173 | state格式: list 174 | 长度: agent2,3,4,5的machine数(定值)之和 175 | agent2,3,4部分元素取值: agent的machine的状态, 0是其他情况, taskNum是种类为taskNum的job运行完成 176 | agent5部分元素取值: 本agent的machine的状态, 0是空闲, 1是非空闲(工作中或工作结束但材料没运走或损坏) 177 | 178 | action格式: list, 长度为machine数量(定值) 179 | 180 | frontMachineSum = self.agent2MachineNum+self.agent3MachineNum+self.agent4MachineNum 181 | stateFront = state[:frontMachineSum] 182 | 元素取值范围: 0+(stateFront下标+1), 183 | 表示选择stateFront的Machine的材料进行加工(要求材料在lastAgent上已经完工), 0表示不动作 184 | ''' 
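# --------------------------------------------------------------------------
# Hedged worked example of the encoding described above, using the machine
# counts from test.py (FinalAgent(5, 4, 5, 3, 4): agent2/3/4 own 5/3/4
# machines and this agent owns 4):
#   frontMachineSum = 5 + 3 + 4 = 12
#   state[:12]  -> upstream machine states, state[12:16] -> own machines
#   action value 0     -> no operation
#   action value 1..5  -> take the finished job on agent2's machine (value-1)
#   action value 6..8  -> take the finished job on agent3's machine (value-6)
#   action value 9..12 -> take the finished job on agent4's machine (value-9)
# The same index arithmetic is applied in Env.finalStep (environment.py).
# --------------------------------------------------------------------------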
185 | class FinalAgent(VanillaAgent): 186 | # 新增加一个Task的种类数 187 | def __init__(self, processNum, machineNum, agent2MachineNum, agent3MachineNum, agent4MachineNum): 188 | super().__init__(processNum, machineNum) 189 | self.agent2MachineNum = agent2MachineNum 190 | self.agent3MachineNum = agent3MachineNum 191 | self.agent4MachineNum = agent4MachineNum 192 | 193 | # 随机算法决策 194 | def SelectActionRandom(self, state): 195 | frontMachineSum = self.agent2MachineNum+self.agent3MachineNum+self.agent4MachineNum 196 | stateFront = state[:frontMachineSum] 197 | action = [] 198 | for each in range(self.machineNum): 199 | noZeroList = [] 200 | # 设备不空闲的时候不动作 201 | if state[frontMachineSum+each] != 0 or sum(stateFront) == 0: 202 | action.append(0) 203 | else: 204 | # 设备空闲时有0.5概率动作, 从之前流程里面选已经完成的job继续运行 205 | if random.randint(0, 1) == 1: 206 | for index in range(frontMachineSum): 207 | # 从agent2里面选task1 208 | if index=(self.agent2MachineNum+self.agent3MachineNum) and stateFront[index]==3: 215 | noZeroList.append(index) 216 | if len(noZeroList) == 0: 217 | action.append(0) 218 | else: 219 | tempIndex = random.choice(noZeroList) 220 | action.append(tempIndex+1) 221 | # 每台设备决策后都会state更新 222 | stateFront[tempIndex] = 0 223 | else: 224 | action.append(0) 225 | return action 226 | 227 | 228 | ''' 229 | state格式: list, 长度为上一个agent的machine数(定值)+本agent的machine数(定值) 230 | 第一部分元素取值: 上一个agent的machine的状态, 0是其他情况, taskNum是种类为taskNum的job运行完成 231 | 第二部分元素取值: 本agent的machine的状态, 0是空闲, 1是非空闲(工作中或工作结束但材料没运走或损坏) 232 | 233 | action格式: list, 长度为machine数量(定值) 234 | 元素取值范围: 0+(lastAgent的Machine下标+1), 235 | 表示选择lastAgent的Machine的材料进行加工(要求材料在lastAgent上已经完工), 0表示不动作 236 | ''' 237 | class LastAgent(VanillaAgent): 238 | # 新增加一个Task的种类数 239 | def __init__(self, processNum, lastProcessNum, machineNum, lastMachineNum, action_min, exploration_noise, capacity, device): 240 | super().__init__(processNum, machineNum) 241 | self.lastMachineNum = lastMachineNum 242 | self.lastProcessNum = lastProcessNum 243 | self.capacity = capacity 244 | 245 | # RL算法 246 | ''' 247 | state_dim = machineNum + lastMachineNum # 状态个数 248 | action_dim = machineNum # 动作维度 249 | action_num = lastMachineNum + 1 # 每个动作供选择的个数 250 | action_max = action_num 251 | 252 | state_dim = 3 # 状态维度 253 | action_dim = 1 # 动作维度 254 | action_num = 2 # 每个动作供选择的个数 255 | action_max = 1 256 | ''' 257 | state_dim = machineNum + 2 # 状态维度 258 | action_dim = 1 # 动作维度 259 | action_num = machineNum + 1 # 每个动作供选择的个数 260 | action_max = action_num 261 | #self.rl = DDDPG(state_dim, action_dim, action_num, action_min, action_max, exploration_noise, capacity, device) 262 | self.rl = PolicyGradient(state_dim, 400, action_num, device) 263 | 264 | # 测试程序 265 | def main(): 266 | agent0 = InitialAgent(0, 4, 3) 267 | agent1 = ProcessAgent(1, 6, 4) 268 | #state = [6,5,4,0,0,0,0] 269 | #action = agent0.SelectActionRandom(state) 270 | #print(action) 271 | state = [0,2,3,1, 0,0,0,0,0,0] 272 | action = agent1.SelectActionRandom(state) 273 | print(action) 274 | 275 | 276 | if __name__ == '__main__': 277 | main() -------------------------------------------------------------------------------- /background.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | background.py 5 | 6 | 该程序定义了调度系统的各种信息, 包括 结构, 功能, 流程数量, 工艺种类等信息 7 | 包括: 6个流程组成的工艺系统, 三种任务 8 | 9 | 注意: 这里面所有的编号都是从0开始的 10 | """ 11 | 12 | import numpy as np 13 | 14 | # 一个工业流程 15 | class TechnologicalProcess: 16 | def __init__(self, machine): 17 | # 一个流程拥有的设备数 18 | self.machine = machine 
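        # Note: SchedulingSystem (defined below) attaches a transpotArray to each
        # process instance after construction; this class itself only stores the
        # machine count.
        #
        # Hedged lookup example, based on the process0 values defined below:
        #   system = SchedulingSystem()
        #   system.transpotTime(system.process0.transpotArray, 0, 2)   # -> 7
        # i.e. moving material from machine 0 of process 0 to machine 2 of the
        # next process takes 7 time units (process0.transpotArray[0][2] == 7).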
19 | 20 | 21 | # 整个调度系统 22 | class SchedulingSystem: 23 | # 创建六个流程组成的调度系统 24 | def __init__(self): 25 | self.processNum = 6 26 | self.process0 = TechnologicalProcess(4) 27 | self.process1 = TechnologicalProcess(6) 28 | self.process2 = TechnologicalProcess(5) 29 | self.process3 = TechnologicalProcess(3) 30 | self.process4 = TechnologicalProcess(4) 31 | self.process5 = TechnologicalProcess(4) 32 | 33 | ''' 34 | 该流程的设备的材料 运输到 下一个流程的设备 的运输时间 35 | 注意: 这个运输时间与材料无关, 无论是什么材料, 都是同样的运输时间 36 | 37 | 数组形式, 行数为本流程设备数, 列数为下一个流程的设备数 38 | 假设本流程machine=4, 下一个流程machine=3, 数组为4*3形式, 39 | array[02]表示本流程第0个machine的材料运输到下一个流程的第2个machine所需要的时间 40 | ''' 41 | self.process0.transpotArray = np.array([[3, 7, 7, 8, 5, 5], 42 | [3, 9, 3, 4, 3, 3], 43 | [9, 3, 3, 4, 8, 4], 44 | [7, 7, 2, 9, 5, 2]]) 45 | self.process1.transpotArray = np.array([[5, 4, 6, 6, 6], 46 | [9, 8, 9, 3, 6], 47 | [2, 2, 8, 7, 8], 48 | [8, 7, 5, 8, 9], 49 | [7, 4, 5, 2, 3], 50 | [9, 2, 3, 9, 2]]) 51 | self.process2.transpotArray = np.array([[9, 5, 4], 52 | [4, 6, 8], 53 | [9, 6, 5], 54 | [2, 5, 5], 55 | [8, 4, 6]]) 56 | self.process3.transpotArray = np.array([[3, 9, 6, 5], 57 | [6, 4, 3, 3], 58 | [8, 7, 8, 4]]) 59 | self.process4.transpotArray = np.array([[6, 2, 4, 2], 60 | [8, 8, 6, 2], 61 | [3, 2, 9, 3], 62 | [5, 4, 6, 3]]) 63 | 64 | # 材料从当前流程的设备m运输到下一个流程的设备n所需的时间 65 | def transpotTime(self, array, m, n): 66 | return array[m][n] 67 | 68 | 69 | ''' 70 | 新建三个任务类型 71 | ''' 72 | # 需要完成的任务, 任务一 73 | class Task: 74 | def __init__(self): 75 | ''' 76 | process由六个bool类型组成, 因为是6道工序所以是6个bool值, 工序数与bool数量一致 77 | 111001表示要经过第1,2,3,6一共四道工序, 0表示该不需要经过该工序, 1表示需要经过该工序 78 | ''' 79 | self.process = '111001' 80 | ''' 81 | 该任务的材料在设备上运行所需要的时间, 数组形式, 行为任务需要的工序数, 列为该工序的机器数 82 | 假设需要4道工序, 每道工序的设备数分别为4654, 则有以下形式, 83 | array[02]表示材料在第1道工序的第3台设备上运行所需要的时间 84 | 0表示不在上面运行 85 | ''' 86 | self.dealArray = np.array([[19, 15, 10, 15], 87 | [11, 16, 17, 19, 19, 13], 88 | [15, 11, 13, 16, 14], 89 | [0 , 0 , 0 ], 90 | [0 , 0 , 0 , 0 ], 91 | [15, 19, 10, 15]]) 92 | 93 | ''' 94 | 工序3运输到工序6的时间, 结构和transpotArray一样 95 | ''' 96 | self.transport2To5 = np.array([[12, 23, 24, 14], 97 | [24, 28, 13, 27], 98 | [19, 10, 11, 15], 99 | [16, 27, 17, 29], 100 | [11, 29, 21, 11]]) 101 | 102 | # 返回该任务的材料在工序m的第n台设备运行所需要的时间 103 | def dealTime(self, array, m, n): 104 | return array[m][n] 105 | 106 | 107 | # 继承父类Task, 其它一样,需要完成的任务, 任务二 108 | class TaskTwo(Task): 109 | def __init__(self): 110 | self.process = '111101' 111 | self.dealArray = np.array([[16, 19, 10, 13], 112 | [10, 17, 19, 15, 12, 12], 113 | [14, 19, 14, 15, 19], 114 | [10, 18, 15], 115 | [0 , 0 , 0 , 0 ], 116 | [19, 12, 15, 10]]) 117 | self.transport3To5 = np.array([[12, 6, 15, 8], 118 | [ 8, 11, 18, 11], 119 | [ 9, 7, 17, 16]]) 120 | 121 | 122 | # 继承父类Task, 其它一样,需要完成的任务, 任务三 123 | class TaskThree(Task): 124 | def __init__(self): 125 | self.process = '111111' 126 | self.dealArray = np.array([[17, 10, 11, 15], 127 | [10, 19, 15, 15, 14, 11], 128 | [12, 18, 15, 12, 14], 129 | [15, 10, 16], 130 | [19, 15, 17, 18], 131 | [14, 17, 19, 12]]) 132 | 133 | -------------------------------------------------------------------------------- /environment.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | environment.py 5 | 6 | 该程序实现了整个项目的环境 7 | """ 8 | 9 | from background import * 10 | from agents import * 11 | 12 | ''' 13 | 整个运行环境, 原理: 14 | 时间不断增加, 时间不是现实时间, 而是count表示的计数 15 | 每一个时间, 对每一个agent进行遍历 16 | 每一个agent决策此时此刻是否要进行动作, 如何进行动作 17 | 一个agent决策后state变化后才轮到下一个agent进行决策 18 | ''' 19 
| class Env: 20 | # 定义一个调度系统, 以及5个task1, 6个task2, 4个task3 21 | def __init__(self): 22 | self.schedulingSystem = SchedulingSystem() 23 | 24 | # 运行总时间 25 | self.count = 0 26 | # 已完成的任务 27 | self.finishTask = [0,0,0] 28 | 29 | # 记录实时states和累计states 30 | self.envStates = [] 31 | 32 | # 定义task 33 | self.taskType = 3 34 | self.task1 = Task() 35 | self.task2 = TaskTwo() 36 | self.task3 = TaskThree() 37 | 38 | # 辅助程序 39 | def runTime(self, taskType, m, n): 40 | if taskType == 1: 41 | return self.task1.dealTime(self.task1.dealArray, m, n) 42 | elif taskType == 2: 43 | return self.task2.dealTime(self.task2.dealArray, m, n) 44 | elif taskType == 3: 45 | return self.task3.dealTime(self.task3.dealArray, m, n) 46 | 47 | # 辅助程序 48 | def transTime(self, processNum, m, n): 49 | if processNum == 1: 50 | return self.schedulingSystem.transpotTime(self.schedulingSystem.process0.transpotArray, m, n) 51 | elif processNum == 2: 52 | return self.schedulingSystem.transpotTime(self.schedulingSystem.process1.transpotArray, m, n) 53 | elif processNum == 3: 54 | return self.schedulingSystem.transpotTime(self.schedulingSystem.process2.transpotArray, m, n) 55 | elif processNum == 4: 56 | return self.schedulingSystem.transpotTime(self.schedulingSystem.process3.transpotArray, m, n) 57 | 58 | # 辅助程序 59 | def transCrossTime(self, taskType, m, n): 60 | if taskType == 1: 61 | return self.task1.dealTime(self.task1.transport2To5, m, n) 62 | elif taskType == 2: 63 | return self.task2.dealTime(self.task2.transport3To5, m, n) 64 | elif taskType == 3: 65 | return self.schedulingSystem.transpotTime(self.schedulingSystem.process4.transpotArray, m, n) 66 | 67 | # 辅助程序 68 | def transTimeLast(self, lastProcessNum, m, n): 69 | if lastProcessNum == 2: 70 | return self.task1.dealTime(self.task1.transport2To5, m, n) 71 | elif lastProcessNum == 3: 72 | return self.task2.dealTime(self.task2.transport3To5, m, n) 73 | elif lastProcessNum == 4: 74 | return self.schedulingSystem.transpotTime(self.schedulingSystem.process4.transpotArray, m, n) 75 | 76 | ''' 77 | 定义initialStep的States规则: 78 | 原料列表: 值表示还剩下多少任务没有开工 79 | 各agent列表分以下情况: 80 | -1: 损坏/维护, 格式: [-1] 81 | 0: 空闲, 格式: [0] 82 | 1: 工作, 格式:[1, task, 结束倒计时, 1(延时)/0(正常)/-1(提早)] 83 | eg: [1, 2, 5, 0] 表示: 当前machine运行task2, 5t后工作结束, 目前一切正常 84 | 3: 占用, 格式: [3] 85 | 没有运输环节 86 | ''' 87 | # 输入action, 更改envStates, done表示系统是否出错 88 | def initialStep(self, agent, action): 89 | done = 0 90 | reward = 0 91 | envStates = self.envStates 92 | # 原料的state与本agent的state变化 93 | material = envStates[0] 94 | state = envStates[agent.processNum+1] 95 | 96 | # 对每一个设备分析情况 97 | for each in range(agent.machineNum): 98 | # 设备为损坏或维护时 99 | if state[each][0] == -1: 100 | # 损坏或维护不能添加任务 101 | if action[each] != 0: 102 | done = 1 103 | break 104 | # 设备为空闲时 105 | elif state[each][0] == 0: 106 | # 空闲时可以添加任务 107 | if action[each] != 0: 108 | temp = self.runTime(action[each],agent.processNum,each) 109 | state[each] = [1, action[each], temp, 0] 110 | material[action[each]-1] -= 1 111 | reward += -temp 112 | # 设备为工作时 113 | elif state[each][0] == 1: 114 | # 工作时不能添加任务 115 | if action[each] != 0: 116 | done = 1 117 | break 118 | else: 119 | # 已有工作减1单位剩余时间 120 | state[each][2] -= 1 121 | # 工作完成时变成占用 122 | if state[each][2] == 0: 123 | temp = state[each][1] 124 | state[each] = [3, temp] 125 | 126 | envStates[0] = material 127 | envStates[agent.processNum+1] = state 128 | 129 | return envStates 130 | 131 | def initReturn(self, state, action): 132 | if state[action[0]+1] == 0: 133 | reward = -self.runTime(state[1],0,action[0]-1) 134 | done = 0 135 
| else: 136 | reward = -1000 137 | done = 1 138 | 139 | return reward, done 140 | 141 | ''' 142 | 定义processStep的States规则: 143 | 原料列表: 值表示还剩下多少任务没有开工 144 | 各agent列表分以下情况: 145 | -1: 损坏/维护, 格式: [-1] 146 | 0: 空闲, 格式: [0] 147 | 1: 工作, 格式:[1, task, 结束倒计时, 1(延时)/0(正常)/-1(提早)] 148 | eg: [1, 2, 5, 0] 表示: 当前machine运行task2, 5t后工作结束, 目前一切正常 149 | 2: 运输, 格式:[2, task, lastMachine, nextMachine, 结束倒计时] 150 | eg: [2, 2, 3, 4, 1] 表示: 正在将task2从machine4运往下个agent的machine5, 1t后到达 151 | 3: 占用, 格式: [3, task] 152 | ''' 153 | # 输入action, 更改envStates, done表示系统是否出错 154 | def processStep(self, agent, action): 155 | done = 0 156 | reward = 0 157 | envStates = self.envStates 158 | # 上一个agent的state与本agent的state变化 159 | lastState = envStates[agent.processNum] 160 | state = envStates[agent.processNum+1] 161 | 162 | # 对每一个设备分析情况 163 | for each in range(agent.machineNum): 164 | # 设备为损坏或维护时 165 | if state[each][0] == -1: 166 | # 损坏或维护不能添加任务 167 | if action[each] != 0: 168 | done = 1 169 | break 170 | # lastMachine=3且设备为空闲时可以添加任务, lastMachine由占用状态变运输状态 171 | elif state[each][0] == 0: 172 | if action[each] != 0: 173 | if lastState[action[each]-1][0] == 3: 174 | # agent3不能选task1 175 | if agent.processNum == 3 and lastState[action[each]-1][-1] == 1: 176 | done = 1 177 | break 178 | # agent4不能选task1和task2, 只能选task3 179 | if agent.processNum == 4 and lastState[action[each]-1][-1] != 3: 180 | done = 1 181 | break 182 | temp = self.transTime(agent.processNum,action[each]-1,each) 183 | state[each] = [2, lastState[action[each]-1][-1], action[each]-1, each, temp] 184 | lastState[action[each]-1] = [0] 185 | reward += -temp-self.runTime(state[each][1],agent.processNum,each) 186 | else: 187 | done = 1 188 | break 189 | # 设备为工作时 190 | elif state[each][0] == 1: 191 | # 工作时不能添加任务 192 | if action[each] != 0: 193 | done = 1 194 | break 195 | else: 196 | # 已有工作减1单位剩余时间 197 | state[each][2] -= 1 198 | # 工作完成时变成占用 199 | if state[each][2] == 0: 200 | temp = state[each][1] 201 | state[each] = [3, temp] 202 | # 设备为运输时不能选动作 203 | elif state[each][0] == 2: 204 | if action[each] != 0: 205 | done = 1 206 | break 207 | # 运输时间减1 208 | else: 209 | state[each][-1] -= 1 210 | # 运输到目的地转为工作状态 211 | if state[each][-1] == 0: 212 | temp = self.runTime(state[each][1],agent.processNum,each) 213 | state[each] = [1, state[each][1], temp, 0] 214 | 215 | envStates[agent.processNum] = lastState 216 | envStates[agent.processNum+1] = state 217 | 218 | return envStates 219 | 220 | # 输入action, 更改envStates, done表示系统是否出错 221 | def lastStep(self, agent, action): 222 | done = 0 223 | reward = 0 224 | envStates = self.envStates 225 | # 上一个agent的state与本agent的state变化 226 | lastState = envStates[agent.lastProcessNum+1] 227 | state = envStates[agent.processNum+1] 228 | 229 | # 对每一个设备分析情况 230 | for each in range(agent.machineNum): 231 | # 设备为损坏或维护时 232 | if state[each][0] == -1: 233 | # 损坏或维护不能添加任务 234 | if action[each] != 0: 235 | done = 1 236 | break 237 | # lastMachine=3且设备为空闲时可以添加任务, lastMachine由占用状态变运输状态 238 | elif state[each][0] == 0: 239 | if action[each] != 0: 240 | if lastState[action[each]-1][0] == 3: 241 | # agent5只能选task1 242 | if agent.lastProcessNum == 2 and lastState[action[each]-1][-1] != 1: 243 | done = 1 244 | break 245 | # agent6只能选task2 246 | if agent.lastProcessNum == 3 and lastState[action[each]-1][-1] != 2: 247 | done = 1 248 | break 249 | # agent7只能选task3 250 | if agent.lastProcessNum == 3 and lastState[action[each]-1][-1] != 3: 251 | done = 1 252 | break 253 | temp = self.transTimeLast(agent.lastProcessNum,action[each]-1,each) 254 | state[each] = [2, 
lastState[action[each]-1][-1], action[each]-1, each, temp] 255 | lastState[action[each]-1] = [0] 256 | reward += -temp-self.runTime(state[each][1],agent.processNum,each) 257 | else: 258 | done = 1 259 | break 260 | # 设备为工作时 261 | elif state[each][0] == 1: 262 | # 工作时不能添加任务 263 | if action[each] != 0: 264 | done = 1 265 | break 266 | else: 267 | # 已有工作减1单位剩余时间 268 | state[each][2] -= 1 269 | # 工作完成时变成空闲 270 | if state[each][2] == 0: 271 | temp = state[each][1] 272 | state[each] = [0] 273 | # 设备为运输时不能选动作 274 | elif state[each][0] == 2: 275 | if action[each] != 0: 276 | done = 1 277 | break 278 | # 运输时间减1 279 | else: 280 | state[each][-1] -= 1 281 | # 运输到目的地转为工作状态 282 | if state[each][-1] == 0: 283 | temp = self.runTime(state[each][1],agent.processNum,each) 284 | state[each] = [1, state[each][1], temp, 0] 285 | 286 | envStates[agent.lastProcessNum+1] = lastState 287 | envStates[agent.processNum+1] = state 288 | 289 | return envStates, reward, done 290 | 291 | ''' 292 | 定义finalStep的States规则: 293 | 和processStep原则一样, 多了一个完成任务的记录 294 | ''' 295 | def finalStep(self, agent, action, agent2, agent3, agent4): 296 | done = 0 297 | envStates = self.envStates 298 | agent2State = envStates[agent.processNum-2] 299 | agent3State = envStates[agent.processNum-1] 300 | agent4State = envStates[agent.processNum] 301 | state = envStates[agent.processNum+1] 302 | 303 | # 对每一个设备分析情况 304 | for each in range(agent.machineNum): 305 | # 设备为损坏或维护时 306 | if state[each][0] == -1: 307 | # 损坏或维护不能添加任务 308 | if action[each] != 0: 309 | done = 1 310 | break 311 | # 设备为空闲时可以添加任务 312 | elif state[each][0] == 0: 313 | if action[each] != 0: 314 | # 对action进行解码, 区分action获取哪个agent的job 315 | tempActAg3 = action[each]-agent2.machineNum 316 | tempActAg4 = action[each]-agent2.machineNum-agent3.machineNum 317 | # agent2部分 318 | if action[each] < (agent2.machineNum+1): 319 | if agent2State[action[each]-1][0] == 3: 320 | temp = self.transCrossTime(agent2State[action[each]-1][-1],action[each]-1,each) 321 | state[each] = [2, agent2State[action[each]-1][-1], action[each]-1, each, temp] 322 | agent2State[action[each]-1] = [0] 323 | else: 324 | done = 1 325 | break 326 | # agent3部分 327 | elif action[each] < (agent2.machineNum+agent3.machineNum+1): 328 | if agent3State[tempActAg3-1][0] == 3: 329 | temp = self.transCrossTime(agent3State[tempActAg3-1][-1],tempActAg3-1,each) 330 | state[each] = [2, agent3State[tempActAg3-1][-1], tempActAg3-1, each, temp] 331 | agent3State[tempActAg3-1] = [0] 332 | else: 333 | done = 1 334 | break 335 | # agent4部分 336 | else: 337 | if agent4State[tempActAg4-1][0] == 3: 338 | temp = self.transCrossTime(agent4State[tempActAg4-1][-1],tempActAg4-1,each) 339 | state[each] = [2, agent4State[tempActAg4-1][-1], tempActAg4-1, each, temp] 340 | agent4State[tempActAg4-1] = [0] 341 | else: 342 | done = 1 343 | break 344 | # 设备为工作时 345 | elif state[each][0] == 1: 346 | # 工作时不能添加任务 347 | if action[each] != 0: 348 | done = 1 349 | break 350 | else: 351 | # 已有工作减1单位剩余时间 352 | state[each][2] -= 1 353 | # 工作完成时变成空闲 354 | if state[each][2] == 0: 355 | self.finishTask[state[each][1]-1] += 1 356 | state[each] = [0] 357 | # 设备为运输时不能选动作 358 | elif state[each][0] == 2: 359 | if action[each] != 0: 360 | done = 1 361 | break 362 | # 运输时间减1 363 | else: 364 | state[each][-1] -= 1 365 | # 运输到目的地转为工作状态 366 | if state[each][-1] == 0: 367 | temp = self.runTime(state[each][1],agent.processNum,each) 368 | state[each] = [1, state[each][1], temp, 0] 369 | 370 | envStates[agent.processNum-2] = agent2State 371 | envStates[agent.processNum-1] = 
agent3State 372 | envStates[agent.processNum] = agent4State 373 | envStates[agent.processNum+1] = state 374 | 375 | return envStates, done 376 | 377 | # 重置时间与状态 378 | def reset(self): 379 | self.count = 0 380 | self.finishTask = [0,0,0] 381 | self.envStates = [[5,6,4], 382 | [[0],[0],[0],[0]], 383 | [[0],[0],[0],[0],[0],[0]], 384 | [[0],[0],[0],[0],[0]], 385 | [[0],[0],[0]], 386 | [[0],[0],[0],[0]], 387 | [[0],[0],[0],[0]]] 388 | 389 | # 检查整个系统是否结束 390 | def ifTaskFinish(self): 391 | material = self.envStates[0] 392 | process = self.envStates[1:] 393 | # 原料不为0一定不完成 394 | for each in material: 395 | if each != 0: 396 | return 0 397 | # 过程全为0或者-1 398 | for eachLine in process: 399 | for each in eachLine: 400 | if each[0] != 0 and each[0] != -1: 401 | return 0 402 | print("调度结束") 403 | return 1 404 | 405 | -------------------------------------------------------------------------------- /model/DDDPG.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | deep deterministic discrete policy gradient 5 | 6 | 与deep deterministic policy gradient的区别: 7 | 修改了Actor的forward函数的神经网络的最后一层, 改成softmax函数 8 | DDDPG的select_action函数, 对action进行向下取整以实现离散化 9 | """ 10 | 11 | import random 12 | import numpy as np 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.nn.functional as F 17 | import torch.optim as optim 18 | 19 | 20 | class Replay_buffer(): 21 | def __init__(self, max_size): 22 | self.storage = [] 23 | self.max_size = max_size 24 | self.ptr = 0 25 | 26 | def push(self, data): 27 | if len(self.storage) == self.max_size: 28 | self.storage[int(self.ptr)] = data 29 | self.ptr = (self.ptr + 1) % self.max_size 30 | else: 31 | self.storage.append(data) 32 | 33 | # 从replay_buffer采样batch_size条数据; 数组x, y, u, r, d分别储存batch_size个采样到的x, y, u, r, d 34 | def sample(self, batch_size): 35 | ind = np.random.randint(0, len(self.storage), size=batch_size) 36 | x, y, u, r, d = [], [], [], [], [] 37 | 38 | for i in ind: 39 | X, Y, U, R, D = self.storage[i] 40 | x.append(np.array(X, copy=False)) 41 | y.append(np.array(Y, copy=False)) 42 | u.append(np.array(U, copy=False)) 43 | r.append(np.array(R, copy=False)) 44 | d.append(np.array(D, copy=False)) 45 | 46 | return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1) 47 | 48 | 49 | class Actor(nn.Module): 50 | def __init__(self, state_dim, action_dim, action_num): 51 | super(Actor, self).__init__() 52 | 53 | self.l1 = nn.Linear(state_dim, 400) 54 | self.l2 = nn.Linear(400, 300) 55 | self.l3 = nn.Linear(300, action_dim) 56 | 57 | self.action_num = action_num-0.1 # 减0.1, 后面向下取整的时候不会取到action_num 58 | 59 | def forward(self, x): 60 | x = F.relu(self.l1(x)) # 根据relu函数修改, 小于0的值变成0, 大于0的值不变 61 | x = F.relu(self.l2(x)) 62 | # softmax值域0至1, 乘action_num将值域扩展为需要的值域; dim=1按列计算 63 | x = self.action_num * F.softmax(self.l3(x), dim=1) 64 | return x 65 | 66 | 67 | class Critic(nn.Module): 68 | def __init__(self, state_dim, action_dim): 69 | super(Critic, self).__init__() 70 | 71 | self.l1 = nn.Linear(state_dim + action_dim, 400) 72 | self.l2 = nn.Linear(400 , 300) 73 | self.l3 = nn.Linear(300, 1) 74 | 75 | def forward(self, x, u): 76 | print(x) 77 | print(u) 78 | x = F.relu(self.l1(torch.cat([x, u], 1))) 79 | x = F.relu(self.l2(x)) 80 | x = self.l3(x) 81 | return x 82 | 83 | 84 | class DDDPG(object): 85 | def __init__(self, state_dim, action_dim, action_num, action_min, action_max, exploration_noise, capacity, device): 86 | self.action_dim = action_dim 87 | 
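        # Hedged summary of how the discrete action arises in this class:
        # Actor.forward ends in a softmax scaled by (action_num - 0.1), so each
        # output component is at most action_num - 0.1; select_action() then
        # truncates it with astype(np.int), giving an integer in
        # {0, ..., action_num - 1} (e.g. action_num = 4: 2.7 -> 2, 3.9 -> 3).
        # add_action_noise() afterwards adds Gaussian exploration noise and
        # clips the result into [action_min, action_max] before truncating again.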
self.action_min = action_min 88 | self.action_max = action_max 89 | self.device = device 90 | self.exploration_noise = exploration_noise 91 | 92 | self.actor = Actor(state_dim, action_dim, action_num).to(self.device) # actor网络 93 | self.actor_target = Actor(state_dim, action_dim, action_num).to(self.device) # actor_target网络 94 | self.actor_target.load_state_dict(self.actor.state_dict()) 95 | self.actor_optimizer = optim.Adam(self.actor.parameters(), 1e-3) # 优化器 96 | 97 | self.critic = Critic(state_dim, action_dim).to(self.device) # critic网络 98 | self.critic_target = Critic(state_dim, action_dim).to(self.device) # critic_target网络 99 | self.critic_target.load_state_dict(self.critic.state_dict()) 100 | self.critic_optimizer = optim.Adam(self.critic.parameters(), 1e-3) 101 | 102 | self.replay_buffer = Replay_buffer(capacity) 103 | self.num_critic_update_iteration = 0 104 | self.num_actor_update_iteration = 0 105 | self.num_training = 0 106 | 107 | def select_action(self, state): 108 | # FloatTensor建立FloatTensor类型; reshape(1,-1)指无论多少行列, 都将其变成一行 109 | state = torch.FloatTensor(np.array(state).reshape(1, -1)).to(self.device) 110 | # cpu():提取CPU的data数据, numpy():tensor转numpy, flatten():降成一行 111 | action = self.actor(state).cpu().data.numpy().flatten() 112 | 113 | # 对每个action的元素 通过向下取整的方式 进行离散化 114 | return action.astype(np.int) 115 | 116 | def add_action_noise(self, action): 117 | action = (action + np.random.normal(0, self.exploration_noise, size=self.action_dim)).clip(self.action_min, self.action_max) 118 | return action.astype(np.int) 119 | 120 | # update一次训练update_iteration批次, 每批次学习batch_size条数据, 即update一次学习(update_iteration*batch_size)条数据 121 | def update(self,tau=0.005,batch_size=64,update_iteration=10): 122 | for it in range(update_iteration): 123 | # Sample replay buffer 124 | x, y, u, r, d = self.replay_buffer.sample(batch_size) 125 | state = torch.FloatTensor(x).to(self.device) 126 | action = torch.FloatTensor(u).to(self.device) 127 | next_state = torch.FloatTensor(y).to(self.device) 128 | reward = torch.FloatTensor(r).to(self.device) 129 | done = torch.FloatTensor(d).to(self.device) 130 | 131 | # Compute the target Q value 132 | target_Q = self.critic_target(next_state, self.actor_target(next_state)) 133 | target_Q = reward + ((1 - done) * 0.99 * target_Q).detach() 134 | 135 | # Get current Q estimate 136 | current_Q = self.critic(state, action) 137 | 138 | # Compute critic loss 139 | critic_loss = F.mse_loss(current_Q, target_Q) 140 | 141 | # Optimize the critic 142 | self.critic_optimizer.zero_grad() 143 | critic_loss.backward() 144 | self.critic_optimizer.step() 145 | 146 | # Compute actor loss 147 | actor_loss = -self.critic(state, self.actor(state)).mean() 148 | 149 | # Optimize the actor 150 | self.actor_optimizer.zero_grad() 151 | actor_loss.backward() 152 | self.actor_optimizer.step() 153 | 154 | # Update the frozen target models 155 | for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()): 156 | target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data) 157 | 158 | for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): 159 | target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data) 160 | 161 | self.num_actor_update_iteration += 1 162 | self.num_critic_update_iteration += 1 163 | 164 | def save(self, directory, name, i): 165 | torch.save(self.actor.state_dict(), directory + name + '_' + str(i) + '_actor.pth') 166 | torch.save(self.critic.state_dict(), directory + name + '_' + 
str(i) + '_critic.pth') 167 | #print("====================================") 168 | #print("Model has been saved...") 169 | #print("====================================") 170 | 171 | def load(self, directory, name, i): 172 | self.actor.load_state_dict(torch.load(directory + name + '_' + str(i) + '_actor.pth')) 173 | self.critic.load_state_dict(torch.load(directory + name + '_' + str(i) + '_critic.pth')) 174 | #print("====================================") 175 | #print("model has been loaded...") 176 | #print("====================================") 177 | 178 | 179 | -------------------------------------------------------------------------------- /model/DDPG.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | deep deterministic policy gradient 5 | """ 6 | 7 | import random 8 | import numpy as np 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | 15 | 16 | class Replay_buffer(): 17 | def __init__(self, max_size): 18 | self.storage = [] 19 | self.max_size = max_size 20 | self.ptr = 0 21 | 22 | def push(self, data): 23 | if len(self.storage) == self.max_size: 24 | self.storage[int(self.ptr)] = data 25 | self.ptr = (self.ptr + 1) % self.max_size 26 | else: 27 | self.storage.append(data) 28 | 29 | # 从replay_buffer采样batch_size条数据; 数组x, y, u, r, d分别储存batch_size个采样到的x, y, u, r, d 30 | def sample(self, batch_size): 31 | ind = np.random.randint(0, len(self.storage), size=batch_size) 32 | x, y, u, r, d = [], [], [], [], [] 33 | 34 | for i in ind: 35 | X, Y, U, R, D = self.storage[i] 36 | x.append(np.array(X, copy=False)) 37 | y.append(np.array(Y, copy=False)) 38 | u.append(np.array(U, copy=False)) 39 | r.append(np.array(R, copy=False)) 40 | d.append(np.array(D, copy=False)) 41 | 42 | return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1) 43 | 44 | 45 | class Actor(nn.Module): 46 | def __init__(self, state_dim, action_dim, max_action): 47 | super(Actor, self).__init__() 48 | 49 | self.l1 = nn.Linear(state_dim, 400) 50 | self.l2 = nn.Linear(400, 300) 51 | self.l3 = nn.Linear(300, action_dim) 52 | 53 | self.max_action = max_action 54 | 55 | def forward(self, x): 56 | x = F.relu(self.l1(x)) # 根据relu函数修改, 小于0的值变成0, 大于0的值不变 57 | x = F.relu(self.l2(x)) 58 | # 根据tanh函数修改(relu函数的平滑版本); tanh函数值域-1至1, 所以乘max_action将值域扩展为需要的值域 59 | x = self.max_action * torch.tanh(self.l3(x)) 60 | return x 61 | 62 | 63 | class Critic(nn.Module): 64 | def __init__(self, state_dim, action_dim): 65 | super(Critic, self).__init__() 66 | 67 | self.l1 = nn.Linear(state_dim + action_dim, 400) 68 | self.l2 = nn.Linear(400 , 300) 69 | self.l3 = nn.Linear(300, 1) 70 | 71 | def forward(self, x, u): 72 | x = F.relu(self.l1(torch.cat([x, u], 1))) 73 | x = F.relu(self.l2(x)) 74 | x = self.l3(x) 75 | return x 76 | 77 | 78 | class DDPG(object): 79 | def __init__(self, state_dim, action_dim, max_action,capacity,device): 80 | self.device = device 81 | self.actor = Actor(state_dim, action_dim, max_action).to(self.device) # actor网络 82 | self.actor_target = Actor(state_dim, action_dim, max_action).to(self.device) # actor_target网络 83 | self.actor_target.load_state_dict(self.actor.state_dict()) 84 | self.actor_optimizer = optim.Adam(self.actor.parameters(), 1e-3) # 优化器 85 | 86 | self.critic = Critic(state_dim, action_dim).to(self.device) # critic网络 87 | self.critic_target = Critic(state_dim, action_dim).to(self.device) # critic_target网络 88 | 
self.critic_target.load_state_dict(self.critic.state_dict()) 89 | self.critic_optimizer = optim.Adam(self.critic.parameters(), 1e-3) 90 | 91 | self.replay_buffer = Replay_buffer(capacity) 92 | self.num_critic_update_iteration = 0 93 | self.num_actor_update_iteration = 0 94 | self.num_training = 0 95 | 96 | def select_action(self, state): 97 | # FloatTensor建立FloatTensor类型; reshape(1,-1)指无论多少行列, 都将其变成一行 98 | state = torch.FloatTensor(state.reshape(1, -1)).to(self.device) 99 | # cpu():提取CPU的data数据, numpy():tensor转numpy, flatten():降成一行 100 | return self.actor(state).cpu().data.numpy().flatten() 101 | 102 | # update一次训练update_iteration批次, 每批次学习batch_size条数据, 即update一次学习(update_iteration*batch_size)条数据 103 | def update(self,tau=0.005,batch_size=64,update_iteration=10): 104 | for it in range(update_iteration): 105 | # Sample replay buffer 106 | x, y, u, r, d = self.replay_buffer.sample(batch_size) 107 | state = torch.FloatTensor(x).to(self.device) 108 | action = torch.FloatTensor(u).to(self.device) 109 | next_state = torch.FloatTensor(y).to(self.device) 110 | reward = torch.FloatTensor(r).to(self.device) 111 | done = torch.FloatTensor(d).to(self.device) 112 | 113 | # Compute the target Q value 114 | target_Q = self.critic_target(next_state, self.actor_target(next_state)) 115 | target_Q = reward + ((1 - done) * 0.99 * target_Q).detach() 116 | 117 | # Get current Q estimate 118 | current_Q = self.critic(state, action) 119 | 120 | # Compute critic loss 121 | critic_loss = F.mse_loss(current_Q, target_Q) 122 | 123 | # Optimize the critic 124 | self.critic_optimizer.zero_grad() 125 | critic_loss.backward() 126 | self.critic_optimizer.step() 127 | 128 | # Compute actor loss 129 | actor_loss = -self.critic(state, self.actor(state)).mean() 130 | 131 | # Optimize the actor 132 | self.actor_optimizer.zero_grad() 133 | actor_loss.backward() 134 | self.actor_optimizer.step() 135 | 136 | # Update the frozen target models 137 | for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()): 138 | target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data) 139 | 140 | for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): 141 | target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data) 142 | 143 | self.num_actor_update_iteration += 1 144 | self.num_critic_update_iteration += 1 145 | 146 | def save(self, directory, i): 147 | torch.save(self.actor.state_dict(), directory + str(i) + '_actor.pth') 148 | torch.save(self.critic.state_dict(), directory + str(i) + '_critic.pth') 149 | print("====================================") 150 | print("Model has been saved...") 151 | print("====================================") 152 | 153 | def load(self, directory, i): 154 | self.actor.load_state_dict(torch.load(directory + str(i) + '_actor.pth')) 155 | self.critic.load_state_dict(torch.load(directory + str(i) + '_critic.pth')) 156 | print("====================================") 157 | print("model has been loaded...") 158 | print("====================================") 159 | 160 | 161 | -------------------------------------------------------------------------------- /model/DDPGtest.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | deep deterministic policy gradient test 5 | """ 6 | 7 | import os 8 | import gym 9 | 10 | from DDPG import * 11 | 12 | env_name = 'Pendulum-v0' # 游戏名 13 | env = gym.make(env_name).unwrapped # 创建gym游戏 14 | 15 | # use the cuda or not 16 | 
device = 'cuda' if torch.cuda.is_available() else 'cpu' 17 | if device == 'cuda': 18 | print('using the GPU...') 19 | else: 20 | print('using the CPU...') 21 | 22 | # 强化学习参数 23 | state_dim = env.observation_space.shape[0] # 状态个数 24 | action_dim = env.action_space.shape[0] # 动作个数 25 | max_action = float(env.action_space.high[0]) # 动作最大值 26 | 27 | # 其它参数 28 | num_episodes = 1 # 训练时走几次 29 | num_steps = 2 # 训练时一次走几步 30 | test_iteration = 10 # 测试时走几次 31 | num_test_steps = 200 # 测试时一次走几步 32 | mode = 'train' # train or test 33 | 34 | retrain = True # 是否重头训练 35 | weight_num = 900 # 载入权重的代数,用于中途继续训练和test情况 36 | log_interval = 100 # 每隔log_interval保存一次参数 37 | print_log = 5 # 每走print_log次输出一次 38 | exploration_noise = 0.1 # 加入随机量 39 | capacity = 5000 # 储存量 40 | 41 | 42 | # create the directory to save the weight and the result 43 | directory = './exp_ddpg_' + env_name +'./' 44 | if not os.path.exists(directory): 45 | os.mkdir(directory) 46 | 47 | # 创建agent 48 | agent = DDPG(state_dim, action_dim, max_action,capacity,device) 49 | 50 | # train 51 | if mode == 'train': 52 | # 是否中途开始训练 53 | if retrain == False: 54 | agent.load(directory, weight_num) 55 | 56 | for i_episode in range(num_episodes): 57 | # 环境回归原位 58 | state = env.reset() 59 | rewards = [] 60 | 61 | # 每次走num_steps步 62 | for t in range(num_steps): 63 | # 选action 64 | action = agent.select_action(state) 65 | 66 | # add noise to action 67 | action = (action + np.random.normal(0, exploration_noise, size=env.action_space.shape[0])).clip( 68 | env.action_space.low, env.action_space.high) 69 | 70 | # 环境反馈 71 | next_state, reward, done, info = env.step(action) 72 | rewards.append(reward) 73 | agent.replay_buffer.push((state, next_state, action, reward, np.float(done))) 74 | ''' 75 | for each in agent.replay_buffer.storage: 76 | print(each) 77 | ''' 78 | # 更新state 79 | state = next_state 80 | if done: 81 | break 82 | 83 | # 参数更新是运行完一次更新一次, 不是每走一步更新一次 84 | if len(agent.replay_buffer.storage) >= capacity-1: 85 | agent.update() 86 | 87 | # 保存权重并输出 88 | if i_episode % log_interval == 0 and i_episode != 0: 89 | agent.save(directory, i_episode) 90 | 91 | # 每隔几次输出一次信息 92 | if i_episode % print_log == 0 and i_episode != 0: 93 | # 输出回报 94 | print("Episode: {}, reward: {}".format(i_episode, np.sum(rewards))) 95 | env.close() 96 | # test 97 | elif mode == 'test': 98 | agent.load(directory, weight_num) 99 | print("load weight...") 100 | 101 | for i_episode in range(test_iteration): 102 | state = env.reset() 103 | for t in range(num_test_steps): 104 | action = agent.select_action(state) 105 | 106 | next_state, reward, done, _ = env.step(np.float32(action)) 107 | env.render() 108 | state = next_state 109 | if done: 110 | break 111 | env.close() 112 | else: 113 | raise NameError("mode wrong!!!") 114 | 115 | -------------------------------------------------------------------------------- /model/PG.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | policy Gradient 5 | """ 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torch.autograd import Variable 11 | import torch.optim as optim 12 | 13 | # 定义全连接神经网络 14 | class Net(nn.Module): 15 | # 搭建网络, state_dim 状态个数, hidden_dim 隐藏层个数, action_dim 动作个数 16 | def __init__(self, state_dim, hidden_dim, action_dim): 17 | super(Net, self).__init__() 18 | 19 | self.l1 = nn.Linear(state_dim, hidden_dim) 20 | self.l2 = nn.Linear(hidden_dim, action_dim) 21 | 22 | # 前向传播 23 | def forward(self, x): 24 | # 各个元素调用relu函数, 
为了增加非线性性 25 | x = F.relu(self.l1(x)) 26 | action_scores = self.l2(x) 27 | # 对张量用softmax函数, 得到是0-1之间的值 28 | return F.softmax(action_scores, dim=1) 29 | 30 | 31 | class PolicyGradient: 32 | def __init__(self, state_dim, hidden_dim, action_dim, device): 33 | self.device = device 34 | # 实例化神经网络 to(device)决定使用GPU或者CPU, 实例化时会定义 35 | self.model = Net(state_dim, hidden_dim, action_dim).to(self.device) 36 | # optimizer:优化器, lr:learning_rate 37 | self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3) 38 | 39 | # 选择动作 40 | def select_action(self, state): 41 | # model(state)指将state输入神经网络(model)里面 42 | # 转成Variable形式, probs输出size为action_dim, 值为代表权重的小数 43 | probs = self.model(Variable(state).to(self.device)) 44 | # multinomial()从probs里面按概率取值, probs里面元素代表权重, 元素值越大越容易被取到, 即取权重大的值的下标 45 | action = probs.multinomial(1).data 46 | prob = probs[:, action[0,0]].view(1, -1) 47 | log_prob = prob.log() 48 | entropy = - (probs*probs.log()).sum() 49 | 50 | return action[0], log_prob, entropy 51 | 52 | # 更新: rewards,log_probs,entropies是每运行一次里所有的reward,log_prob,entropie的集合 53 | def update(self, rewards, log_probs, entropies): 54 | # R = 0 tensor类型 一行一列 55 | R = torch.zeros(1, 1) 56 | loss = 0 57 | for i in reversed(range(len(rewards))): 58 | R = 0.99 * R + rewards[i] 59 | loss = loss - (log_probs[i]*(Variable(R).expand_as(log_probs[i])).to(self.device)).sum() - (0.0001*entropies[i].to(self.device)).sum() 60 | loss = loss / len(rewards) 61 | 62 | # 优化网络 63 | self.optimizer.zero_grad() 64 | loss.backward() 65 | # 梯度剪切, 防止梯度消失 66 | #nn.utils.clip_grad_norm(self.model.parameters(), 40) 67 | self.optimizer.step() 68 | 69 | def save(self, directory, name, i): 70 | torch.save(self.model.state_dict(), directory + name + '_' + str(i) + '.pth') 71 | #print("====================================") 72 | #print("Model has been saved...") 73 | #print("====================================") 74 | 75 | def load(self, directory, i): 76 | self.model.load_state_dict(torch.load(directory + str(i) + '.pth')) 77 | print("====================================") 78 | print("model has been loaded...") 79 | print("====================================") 80 | 81 | -------------------------------------------------------------------------------- /model/PGtest.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | policy gradient test 5 | """ 6 | 7 | import os 8 | import gym 9 | import numpy as np 10 | 11 | from PG import * 12 | 13 | # 环境参数 14 | env_name = 'CartPole-v0' 15 | env_seed = False 16 | random_seed = 9527 17 | 18 | # 创建gym游戏 19 | env = gym.make(env_name).unwrapped 20 | # 一般情况下不使用 21 | if env_seed: 22 | env.seed(random_seed) 23 | torch.manual_seed(random_seed) 24 | np.random.seed(random_seed) 25 | 26 | # 强化学习参数 27 | state_dim = env.observation_space.shape[0] # 状态个数 28 | hidden_dim = 400 29 | action_dim = env.action_space.n # 动作个数 30 | 31 | num_episodes = 1000 # 训练时走几次 32 | num_steps = 100 # 训练时一次走几步 33 | test_iteration = 10 # 测试时走几次 34 | num_test_steps = 200 # 测试时一次走几步 35 | mode = 'train' # train or test 36 | retrain = True # 是否重头训练 37 | weight_num = 900 # 载入权重的代数,用于中途继续训练和test情况 38 | log_interval = 100 # 每隔log_interval保存一次参数 39 | print_log = 5 # 每走print_log次输出一次 40 | 41 | # use the cuda or not 42 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 43 | if device == 'cuda': 44 | print('using the GPU...') 45 | else: 46 | print('using the CPU...') 47 | 48 | # 创建agent 49 | agent = PolicyGradient(state_dim, hidden_dim, action_dim, device) 50 | 51 | # create the 
directory to save the weight and the result 52 | directory = './exp_pg_' + env_name +'./' 53 | #directory = './exp_ddpg_' + env_name +'./' 54 | if not os.path.exists(directory): 55 | os.mkdir(directory) 56 | 57 | if mode == 'train': 58 | # 是否中途开始训练 59 | if retrain == False: 60 | agent.load(directory, weight_num) 61 | 62 | # 训练; num_episodes:走多少次; i_episode:当前走到第几次; num_steps:每次走多少步 63 | for i_episode in range(num_episodes): 64 | # 环境回归原位 65 | state = torch.Tensor([env.reset()]) 66 | 67 | entropies = [] 68 | log_probs = [] 69 | rewards = [] 70 | 71 | # 每次走num_steps步 72 | for t in range(num_steps): 73 | # 选action 74 | action, log_prob, entropy = agent.select_action(state) 75 | action = action.cpu() 76 | 77 | # 环境反馈 78 | # done:是否重新reset环境,大多游戏分为多个环节(episode),当done=true的时候,表示这个环节结束了 79 | next_state, reward, done, _ = env.step(action.numpy()[0]) 80 | 81 | # 这些list记录了每次运行的每一步的数据, 如果需要, 可以提取数据得到训练过程的信息 82 | entropies.append(entropy) 83 | log_probs.append(log_prob) 84 | rewards.append(reward) 85 | 86 | # 更新state 87 | state = torch.Tensor([next_state]) 88 | 89 | if done: 90 | break 91 | 92 | # 参数更新是每走一次更新一次, 不是每走一步更新一次 93 | agent.update(rewards, log_probs, entropies) 94 | 95 | # 保存权重并输出 96 | if i_episode % log_interval == 0 and i_episode != 0: 97 | agent.save(directory, i_episode) 98 | 99 | # 每隔几次输出一次信息 100 | if i_episode % print_log == 0 and i_episode != 0: 101 | # 输出回报 102 | print("Episode: {}, reward: {}".format(i_episode, np.sum(rewards))) 103 | env.close() 104 | 105 | elif mode == 'test': 106 | # 载入权重 107 | agent.load(directory, weight_num) 108 | print("load weight...") 109 | 110 | for i_episode in range(test_iteration): 111 | state = torch.Tensor([env.reset()]) 112 | for t in range(num_test_steps): 113 | # 选action 114 | action, log_prob, entropy = agent.select_action(state) 115 | action = action.cpu() 116 | 117 | # 环境反馈 118 | next_state, reward, done, _ = env.step(action.numpy()[0]) 119 | # UI显示 120 | env.render() 121 | # 更新state 122 | state = torch.Tensor([next_state]) 123 | if done: 124 | break 125 | env.close() 126 | else: 127 | print("mode wrong!!!") 128 | 129 | -------------------------------------------------------------------------------- /model/exp_ddpg_Pendulum-v0/900_actor.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Digi-Metal/Reinforce-learning-based-algorithm-for-dynamic-scheduling-problem-in-steelmaking-workshop/8aba0d8c0c905f5a9dd3d15cc5ff9654274d891b/model/exp_ddpg_Pendulum-v0/900_actor.pth -------------------------------------------------------------------------------- /model/exp_ddpg_Pendulum-v0/900_critic.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Digi-Metal/Reinforce-learning-based-algorithm-for-dynamic-scheduling-problem-in-steelmaking-workshop/8aba0d8c0c905f5a9dd3d15cc5ff9654274d891b/model/exp_ddpg_Pendulum-v0/900_critic.pth -------------------------------------------------------------------------------- /model/exp_pg_CartPole-v0/900.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Digi-Metal/Reinforce-learning-based-algorithm-for-dynamic-scheduling-problem-in-steelmaking-workshop/8aba0d8c0c905f5a9dd3d15cc5ff9654274d891b/model/exp_pg_CartPole-v0/900.pth -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 
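# Note (hedged): the agent constructors used below, e.g. InitialAgent(0, 4, 3)
# and ProcessAgent(1, 6, 4), pass only the process/machine/task counts, while
# the current __init__ signatures in agents.py also require action_min,
# exploration_noise, capacity and device (as train.py supplies). As written,
# this random test presumably expects an RL-free version of those classes or
# default values for the extra parameters.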
3 | """ 4 | test.py 5 | 6 | 使用随机算法决策, 运行调度系统, 显示ui, 测试系统可行性 7 | """ 8 | 9 | from ui import * 10 | from utils import * 11 | from environment import * 12 | 13 | # 新建agent 14 | agent0 = InitialAgent(0, 4, 3) 15 | agent1 = ProcessAgent(1, 6, 4) 16 | agent2 = ProcessAgent(2, 5, 6) 17 | agent3 = ProcessAgent(3, 3, 5) 18 | agent4 = ProcessAgent(4, 4, 3) 19 | agent5 = FinalAgent(5, 4, 5, 3, 4) 20 | processAgents = [agent1, agent2, agent3, agent4] 21 | 22 | env = Env() # 新建环境 23 | env.reset() # 环境重置 24 | recordStates = [] # 记录各个时刻的states 25 | recordStates.append(str(env.envStates)) 26 | recordActions = [] # 记录各个时刻的actions 27 | 28 | 29 | ''' 30 | 伪代码: 31 | for t(count): 32 | for each_agent: 33 | (agent进行决策) 34 | selectAction 35 | return nextState, reward 36 | state = nextState 37 | 注意: 38 | env的state和agent的state不一样, 使用前需要转换 39 | ''' 40 | while True: 41 | #password = input("按回车继续:") 42 | # t时刻遍历所有agent一次 43 | env.count += 1 44 | tempActions = [] 45 | 46 | # ===========InitialAgent操作=========== 47 | state = toInitialAgentState(env) # 环境state转换成agent的state 48 | action = agent0.SelectActionRandom(state) # 选择动作 49 | # 环境反馈 50 | states, done = env.initialStep(agent0, action) 51 | reward = -env.count 52 | tempActions.append(action) # 记录InitialAgent操作 53 | env.envStates = states # 更新states 54 | # 如果系统出错 55 | if done: 56 | reward = -1000 57 | print("Decision failure, task failure") 58 | break 59 | 60 | # ===========processAgent依次操作=========== 61 | flag = 0 62 | for eachAgent in processAgents: 63 | state = toProcessAgentState(env, eachAgent) 64 | action = eachAgent.SelectActionRandom(state) 65 | states, done = env.processStep(eachAgent, action) 66 | reward = -env.count 67 | tempActions.append(action) 68 | env.envStates = states 69 | if done: 70 | reward = -1000 71 | print("Decision failure, task failure") 72 | flag = 1 73 | break 74 | if flag == 1: 75 | break 76 | 77 | # ===========FinalAgent操作=========== 78 | state = toFinalAgentState(env,agent2,agent3,agent4,agent5) 79 | action = agent5.SelectActionRandom(state) 80 | states, done = env.finalStep(agent5, action, agent2, agent3, agent4) 81 | reward = -env.count 82 | tempActions.append(action) 83 | env.envStates = states 84 | if done: 85 | reward = -1000 86 | print("Decision failure, task failure") 87 | break 88 | 89 | # =================显示================= 90 | #print("===================") 91 | #print("count:") 92 | #print(env.count) 93 | #print("------state------") 94 | #for each in env.envStates: 95 | # print(each) 96 | #print("------action------") 97 | #for each in tempActions: 98 | # print(each) 99 | # =================显示================= 100 | 101 | # 记录下每t时刻, 整个系统的的states和actions 102 | recordStates.append(str(states)) 103 | recordActions.append(str(tempActions)) 104 | 105 | if env.ifTaskFinish() == 1: 106 | break 107 | 108 | # 保存数据 109 | writeData(recordStates, recordActions) 110 | 111 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | 训练神经网络 5 | DDDPG算法 6 | """ 7 | 8 | import os 9 | from ui import * 10 | from utils import * 11 | from environment import * 12 | 13 | # use the cuda or not 14 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 15 | if device == 'cuda': 16 | print('using the GPU...') 17 | else: 18 | print('using the CPU...') 19 | 20 | # create the directory to save the weight 21 | directory = './weights./' 22 | if not os.path.exists(directory): 23 | os.mkdir(directory) 
24 | 25 | # 新建agent 26 | agent0 = InitialAgent(0, 4, 3, 0, 0.1, 500, device) 27 | agent1 = ProcessAgent(1, 6, 4, 0, 0.1, 500, device) 28 | agent2 = ProcessAgent(2, 5, 6, 0, 0.1, 500, device) 29 | agent3 = ProcessAgent(3, 3, 5, 0, 0.1, 500, device) 30 | agent4 = ProcessAgent(4, 4, 3, 0, 0.1, 500, device) 31 | agent5 = LastAgent(5, 2, 4, 5, 0, 0.1, 500, device) 32 | agent6 = LastAgent(5, 3, 4, 3, 0, 0.1, 500, device) 33 | agent7 = LastAgent(5, 4, 4, 4, 0, 0.1, 500, device) 34 | 35 | processAgents = [agent1, agent2, agent3, agent4] 36 | lastAgents = [agent5, agent6, agent7] 37 | agentsLs = [agent0, agent1, agent2, agent3, agent4, agent5, agent6, agent7] 38 | agentsName = ['agent0', 'agent1', 'agent2', 'agent3', 'agent4', 'agent5', 'agent6', 'agent7'] 39 | ls_episode = [0, 0, 0, 0, 0, 0, 0, 0] 40 | 41 | 42 | # 参数 43 | num_episodes = 10000 # 训练多少次 44 | retrain = True # 是否重头训练 45 | weight_num = 900 # 载入权重的代数,用于中途继续训练 46 | log_interval = 100 # 每隔log_interval保存一次参数 47 | print_log = 1 # 每走print_log次输出一次 48 | 49 | env = Env() # 新建环境 50 | # 是否中途开始训练 51 | if retrain == False: 52 | for each in agentsLs: 53 | each.rl.load(directory, str(each), weight_num) 54 | 55 | # 训练 56 | for i_episode in range(num_episodes): 57 | env.reset() # 环境重置 58 | rewards = [] 59 | 60 | # 每次训练 61 | while True: 62 | env.count += 1 63 | ''' 64 | print('-------count-------') 65 | print(env.count) 66 | print('-------envStates-------') 67 | for each in env.envStates: 68 | print(each) 69 | ''' 70 | # ===========InitialAgent操作=========== 71 | init_flag = 0 72 | state = toInitialAgentState(env) # 环境state转换成agent的state 73 | # InitialAgent是否要操作 74 | if initialAgentChoose(agent0, state): 75 | ''' 76 | password = input("按回车继续:") 77 | print('agent num') 78 | print(agent0.processNum) 79 | print('envStates: ') 80 | for each in env.envStates: 81 | print(each) 82 | ''' 83 | # 满足下面条件agent便可以运行, 否则不可运行 84 | for eachTask in range(agent0.taskNum): 85 | if state[eachTask] != 0: 86 | if sum(state[agent0.taskNum+1:]) != agent0.machineNum: 87 | # 全局state转成局部state 88 | s = allSToPartSInit(state, eachTask, agent0.taskNum) 89 | actionAll = [0]*agent0.machineNum 90 | ls_episode[agent0.processNum] += 1 # 记录迭代次数 91 | action = agent0.rl.select_action(s) # 选择动作 92 | action = agent0.rl.add_action_noise(action) # add noise to action 93 | reward, done = env.initReturn(s, action) 94 | ''' 95 | password = input("按回车继续:") 96 | print('state: ') 97 | print(s) 98 | print('action: ') 99 | print(action) 100 | print("reward: ") 101 | print(reward) 102 | print("done: ") 103 | print(done) 104 | ''' 105 | rewards.append(reward) # 记录各步骤reward 106 | # 如果决策出错 107 | if done: 108 | init_flag = 1 109 | if action[0] != 0: 110 | actionAll[action[0]-1] = 1 111 | state[agent0.taskNum+action[0]-1] = 1 112 | next_s = allSToPartSInit(state, eachTask, agent0.taskNum) 113 | # 数据添加入replay_buffer 114 | agent0.rl.replay_buffer.push((s, next_s, action[0], reward, np.float(done))) 115 | states = env.initialStep(agent0, actionAll) 116 | env.envStates = states 117 | ''' 118 | print('envStates: ') 119 | for each in env.envStates: 120 | print(each) 121 | ''' 122 | else: 123 | action = [0]*agent0.machineNum 124 | states, reward, done = env.initialStep(agent0, action) 125 | env.envStates = states # 更新states 126 | 127 | if init_flag == 1: 128 | break 129 | # ===========processAgent依次操作=========== 130 | flag = 0 131 | for eachAgent in processAgents: 132 | process_flag = 0 133 | state = toProcessAgentState(env, eachAgent) 134 | 135 | # processAgent是否要操作 136 | if processAgentChoose(eachAgent, state): 
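            # processAgentChoose (from utils.py) presumably checks whether this
            # agent has any decision to make at this tick; when it returns False
            # the agent submits an all-zero (no-op) action in the else branch
            # below.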
137 | ''' 138 | password = input("按回车继续:") 139 | print('agent num') 140 | print(eachAgent.processNum) 141 | print('state') 142 | print(state) 143 | print('envStates: ') 144 | for each in env.envStates: 145 | print(each) 146 | ''' 147 | # 满足下面条件agent便可以运行, 否则不可运行 148 | for eachTask in range(eachAgent.lastMachineNum): 149 | if state[eachTask] != 0: 150 | if sum(state[eachAgent.lastMachineNum+1:]) != eachAgent.machineNum: 151 | # 全局state转成局部state 152 | s = allSToPartSInit(state, eachTask, eachAgent.lastMachineNum) 153 | actionAll = [0]*eachAgent.machineNum 154 | ls_episode[eachAgent.processNum] += 1 # 记录迭代次数 155 | action = eachAgent.rl.select_action(s) # 选择动作 156 | action = eachAgent.rl.add_action_noise(action) # add noise to action 157 | reward, done = env.initReturn(s, action) 158 | ''' 159 | print('state: ') 160 | print(s) 161 | print('action: ') 162 | print(action) 163 | print("reward: ") 164 | print(reward) 165 | print("done: ") 166 | print(done) 167 | ''' 168 | rewards.append(reward) # 记录各步骤reward 169 | # 如果决策出错 170 | if done: 171 | init_flag = 1 172 | if action[0] != 0: 173 | actionAll[action[0]-1] = 1 174 | state[eachAgent.lastMachineNum+action[0]-1] = 1 175 | next_s = allSToPartSInit(state, eachTask, eachAgent.lastMachineNum) 176 | # 数据添加入replay_buffer 177 | eachAgent.rl.replay_buffer.push((s, next_s, action[0], reward, np.float(done))) 178 | states = env.processStep(eachAgent, actionAll) 179 | env.envStates = states 180 | ''' 181 | print('envStates: ') 182 | for each in env.envStates: 183 | print(each) 184 | ''' 185 | else: 186 | action = [0]*eachAgent.machineNum 187 | states = env.processStep(eachAgent, action) 188 | env.envStates = states 189 | 190 | if process_flag == 1: 191 | break 192 | if flag == 1: 193 | break 194 | 195 | # ===========lastAgent依次操作=========== 196 | flag = 0 197 | for eachAgent in lastAgents: 198 | last_flag = 0 199 | state = toLastAgentState(env, eachAgent) 200 | 201 | # processAgent是否要操作 202 | if processAgentChoose(eachAgent, state): 203 | ''' 204 | password = input("按回车继续:") 205 | print('agent num') 206 | print(eachAgent.processNum) 207 | print('state: ') 208 | print(state) 209 | print('envStates: ') 210 | for each in env.envStates: 211 | print(each) 212 | ''' 213 | # 满足下面条件agent便可以运行, 否则不可运行 214 | for eachTask in range(eachAgent.lastMachineNum): 215 | if state[eachTask] != 0: 216 | if sum(state[eachAgent.lastMachineNum+1:]) != eachAgent.machineNum: 217 | # 全局state转成局部state 218 | s = allSToPartSInit(state, eachTask, eachAgent.lastMachineNum) 219 | actionAll = [0]*eachAgent.machineNum 220 | ls_episode[eachAgent.processNum] += 1 # 记录迭代次数 221 | action = eachAgent.rl.select_action(s) # 选择动作 222 | action = eachAgent.rl.add_action_noise(action) # add noise to action 223 | reward, done = env.initReturn(s, action) 224 | ''' 225 | print('state: ') 226 | print(s) 227 | print('action: ') 228 | print(action) 229 | print("reward: ") 230 | print(reward) 231 | print("done: ") 232 | print(done) 233 | ''' 234 | rewards.append(reward) # 记录各步骤reward 235 | # 如果决策出错 236 | if done: 237 | init_flag = 1 238 | if action[0] != 0: 239 | actionAll[action[0]-1] = 1 240 | state[eachAgent.lastMachineNum+action[0]-1] = 1 241 | next_s = allSToPartSInit(state, eachTask, eachAgent.lastMachineNum) 242 | # 数据添加入replay_buffer 243 | eachAgent.rl.replay_buffer.push((s, next_s, action[0], reward, np.float(done))) 244 | states = env.processStep(eachAgent, actionAll) 245 | env.envStates = states 246 | ''' 247 | print('envStates: ') 248 | for each in env.envStates: 249 | print(each) 250 | ''' 251 | 
else: 252 | action = [0]*eachAgent.machineNum 253 | states, reward, done = env.lastStep(eachAgent, action) 254 | env.envStates = states 255 | 256 | if last_flag == 1: 257 | break 258 | if flag == 1: 259 | break 260 | 261 | # 调度系统正常完成 262 | if env.ifTaskFinish() == 1: 263 | print("Decision succeed") 264 | break 265 | 266 | # ===========其它操作=========== 267 | # update 268 | for each in agentsLs: 269 | if len(each.rl.replay_buffer.storage) >= each.capacity-1: 270 | each.rl.update() 271 | 272 | # save 273 | for each in range(len(agentsName)): 274 | if ls_episode[each] % log_interval == 0 and ls_episode[each] != 0: 275 | agentsLs[each].rl.save(directory, agentsName[each], ls_episode[each]) 276 | 277 | # 每隔几次输出一次信息 278 | if i_episode % print_log == 0 and i_episode != 0: 279 | # 输出回报 280 | print("Episode: {}, sum reward: {}".format(i_episode, np.sum(rewards))) 281 | 282 | -------------------------------------------------------------------------------- /trainPG.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | 训练神经网络 5 | PG算法 6 | """ 7 | 8 | import os 9 | from ui import * 10 | from utils import * 11 | from environment import * 12 | 13 | # use the cuda or not 14 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 15 | if device == 'cuda': 16 | print('using the GPU...') 17 | else: 18 | print('using the CPU...') 19 | 20 | # create the directory to save the weight 21 | directory = './weights/' 22 | if not os.path.exists(directory): 23 | os.mkdir(directory) 24 | 25 | # 新建agent 26 | agent0 = InitialAgent(0, 4, 3, 0, 0.1, 500, device) 27 | agent1 = ProcessAgent(1, 6, 4, 0, 0.1, 500, device) 28 | agent2 = ProcessAgent(2, 5, 6, 0, 0.1, 500, device) 29 | agent3 = ProcessAgent(3, 3, 5, 0, 0.1, 500, device) 30 | agent4 = ProcessAgent(4, 4, 3, 0, 0.1, 500, device) 31 | agent5 = LastAgent(5, 2, 4, 5, 0, 0.1, 500, device) 32 | agent6 = LastAgent(5, 3, 4, 3, 0, 0.1, 500, device) 33 | agent7 = LastAgent(5, 4, 4, 4, 0, 0.1, 500, device) 34 | 35 | processAgents = [agent1, agent2, agent3, agent4] 36 | lastAgents = [agent5, agent6, agent7] 37 | agentsLs = [agent0, agent1, agent2, agent3, agent4, agent5, agent6, agent7] 38 | agentsName = ['agent0', 'agent1', 'agent2', 'agent3', 'agent4', 'agent5', 'agent6', 'agent7'] 39 | ls_episode = [0, 0, 0, 0, 0, 0, 0, 0] 40 | 41 | 42 | # 参数 43 | num_episodes = 10000 # 训练多少次 44 | retrain = True # 是否重头训练 45 | weight_num = 900 # 载入权重的代数,用于中途继续训练 46 | log_interval = 100 # 每隔log_interval保存一次参数 47 | print_log = 1 # 每走print_log次输出一次 48 | 49 | env = Env() # 新建环境 50 | # 是否中途开始训练, 载入权重名需与保存时的agentsName一致 51 | if retrain == False: 52 | for each in range(len(agentsLs)): 53 | agentsLs[each].rl.load(directory, agentsName[each], weight_num) 54 | 55 | # 训练 56 | for i_episode in range(num_episodes): 57 | env.reset() # 环境重置 58 | 59 | rewards0=[] 60 | log_probs0=[] 61 | entropies0=[] 62 | rewards1=[] 63 | log_probs1=[] 64 | entropies1=[] 65 | rewards2=[] 66 | log_probs2=[] 67 | entropies2=[] 68 | rewards3=[] 69 | log_probs3=[] 70 | entropies3=[] 71 | rewards4=[] 72 | log_probs4=[] 73 | entropies4=[] 74 | rewards5=[] 75 | log_probs5=[] 76 | entropies5=[] 77 | rewards6=[] 78 | log_probs6=[] 79 | entropies6=[] 80 | rewards7=[] 81 | log_probs7=[] 82 | entropies7=[] 83 | 84 | # 每次训练 85 | while True: 86 | password = input("按回车继续:") 87 | 88 | for each in env.envStates: 89 | print(each) 90 | 91 | env.count += 1 92 | # ===========Agent0操作=========== 93 | init_flag = 0 94 | state = toInitialAgentState(env) # 环境state转换成agent的state 95 | if initialAgentChoose(agent0, state): 96 | # 
满足下面条件agent便可以运行, 否则不可运行 97 | for eachTask in range(agent0.taskNum): 98 | if state[eachTask] != 0: 99 | if sum(state[agent0.taskNum+1:]) != agent0.machineNum: 100 | 101 | # 全局state转成局部state 102 | s = allSToPartSInit(state, eachTask, agent0.taskNum) 103 | #print(s) 104 | actionAll = [0]*agent0.machineNum 105 | ls_episode[agent0.processNum] += 1 # 记录迭代次数 106 | 107 | action, log_prob, entropy = agent0.rl.select_action(torch.Tensor([s])) # 选择动作 108 | action = action.cpu() 109 | reward, done = env.initReturn(s, action) 110 | print("agent0") 111 | print(s) 112 | print(action) 113 | print(reward) 114 | entropies0.append(entropy) 115 | log_probs0.append(log_prob) 116 | rewards0.append(reward) 117 | # 如果决策出错 118 | if done: 119 | init_flag = 1 120 | if action[0] != 0: 121 | actionAll[action[0]-1] = 1 122 | state[agent0.taskNum+action[0]-1] = 1 123 | states = env.initialStep(agent0, actionAll) 124 | else: 125 | action = [0]*agent0.machineNum 126 | states = env.initialStep(agent0, action) 127 | env.envStates = states # 更新states 128 | if init_flag == 1: 129 | break 130 | #password = input("按回车继续:") 131 | # ===========Agent1操作=========== 132 | init_flag = 0 133 | state = toProcessAgentState(env, agent1) # 环境state转换成agent的state 134 | if processAgentChoose(agent1, state): 135 | # 满足下面条件agent便可以运行, 否则不可运行 136 | for eachTask in range(agent1.lastMachineNum): 137 | if state[eachTask] != 0: 138 | if sum(state[agent1.lastMachineNum+1:]) != agent1.machineNum: 139 | # 全局state转成局部state 140 | s = allSToPartSInit(state, eachTask, agent1.lastMachineNum) 141 | 142 | actionAll = [0]*agent1.machineNum 143 | ls_episode[agent1.processNum] += 1 # 记录迭代次数 144 | 145 | action, log_prob, entropy = agent1.rl.select_action(torch.Tensor([s])) # 选择动作 146 | action = action.cpu() 147 | reward, done = env.initReturn(s, action) 148 | 149 | entropies1.append(entropy) 150 | log_probs1.append(log_prob) 151 | rewards1.append(reward) 152 | # 如果决策出错 153 | if done: 154 | init_flag = 1 155 | if action[0] != 0: 156 | actionAll[action[0]-1] = 1 157 | states = env.processStep(agent1, actionAll) 158 | else: 159 | action = [0]*agent1.machineNum 160 | states = env.processStep(agent1, action) 161 | env.envStates = states # 更新states 162 | if init_flag == 1: 163 | break 164 | # ===========Agent2操作=========== 165 | init_flag = 0 166 | state = toProcessAgentState(env, agent2) # 环境state转换成agent的state 167 | if processAgentChoose(agent2, state): 168 | # 满足下面条件agent便可以运行, 否则不可运行 169 | for eachTask in range(agent2.lastMachineNum): 170 | if state[eachTask] != 0: 171 | if sum(state[agent2.lastMachineNum+1:]) != agent2.machineNum: 172 | # 全局state转成局部state 173 | s = allSToPartSInit(state, eachTask, agent2.lastMachineNum) 174 | 175 | actionAll = [0]*agent2.machineNum 176 | ls_episode[agent2.processNum] += 1 # 记录迭代次数 177 | 178 | action, log_prob, entropy = agent2.rl.select_action(torch.Tensor([s])) # 选择动作 179 | action = action.cpu() 180 | reward, done = env.initReturn(s, action) 181 | 182 | entropies2.append(entropy) 183 | log_probs2.append(log_prob) 184 | rewards2.append(reward) 185 | # 如果决策出错 186 | if done: 187 | init_flag = 1 188 | if action[0] != 0: 189 | actionAll[action[0]-1] = 1 190 | states = env.processStep(agent2, actionAll) 191 | else: 192 | action = [0]*agent2.machineNum 193 | states = env.processStep(agent2, action) 194 | env.envStates = states # 更新states 195 | if init_flag == 1: 196 | break 197 | # ===========Agent3操作=========== 198 | init_flag = 0 199 | state = toProcessAgentState(env, agent3) # 环境state转换成agent的state 200 | if 
processAgentChoose(agent3, state): 201 | # 满足下面条件agent便可以运行, 否则不可运行 202 | for eachTask in range(agent3.lastMachineNum): 203 | if state[eachTask] != 0: 204 | if sum(state[agent3.lastMachineNum+1:]) != agent3.machineNum: 205 | # 全局state转成局部state 206 | s = allSToPartSInit(state, eachTask, agent3.lastMachineNum) 207 | 208 | actionAll = [0]*agent3.machineNum 209 | ls_episode[agent3.processNum] += 1 # 记录迭代次数 210 | 211 | action, log_prob, entropy = agent3.rl.select_action(torch.Tensor([s])) # 选择动作 212 | action = action.cpu() 213 | reward, done = env.initReturn(s, action) 214 | 215 | entropies3.append(entropy) 216 | log_probs3.append(log_prob) 217 | rewards3.append(reward) 218 | # 如果决策出错 219 | if done: 220 | init_flag = 1 221 | if action[0] != 0: 222 | actionAll[action[0]-1] = 1 223 | states = env.processStep(agent3, actionAll) 224 | else: 225 | action = [0]*agent3.machineNum 226 | states = env.processStep(agent3, action) 227 | env.envStates = states # 更新states 228 | if init_flag == 1: 229 | break 230 | # ===========Agent4操作=========== 231 | init_flag = 0 232 | state = toProcessAgentState(env, agent4) # 环境state转换成agent的state 233 | if processAgentChoose(agent4, state): 234 | # 满足下面条件agent便可以运行, 否则不可运行 235 | for eachTask in range(agent4.lastMachineNum): 236 | if state[eachTask] != 0: 237 | if sum(state[agent4.lastMachineNum+1:]) != agent4.machineNum: 238 | # 全局state转成局部state 239 | s = allSToPartSInit(state, eachTask, agent4.lastMachineNum) 240 | 241 | actionAll = [0]*agent4.machineNum 242 | ls_episode[agent4.processNum] += 1 # 记录迭代次数 243 | 244 | action, log_prob, entropy = agent4.rl.select_action(torch.Tensor([s])) # 选择动作 245 | action = action.cpu() 246 | reward, done = env.initReturn(s, action) 247 | 248 | entropies4.append(entropy) 249 | log_probs4.append(log_prob) 250 | rewards4.append(reward) 251 | # 如果决策出错 252 | if done: 253 | init_flag = 1 254 | if action[0] != 0: 255 | actionAll[action[0]-1] = 1 256 | states = env.processStep(agent4, actionAll) 257 | else: 258 | action = [0]*agent4.machineNum 259 | states = env.processStep(agent4, action) 260 | env.envStates = states # 更新states 261 | if init_flag == 1: 262 | break 263 | # ===========Agent5操作=========== 264 | init_flag = 0 265 | state = toLastAgentState(env, agent5) # 环境state转换成agent的state 266 | if processAgentChoose(agent5, state): 267 | # 满足下面条件agent便可以运行, 否则不可运行 268 | for eachTask in range(agent5.lastMachineNum): 269 | if state[eachTask] != 0: 270 | if sum(state[agent5.lastMachineNum+1:]) != agent5.machineNum: 271 | # 全局state转成局部state 272 | s = allSToPartSInit(state, eachTask, agent5.lastMachineNum) 273 | 274 | actionAll = [0]*agent5.machineNum 275 | ls_episode[agent5.processNum] += 1 # 记录迭代次数 276 | 277 | action, log_prob, entropy = agent5.rl.select_action(torch.Tensor([s])) # 选择动作 278 | action = action.cpu() 279 | reward, done = env.initReturn(s, action) 280 | 281 | entropies5.append(entropy) 282 | log_probs5.append(log_prob) 283 | rewards5.append(reward) 284 | # 如果决策出错 285 | if done: 286 | init_flag = 1 287 | if action[0] != 0: 288 | actionAll[action[0]-1] = 1 289 | states = env.processStep(agent5, actionAll) 290 | else: 291 | action = [0]*agent5.machineNum 292 | states = env.processStep(agent5, action) 293 | env.envStates = states # 更新states 294 | if init_flag == 1: 295 | break 296 | # ===========Agent6操作=========== 297 | init_flag = 0 298 | state = toLastAgentState(env, agent6) # 环境state转换成agent的state 299 | if processAgentChoose(agent6, state): 300 | # 满足下面条件agent便可以运行, 否则不可运行 301 | for eachTask in range(agent6.lastMachineNum): 302 | if 
state[eachTask] != 0: 303 | if sum(state[agent6.lastMachineNum+1:]) != agent6.machineNum: 304 | # 全局state转成局部state 305 | s = allSToPartSInit(state, eachTask, agent6.lastMachineNum) 306 | 307 | actionAll = [0]*agent6.machineNum 308 | ls_episode[agent6.processNum] += 1 # 记录迭代次数 309 | 310 | action, log_prob, entropy = agent6.rl.select_action(torch.Tensor([s])) # 选择动作 311 | action = action.cpu() 312 | reward, done = env.initReturn(s, action) 313 | 314 | entropies6.append(entropy) 315 | log_probs6.append(log_prob) 316 | rewards6.append(reward) 317 | # 如果决策出错 318 | if done: 319 | init_flag = 1 320 | if action[0] != 0: 321 | actionAll[action[0]-1] = 1 322 | states = env.processStep(agent6, actionAll) 323 | else: 324 | action = [0]*agent6.machineNum 325 | states = env.processStep(agent6, action) 326 | env.envStates = states # 更新states 327 | if init_flag == 1: 328 | break 329 | # ===========Agent7操作=========== 330 | init_flag = 0 331 | state = toLastAgentState(env, agent7) # 环境state转换成agent的state 332 | if processAgentChoose(agent7, state): 333 | # 满足下面条件agent便可以运行, 否则不可运行 334 | for eachTask in range(agent7.lastMachineNum): 335 | if state[eachTask] != 0: 336 | if sum(state[agent7.lastMachineNum+1:]) != agent7.machineNum: 337 | # 全局state转成局部state 338 | s = allSToPartSInit(state, eachTask, agent7.lastMachineNum) 339 | 340 | actionAll = [0]*agent7.machineNum 341 | ls_episode[agent7.processNum] += 1 # 记录迭代次数 342 | 343 | action, log_prob, entropy = agent7.rl.select_action(torch.Tensor([s])) # 选择动作 344 | action = action.cpu() 345 | reward, done = env.initReturn(s, action) 346 | 347 | entropies7.append(entropy) 348 | log_probs7.append(log_prob) 349 | rewards7.append(reward) 350 | # 如果决策出错 351 | if done: 352 | init_flag = 1 353 | if action[0] != 0: 354 | actionAll[action[0]-1] = 1 355 | states = env.processStep(agent7, actionAll) 356 | else: 357 | action = [0]*agent7.machineNum 358 | states = env.processStep(agent7, action) 359 | env.envStates = states # 更新states 360 | if init_flag == 1: 361 | break 362 | 363 | 364 | # 调度系统正常完成 365 | if env.ifTaskFinish() == 1: 366 | #print("Decision succeed") 367 | break 368 | # ===========其它操作=========== 369 | # update 370 | if len(rewards0) != 0: 371 | agent0.rl.update(rewards0, log_probs0, entropies0) 372 | if len(rewards1) != 0: 373 | agent1.rl.update(rewards1, log_probs1, entropies1) 374 | if len(rewards2) != 0: 375 | agent2.rl.update(rewards2, log_probs2, entropies2) 376 | if len(rewards3) != 0: 377 | agent3.rl.update(rewards3, log_probs3, entropies3) 378 | if len(rewards4) != 0: 379 | agent4.rl.update(rewards4, log_probs4, entropies4) 380 | if len(rewards5) != 0: 381 | agent5.rl.update(rewards5, log_probs5, entropies5) 382 | if len(rewards6) != 0: 383 | agent6.rl.update(rewards6, log_probs6, entropies6) 384 | if len(rewards7) != 0: 385 | agent7.rl.update(rewards7, log_probs7, entropies7) 386 | 387 | # save 388 | for each in range(len(agentsName)): 389 | if ls_episode[each] % log_interval == 0 and ls_episode[each] != 0: 390 | agentsLs[each].rl.save(directory, agentsName[each], ls_episode[each]) 391 | 392 | # 每隔几次输出一次信息 393 | if i_episode % print_log == 0 and i_episode != 0: 394 | # 输出回报 395 | print("Episode: {}".format(i_episode)) 396 | 397 | -------------------------------------------------------------------------------- /ui.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | ui.py 5 | pyqtgraph-0.10.0版 6 | 7 | 可视化ui界面 8 | """ 9 | 10 | from PyQt5 import QtWidgets 11 | from PyQt5 import 
QtCore, QtGui 12 | import pyqtgraph as pg 13 | import sys 14 | 15 | ''' 16 | 辅助函数, 根据state返回甘特图的框的位置 17 | 输入设备的位置: 流程号和设备号 18 | 返回对应的甘特图框的坐标, list格式 19 | ''' 20 | def gantPosition(process, num): 21 | ls = [[26, 25, 24, 23], 22 | [22, 21, 20, 19, 18, 17], 23 | [16, 15, 14, 13, 12], 24 | [11, 10, 9], 25 | [ 8, 7, 6, 5], 26 | [ 4, 3, 2, 1]] 27 | return ls[process][num] 28 | 29 | ''' 30 | 甘特图中的框的类 31 | 输入一条数据,list类型, 输出是数据对应甘特图的框 32 | 继承于pyqtgraph的GraphicsObject类 33 | ''' 34 | class BarItem(pg.GraphicsObject): 35 | def __init__(self, states, times): 36 | self.gant_wide = 0.2 # 甘特图的框的宽度的一半 37 | self.states = states 38 | self.times = times 39 | pg.GraphicsObject.__init__(self) 40 | self.generatePicture() # 实例化时即调用 41 | 42 | # 根据states数据画框 43 | def generatePicture(self): 44 | states = self.states 45 | times = self.times 46 | self.picture = QtGui.QPicture() # 实例化一个绘图设备 47 | p = QtGui.QPainter(self.picture) # 在picture上实例化QPainter用于绘图 48 | 49 | machineStates = states[1:] 50 | # 遍历每一个设备, 判断并画框或划线, 损坏,空闲 51 | for eachprocess in range(len(machineStates)): 52 | for each in range(len(machineStates[eachprocess])): 53 | data = machineStates[eachprocess][each] # 获取设备的状态 54 | # 设备在运行job时, 画框 55 | if data[0] == 1: 56 | # 选颜色, task1为红, task2为黄, task1为绿 57 | if data[1] == 1: 58 | p.setPen(pg.mkPen('r')) 59 | p.setBrush(pg.mkBrush('r')) # 设置画刷颜色为红 60 | elif data[1] == 2: 61 | p.setPen(pg.mkPen('g')) 62 | p.setBrush(pg.mkBrush('g')) # 设置画刷颜色为绿 63 | elif data[1] == 3: 64 | p.setPen(pg.mkPen('y')) 65 | p.setBrush(pg.mkBrush('y')) # 设置画刷颜色为黄 66 | # 绘制箱子,格式:(a,b,c,d)为左下角xy坐标,向xy轴正方向占多少距离 67 | coord_left = gantPosition(eachprocess, each) 68 | p.drawRect(QtCore.QRectF(times,coord_left-self.gant_wide,1,2*self.gant_wide)) 69 | # job占用在设备时, 画白框 70 | elif data[0] == 3: 71 | p.setPen(pg.mkPen('b')) 72 | p.setBrush(pg.mkBrush('b')) 73 | coord_left = gantPosition(eachprocess, each) 74 | p.drawRect(QtCore.QRectF(times,coord_left-self.gant_wide,1,2*self.gant_wide)) 75 | p.end() 76 | 77 | # 不用管这个, 这个函数是pg.GraphicsObject类有关的 78 | def paint(self, p, *args): 79 | p.drawPicture(0, 0, self.picture) 80 | 81 | # # 不用管这个, 这个函数是pg.GraphicsObject类有关的 82 | def boundingRect(self): 83 | return QtCore.QRectF(self.picture.boundingRect()) 84 | 85 | 86 | # 主窗口类 87 | class MainUi(QtWidgets.QMainWindow): 88 | def __init__(self): 89 | super().__init__() 90 | 91 | # 添加主窗口 92 | self.setWindowTitle("在线调度系统 0.0.1版") # 设置窗口标题 93 | self.main_widget = QtWidgets.QWidget() # 实例化一个主部件 94 | self.main_layout = QtWidgets.QGridLayout() # 实例化一个网格布局层 95 | self.main_widget.setLayout(self.main_layout) # 设置主部件布局为网格布局 96 | self.setCentralWidget(self.main_widget) # 设置窗口默认部件为主部件 97 | 98 | # 添加主窗口的按钮,下拉框,文本输入框等部件 99 | self.stock_code = QtWidgets.QLineEdit() # 创建一个文本输入框部件 100 | self.option_sel = QtWidgets.QComboBox() # 创建一个下拉框部件 101 | self.option_sel.addItem("甘特图") # 增加下拉框选项 102 | self.option_sel.addItem("设备状态") 103 | self.que_btn = QtWidgets.QPushButton("运行") # 创建一个按钮部件 104 | 105 | # 创建空白图形,用于放置甘特图 106 | pg.setConfigOption('background', 'w') # 背景为白 107 | self.gant_widget = QtWidgets.QWidget() # 实例化widget部件作为甘特图部件 108 | self.gant_layout = QtWidgets.QGridLayout() 109 | self.gant_widget.setLayout(self.gant_layout) 110 | self.gant_plt = pg.PlotWidget() # 实例化一个绘图部件 111 | self.gant_layout.addWidget(self.gant_plt) # 添加绘图部件到网格布局层 112 | 113 | # 设置部件的布局位置,格式(部件,a,b,c,d):添加到第a行第b列,占c行占d列 114 | self.main_layout.addWidget(self.stock_code,0,0,1,1) 115 | self.main_layout.addWidget(self.option_sel,0,1,1,1) 116 | self.main_layout.addWidget(self.que_btn,0,2,1,1) 117 | 
self.main_layout.addWidget(self.gant_widget,1,0,3,3) 118 | 119 | # 画一个t时刻的state的甘特图 120 | def plotGantGraph(self, states, times): 121 | item = BarItem(states, times) 122 | self.gant_plt.addItem(item, ) # 在绘图部件中添加甘特图项目 123 | self.gant_plt.showGrid(x=True, y=True) # 设置绘图部件显示网格线 124 | self.gant_plt.setYRange(max=26,min=0) 125 | self.gant_plt.setLabel(axis='left', text='设备') # 设置Y轴标签 126 | self.gant_plt.setLabel(axis='bottom', text='运行时间') # 设置X轴标签 127 | 128 | 129 | # 获取数据, 显示ui 130 | def main(): 131 | # 修改需要显示的文件名 132 | filename = "record_2020_04_12_00_37_54" 133 | 134 | # 读取文件 135 | ls = [] 136 | with open("record/" + filename + "/state_record.txt") as f: 137 | for eachLine in f: 138 | ls.append(eval(eachLine)) 139 | 140 | # 画图 141 | app = QtWidgets.QApplication(sys.argv) 142 | gui = MainUi() 143 | for each in range(len(ls)): 144 | gui.plotGantGraph(ls[each], each) 145 | gui.show() 146 | sys.exit(app.exec_()) 147 | 148 | if __name__ == '__main__': 149 | main() 150 | 151 | ''' 152 | # 待改进: 153 | Y轴要把数字换成设备号, 可以考虑隐藏y轴 154 | 坐标轴分度值 155 | 运输部分没有连线 156 | 看看能不能搞个x轴拖动条 157 | 158 | # 在运输job时, 画线 159 | elif data[0] == 2 and data[-1] == 1: 160 | if data[1] == 1: 161 | p.setPen(pg.mkPen('r')) 162 | elif data[1] == 2: 163 | p.setPen(pg.mkPen('g')) 164 | elif data[1] == 3: 165 | p.setPen(pg.mkPen('y')) 166 | coord_left = gantPosition(eachprocess, data[2]) 167 | coord_right = gantPosition(eachprocess+1, data[3]) 168 | p.drawLine(QtCore.QPointF(1, coord_left), QtCore.QPointF(3, coord_right))..... 169 | ''' 170 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | 3 | """ 4 | utils.py 5 | 6 | 工具类 7 | """ 8 | 9 | import os 10 | import time 11 | 12 | # 环境的state转换成initial agent的state 13 | def toInitialAgentState(env): 14 | state = [] 15 | for each in env.envStates[0]: 16 | state.append(each) 17 | for each in env.envStates[1]: 18 | if each[0] == 0: 19 | state.append(0) 20 | else: 21 | state.append(1) 22 | return state 23 | 24 | # 环境的state转换成process agent的state 25 | def toProcessAgentState(env, agent): 26 | state = [] 27 | # last agent 28 | for each in env.envStates[agent.processNum]: 29 | if each[0] == 3: 30 | state.append(each[-1]) 31 | else: 32 | state.append(0) 33 | # this agent 34 | for each in env.envStates[agent.processNum+1]: 35 | if each[0] == 0: 36 | state.append(0) 37 | else: 38 | state.append(1) 39 | return state 40 | 41 | # 环境的state转换成final agent的state 42 | def toFinalAgentState(env,agent2,agent3,agent4,agent5): 43 | state = [] 44 | for each in env.envStates[agent2.processNum+1]: 45 | if each[0] == 3: 46 | state.append(each[-1]) 47 | else: 48 | state.append(0) 49 | for each in env.envStates[agent3.processNum+1]: 50 | if each[0] == 3: 51 | state.append(each[-1]) 52 | else: 53 | state.append(0) 54 | for each in env.envStates[agent4.processNum+1]: 55 | if each[0] == 3: 56 | state.append(each[-1]) 57 | else: 58 | state.append(0) 59 | # this agent 60 | for each in env.envStates[agent5.processNum+1]: 61 | if each[0] == 0: 62 | state.append(0) 63 | else: 64 | state.append(1) 65 | return state 66 | 67 | # 环境的state转换成last agent的state 68 | def toLastAgentState(env, agent): 69 | state = [] 70 | # last agent 71 | for each in env.envStates[agent.lastProcessNum+1]: 72 | if each[0] == 3 and each[-1] == (agent.lastProcessNum-1): 73 | state.append(each[-1]) 74 | else: 75 | state.append(0) 76 | # this agent 77 | for each in env.envStates[agent.processNum+1]: 78 | if 
each[0] == 0: 79 | state.append(0) 80 | else: 81 | state.append(1) 82 | return state 83 | 84 | ''' 85 | recordState和recordActions存入txt文件 86 | 文件名: 用当前时间来命名 87 | 内有两个txt文件, 分别储存state和action数据 88 | state是从t=0开始的 89 | action从t=0至t=1之间决策,所以数量比state少1 90 | ''' 91 | def writeData(recordStates, recordActions): 92 | now = time.strftime("%Y_%m_%d_%H_%M_%S",time.localtime(time.time())) 93 | filename = "record/record_" + now + "/" 94 | if not os.path.exists(filename): 95 | # 目录不存在, 进行创建操作 96 | os.makedirs(filename) 97 | with open(filename + "/state_record.txt", 'w') as f: 98 | for eachTimeState in recordStates: 99 | f.write(eachTimeState) 100 | f.write("\n") 101 | with open(filename + "action_record.txt", 'w') as f: 102 | for eachAction in recordActions: 103 | f.write(eachAction) 104 | f.write("\n") 105 | 106 | # initialAgent是否需要进行动作 107 | def initialAgentChoose(agent, state): 108 | stateFront = state[: agent.taskNum] 109 | machineState = state[agent.taskNum :] 110 | if sum(stateFront) == 0: 111 | return 0 112 | elif sum(machineState) == agent.machineNum: 113 | return 0 114 | else: 115 | return 1 116 | 117 | # processAgent是否需要进行动作 118 | def processAgentChoose(agent, state): 119 | stateFront = state[: agent.lastMachineNum] 120 | machineState = state[agent.lastMachineNum :] 121 | if sum(stateFront) == 0: 122 | return 0 123 | elif sum(machineState) == agent.machineNum: 124 | return 0 125 | else: 126 | return 1 127 | 128 | # initialAgent全局state转局部state 129 | def allStateToPartStateInit(taskLoc, macLoc): 130 | s = [] 131 | s.append(taskLoc) 132 | s.append(taskLoc+1) 133 | s.append(macLoc) 134 | return s 135 | 136 | # Agent全局state转局部state 137 | def allStateToPartState(state, taskLoc, macLoc): 138 | s = [] 139 | s.append(taskLoc) 140 | s.append(state[taskLoc]) 141 | s.append(macLoc) 142 | return s 143 | 144 | # initialAgent局部action转成全局action 145 | def partActionToAllActionInit(machineNum, eachMac, action): 146 | actionAll = [] 147 | for i in range(machineNum): 148 | actionAll.append(0) 149 | actionAll[eachMac] = action[0] 150 | 151 | return actionAll 152 | 153 | def allSToPartSInit(state, taskLoc, agentLastNum): 154 | s = [] 155 | s.append(state[taskLoc]) 156 | s.append(taskLoc+1) 157 | s += state[agentLastNum:] 158 | 159 | return s 160 | 161 | --------------------------------------------------------------------------------
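
附注: 下面是一个可独立运行的小示例(非仓库原代码, 仅作说明), 演示 train.py / trainPG.py 中反复出现的"全局state → 局部state → 全局action"转换: utils.py 的 allSToPartSInit 把某个上游槽位的取值和编号, 连同本工序全部设备的占用情况, 拼成长度为 machineNum+2 的局部状态, 与 agents.py 中 state_dim = machineNum + 2 相对应; 策略网络输出的单个动作编号再还原成整条 actionAll 向量。示例中的 taskNum、machineNum、state 取值以及"选第一台空闲设备"的策略均为假设值, 真实训练中动作由 PolicyGradient/DDDPG 网络给出。

def allSToPartSInit(state, taskLoc, agentLastNum):
    # 与 utils.py 中同名函数保持一致: [该槽位取值, 槽位编号+1, 本工序各设备状态...]
    s = [state[taskLoc], taskLoc + 1]
    s += state[agentLastNum:]
    return s

taskNum, machineNum = 3, 4           # 假设值, 对应 agent0 = InitialAgent(0, 4, 3, ...)
state = [3, 2, 0, 0, 1, 0, 0]        # 前taskNum位: 各task剩余job数; 后machineNum位: 设备占用(1=占用)

for eachTask in range(taskNum):
    # 该槽位有job且设备未全部占用时才决策
    if state[eachTask] != 0 and sum(state[taskNum:]) != machineNum:
        s = allSToPartSInit(state, eachTask, taskNum)
        assert len(s) == machineNum + 2          # 即 agents.py 中的 state_dim = machineNum + 2
        # 这里用"选第一台空闲设备"代替策略网络输出, 0表示不动作(与 train.py 的用法一致)
        free = [m for m in range(machineNum) if state[taskNum + m] == 0]
        action = [free[0] + 1] if free else [0]
        actionAll = [0] * machineNum
        if action[0] != 0:
            actionAll[action[0] - 1] = 1         # 局部动作还原成全局action向量
            state[taskNum + action[0] - 1] = 1   # 对应设备标记为占用, 供下一个槽位决策使用
        print(eachTask, s, actionAll)

运行后会依次打印每个非空槽位构造出的局部状态和还原出的 actionAll 向量。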