├── images ├── fig1 ├── 图片1.png └── 图片2.png ├── cnn.pth ├── eval.py ├── README.md ├── draw.py ├── data.py ├── container.py └── train.py /images/fig1: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /cnn.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024-program/RF_binbox/HEAD/cnn.pth -------------------------------------------------------------------------------- /images/图片1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024-program/RF_binbox/HEAD/images/图片1.png -------------------------------------------------------------------------------- /images/图片2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024-program/RF_binbox/HEAD/images/图片2.png -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from train import DQN, solution 3 | import time 4 | from draw import * 5 | 6 | # create the object of DQN class 7 | dqn = DQN() 8 | 9 | bn_state_dict = torch.load('cnn.pth') 10 | dqn.target_net.load_state_dict(bn_state_dict) 11 | 12 | def random_generate(): 13 | idx = np.random.randint(0,len(solution)) 14 | box_list = solution[idx] 15 | gen_box_order = [] 16 | while True: 17 | index = np.random.randint(0, len(box_list)) 18 | box_list[index] = (box_list[index][0], box_list[index][1], box_list[index][2], box_list[index][3]-1) 19 | gen_box_order.append((box_list[index][0], box_list[index][1], box_list[index][2])) 20 | if box_list[index][3] == 0: 21 | box_list.pop(index) 22 | if len(box_list) == 0: 23 | break 24 | return gen_box_order, idx 25 | 26 | def evaluate_draw(cargo_list): 27 | start = time.time() 28 | c_index = 0 29 | state = Container(L, W, H) 30 | while True: 31 | cargo = Cargo(cargo_list[c_index][0], cargo_list[c_index][1], cargo_list[c_index][2]) 32 | action = dqn.choose_action(state, cargo) 33 | if(len(action)==3): 34 | c_index += 1 35 | if c_index >= len(cargo_list): 36 | break 37 | else: 38 | continue 39 | else: 40 | cargo.pose = action[1] 41 | cargo.point = action[0] 42 | state.update_state(cargo) 43 | 44 | if c_index == len(cargo_list)-1: 45 | break 46 | c_index += 1 47 | 48 | end = time.time() 49 | last_time = end - start 50 | print("time cost: %f s" % last_time) 51 | occupy = state.occupy_volume() / state.volume 52 | print('final occupy rate: %f'% occupy) 53 | draw_reslut(state) 54 | 55 | cargo_list, idx = random_generate() 56 | kind_num = 3 57 | if idx == 1: 58 | kind_num = 5 59 | elif idx == 2: 60 | kind_num = 8 61 | elif idx == 3: 62 | kind_num = 10 63 | elif idx == 4: 64 | kind_num = 15 65 | 66 | print('%d kinds of cargos' % kind_num) 67 | print('cargo list length: %d' % len(cargo_list)) 68 | print("cargo list:") 69 | print(cargo_list) 70 | evaluate_draw(cargo_list) 71 | 72 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RF_binbox 2 | DQN深度强化学习解决三维在线装箱问题 3 | 4 | ## 问题描述 5 | 物流公司在流通过程中,需要将打包完毕的箱子装入到一个货车的车厢中,为了提高物流效率,需要将车厢尽量填满,显然,车厢如果能被100%填满是最优的,但通常认为,车厢能够填满85%,可认为装箱是比较优化的。 6 | 设车厢为长方形,其长宽高分别为L,W,H;共有n个箱子,箱子也为长方形,第i个箱子的长宽高为li,wi,hi(n个箱子的体积总和是要远远大于车厢的体积),做以下假设和要求: 7 | 1. 长方形的车厢共有8个角,并设靠近驾驶室并位于下端的一个角的坐标为(0,0,0),车厢共6个面,其中长的4个面,以及靠近驾驶室的面是封闭的,只有一个面是开着的,用于工人搬运箱子; 8 | 2. 需要计算出每个箱子在车厢中的坐标,即每个箱子摆放后,其和车厢坐标为(0,0,0)的角相对应的角在车厢中的坐标,并计算车厢的填充率。 9 | 10 | ## 运行环境 11 | 12 | 主机 |内存 | 显卡 | IDE | Python | torch 13 | -----|------|------|-----|--------|----- 14 | CPU:12th Gen Intel(R) Core (TM) i7-12700H 2.30 GHz | 6GB RAM | NVIDIA GEFORCE RTX 3050 | Pycharm2022.2.1 | python3.8 | 1.13.0 15 | 16 | ## 思路 17 | 18 | (1)箱子到来后,根据车厢的实际空间情况,按照策略选择放置点; 19 | 20 | (2)当摆放箱子时,以6种姿态摆放,并对其进行评估,使用评估值最高的姿态将箱子摆放在选中的角点上; 21 | 22 | (3)重复以上步骤,直到摆放完毕。 23 | 24 | ## 建立模型 25 | 在车厢内部设置坐标系,靠近驾驶室并位于下端的一个角的坐标为(0,0,0),相交于原点的车厢长边、宽边和高边分别为x轴,y轴和z轴方向,L、W、H分别为车厢的长、宽、高。箱子具有六种摆放姿态,分别以箱子的长宽、长高、宽高平面为底,旋转90°可以得到另外三种摆放姿态。 26 | 27 | ## 核心 28 | ### 箱子放置策略 29 | 本算法将角点作为车厢内部空间中箱子的摆放位置,每次放入新箱子后搜索新生成的角点,当向车厢中放入第一个箱子时,假设车厢中只有原点一个角点,当一个箱子放入后,会产生新的角点,再放置箱子后,又会产生新的角点。 30 | 建立箱子可放置点列表,表示箱子i到来时,车厢内部所有可选的摆放位置,在放置新箱子后更新可放置点列表,并记录已放置箱子到车厢顶部距离,用于后续的奖励函数。 31 | 32 | ### DQN 33 | 34 | (1)设置一些超参数,包括ε-greedy使用的ε,折扣因子γ,目标网络更新频率,经验池容量等。 35 | 36 | (2)由于给定的箱子数据较少,为了增加模型训练数据数量,将给定的箱子数据打乱,以随机的形式生成并保存,作为训练数据,训练网络模型。 37 | 38 | (3)奖励函数 39 | 使用x-y平面中两个最大剩余矩形面积(如下图)之和与箱子到车厢顶部的距离作为奖励值R,奖励函数表示如下: 40 | 41 | 42 | ![image](https://github.com/1024-program/RF_binbox/blob/main/images/%E5%9B%BE%E7%89%872.png) 43 | 44 | ![image](https://github.com/1024-program/RF_binbox/blob/main/images/%E5%9B%BE%E7%89%871.png) 45 | 46 | (4)动作-价值函数网络和目标动作-价值函数网络设置为包含6层卷积层的CNN。对当前状态和动作建模,使其能够输入到价值网络Q和Q’中。以车厢的底面为基准,建模L*W的矩阵,每个元素代表该点放置的箱子最大高度。 47 | 48 | (5)动作选择 49 | 根据当前的状态(当前车厢的属性,包括尺寸、放置的所有箱子、H矩阵、可放置点列表等),使用ε-greedy方法选择具有最大Q值的动作或随机选择动作(动作是箱子的放置点和摆放姿态)。 50 | 51 | (6)经验重放 52 | 53 | ## 说明 54 | 将所有文件夹放置在同一目录下,train.py用于模型训练,cnn.pth是已经训练好的模型,在eval.py中导入后直接运行eval.py即可。 55 | 56 | ## 不足 57 | 1、填充率 58 | 59 | 一般认为车厢填充率高于85%,认为装箱算法是较优的,本实验设计的装箱方案填充率较低,在60%-80%间,分析原因可能在于强化学习网络的参数不够合适,算法有待优化。 60 | 改进的方向:调整强化学习网络的参数,选择更加合适的参数。 61 | 62 | 2、运行时间 63 | 64 | 本实验的代码时间消耗较高,难以满足实时性要求。该算法在在放置货物时需要遍历每个可放置点,每个可放置点需要进行碰撞检测,时间复杂度很高,导致代码运行时间较长。 65 | 后续通过改进代码或者更换编程语言,减少时间复杂度以提高运行速度,改进算法,减少遍历箱子的数量,提高运行速度。 66 | -------------------------------------------------------------------------------- /draw.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | from matplotlib.figure import Figure 3 | import numpy as np 4 | from data import * 5 | from container import * 6 | 7 | plt.rcParams['axes.unicode_minus'] = False 8 | plt.rcParams['font.sans-serif'] = ['SimHei'] 9 | fig:Figure = plt.figure() 10 | ax = fig.add_subplot(1, 1, 1, projection='3d') 11 | ax.view_init(elev=20, azim=40) 12 | plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0) 13 | 14 | def _plot_opaque_cube(x=10, y=20, z=30, dx=40, dy=50, dz=60): 15 | xx = np.linspace(x, x+dx, 2) 16 | yy = np.linspace(y, y+dy, 2) 17 | zz = np.linspace(z, z+dz, 2) 18 | xx2, yy2 = np.meshgrid(xx, yy) 19 | ax.plot_surface(xx2, yy2, np.full_like(xx2, z)) 20 | ax.plot_surface(xx2, yy2, np.full_like(xx2, z+dz)) 21 | yy2, zz2 = np.meshgrid(yy, zz) 22 | ax.plot_surface(np.full_like(yy2, x), yy2, zz2) 23 | ax.plot_surface(np.full_like(yy2, x+dx), yy2, zz2) 24 | xx2, zz2= np.meshgrid(xx, zz) 25 | ax.plot_surface(xx2, np.full_like(yy2, y), zz2) 26 | ax.plot_surface(xx2, np.full_like(yy2, y+dy), zz2) 27 | 28 | def _plot_linear_cube(x, y, z, dx, dy, dz, color='red'): 29 | xx = [x, x, x+dx, x+dx, x] 30 | yy = [y, y+dy, y+dy, y, y] 31 | kwargs = {'alpha': 1, 'color': color} 32 | ax.plot3D(xx, yy, [z]*5, **kwargs) 33 | ax.plot3D(xx, yy, [z+dz]*5, **kwargs) 34 | ax.plot3D([x, x], [y, y], [z, z+dz], **kwargs) 35 | ax.plot3D([x, x], [y+dy, y+dy], [z, z+dz], **kwargs) 36 | ax.plot3D([x+dx, x+dx], [y+dy, y+dy], [z, z+dz], **kwargs) 37 | ax.plot3D([x+dx, x+dx], [y, y], [z, z+dz], **kwargs) 38 | 39 | def _draw_container(container:Container): 40 | _plot_linear_cube( 41 | 0,0,0, 42 | container._length, 43 | container._width, 44 | container._height 45 | ) 46 | 47 | def _draw_cargo(cargo:Cargo): 48 | _plot_opaque_cube( 49 | cargo.x, cargo.y, cargo.z, 50 | cargo.length, cargo.width, cargo.height 51 | ) 52 | 53 | def draw_reslut(setted_container:Container): 54 | plt.gca().set_box_aspect(( 55 | setted_container._length, 56 | setted_container._width, 57 | setted_container._height 58 | )) 59 | _draw_container(setted_container) 60 | for cargo in setted_container._setted_cargos: 61 | _draw_cargo(cargo) 62 | plt.show() -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | import torch 4 | 5 | L = 587 6 | W = 233 7 | H = 220 8 | 9 | class CargoPose(Enum): 10 | tall_wide = 0 11 | tall_thin = 1 12 | mid_wide = 2 13 | mid_thin = 3 14 | short_wide = 4 15 | short_thin = 5 16 | 17 | 18 | class Point(object): 19 | def __init__(self, x: int, y: int, z: int) -> None: 20 | self.x = x 21 | self.y = y 22 | self.z = z 23 | 24 | def __repr__(self) -> str: 25 | return f"({self.x},{self.y},{self.z})" 26 | 27 | def __eq__(self, _o: object) -> bool: 28 | return self.x == _o.x and self.y == _o.y and self.z == _o.z 29 | 30 | @property 31 | def is_valid(self) -> bool: 32 | return self.x >= 0 and self.y >= 0 and self.z >= 0 33 | 34 | @property 35 | def tuple(self) -> tuple: 36 | return (self.x, self.y, self.z) 37 | 38 | 39 | class Cargo(object): 40 | def __init__(self, length: int, width: int, height: int) -> None: 41 | self._point = Point(-1, -1, -1) 42 | self._shape = {length, width, height} 43 | self._pose = CargoPose.tall_thin 44 | 45 | def __repr__(self) -> str: 46 | return f"{self._point} {self.shape}" 47 | 48 | @property 49 | def pose(self) -> CargoPose: 50 | return self._pose 51 | 52 | @pose.setter 53 | def pose(self, new_pose: CargoPose): 54 | self._pose = new_pose 55 | 56 | @property 57 | def _shape_swiche(self) -> dict: 58 | edges = sorted(self._shape) 59 | return { 60 | CargoPose.tall_thin: (edges[1], edges[0], edges[-1]), 61 | CargoPose.tall_wide: (edges[0], edges[1], edges[-1]), 62 | CargoPose.mid_thin: (edges[-1], edges[0], edges[1]), 63 | CargoPose.mid_wide: (edges[0], edges[-1], edges[1]), 64 | CargoPose.short_thin: (edges[-1], edges[1], edges[0]), 65 | CargoPose.short_wide: (edges[1], edges[-1], edges[0]) 66 | } 67 | 68 | @property 69 | def shape(self) -> tuple: 70 | return self._shape_swiche[self._pose] 71 | 72 | @shape.setter 73 | def shape(self, length, width, height): 74 | self._shape = {length, width, height} 75 | 76 | @property 77 | def length(self) -> int: 78 | return self.shape[0] # 宽、高类似 79 | 80 | @property 81 | def width(self) -> int: 82 | return self.shape[1] # 宽、高类似 83 | 84 | @property 85 | def height(self) -> int: 86 | return self.shape[-1] # 宽、高类似 87 | 88 | @property 89 | def point(self): 90 | return self._point 91 | 92 | @point.setter 93 | def point(self, new_point:Point): 94 | self._point = new_point 95 | 96 | @property 97 | def x(self) -> int: 98 | return self._point.x 99 | 100 | @property 101 | def y(self) -> int: 102 | return self._point.y 103 | 104 | @property 105 | def z(self) -> int: 106 | return self._point.z 107 | 108 | @x.setter 109 | def x(self, new_x: int): 110 | self._point = Point(new_x, self.y, self.z) 111 | # y、z 类似 112 | 113 | @property 114 | def volume(self) -> int: 115 | reslut = 1 116 | for i in self._shape: 117 | reslut *= i 118 | return reslut 119 | 120 | def get_shadow_of(self, planar: str) -> tuple: 121 | if planar in ("xy", "yx"): 122 | x0, y0 = self.x, self.y 123 | x1, y1 = self.x + self.length, self.y + self.width 124 | elif planar in ("xz", "zx"): 125 | x0, y0 = self.x, self.z 126 | x1, y1 = self.x + self.length, self.z + self.height 127 | elif planar in ("yz", "zy"): 128 | x0, y0 = self.y, self.z 129 | x1, y1 = self.y + self.width, self.z + self.height 130 | return (x0, y0, x1, y1) 131 | 132 | def matrix(self): 133 | cargo_matrix = torch.zeros(L, W) 134 | # cargo_matrix[point.x: point.x+cargo.length][point.y, point.y+cargo.width] += cargo.height 135 | for x in range(self.x, self.x + self.length): 136 | for y in range(self.y, self.y + self.width): 137 | cargo_matrix[x][y] += self.height 138 | 139 | return cargo_matrix 140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /container.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from copy import deepcopy 3 | from data import * 4 | 5 | def _is_rectangles_overlap(rec1:tuple, rec2:tuple) -> bool: 6 | return not ( 7 | rec1[0] >= rec2[2] or rec1[1] >= rec2[3] or 8 | rec2[0] >= rec1[2] or rec2[1] >= rec1[3] 9 | ) 10 | 11 | def _is_cargos_collide(cargo0: Cargo, cargo1: Cargo) -> bool: 12 | return ( 13 | _is_rectangles_overlap(cargo0.get_shadow_of("xy"), cargo1.get_shadow_of("xy")) and 14 | _is_rectangles_overlap(cargo0.get_shadow_of("yz"), cargo1.get_shadow_of("yz")) and 15 | _is_rectangles_overlap(cargo0.get_shadow_of( 16 | "xz"), cargo1.get_shadow_of("xz")) 17 | ) 18 | 19 | 20 | class Container(object): 21 | def __init__(self, length: int, width: int, height: int) -> None: 22 | self._length = length 23 | self._width = width 24 | self._height = height 25 | self._refresh() 26 | 27 | def __repr__(self) -> str: 28 | return f"{self._length}, {self._width}, {self._height}" 29 | 30 | def _refresh(self): 31 | # self._horizontal_planar = 0 # 水平放置参考面 32 | # self._vertical_planar = 0 # 垂直放置参考面 33 | self._available_points = [Point(0, 0, 0)] # 可放置点有序列表 34 | self._setted_cargos: List[Cargo] = [] 35 | self._H_matrix = torch.zeros(L, W) 36 | 37 | def is_encasable(self, site: Point, cargo: Cargo) -> bool: 38 | encasable = True 39 | temp = deepcopy(cargo) 40 | temp.point = site 41 | if ( 42 | temp.x + temp.length > self._length or 43 | temp.y + temp.width > self._width or 44 | temp.z + temp.height > self._height 45 | ): 46 | encasable = False 47 | for setted_cargo in self._setted_cargos: 48 | if _is_cargos_collide(temp, setted_cargo): 49 | encasable = False 50 | return encasable 51 | 52 | 53 | # 所有可放置的点及生成的转换 54 | def encase(self, cargo: Cargo): 55 | temp = cargo 56 | input = torch.zeros(1, 2, L, W) 57 | points = [] 58 | poses = [] 59 | is_encase = False 60 | for point in self._available_points: 61 | for tmp_pose in CargoPose: 62 | temp.pose = tmp_pose 63 | # 可以放置 64 | if self.is_encasable(point, temp): 65 | # self._setted_cargos.append(cargo) 66 | # cargo.point(point) 67 | is_encase = True 68 | cargo_matrix = torch.zeros(L, W) 69 | # cargo_matrix[point.x: point.x+cargo.length][point.y, point.y+cargo.width] += cargo.height 70 | for x in range(point.x, point.x+cargo.length): 71 | for y in range(point.y, point.y+cargo.width): 72 | cargo_matrix[x][y] += cargo.height 73 | 74 | new_input = torch.stack((self._H_matrix, cargo_matrix), dim=0) 75 | new_input = new_input.unsqueeze(0) 76 | input = torch.cat((input, new_input), dim=0) 77 | points.append(point) 78 | poses.append(tmp_pose) 79 | 80 | if is_encase: 81 | return is_encase, input[1:], points, poses 82 | else: 83 | return is_encase, input, points, poses 84 | 85 | 86 | # 放置新的箱子 87 | def update_state(self, cargo:Cargo): 88 | # update settled cargos 89 | self._setted_cargos.append(cargo) 90 | 91 | # update available points 92 | origin_point = cargo.point 93 | self._available_points.remove(origin_point) 94 | new_point = Point(origin_point.x+cargo.length, origin_point.y, origin_point.z) 95 | # if not (new_point.z > 0 and self._H_matrix[new_point.x][new_point.y] == 0): 96 | self._available_points.append(new_point) 97 | 98 | new_point = Point(origin_point.x, origin_point.y+cargo.width, origin_point.z) 99 | # if not (new_point.z > 0 and self._H_matrix[new_point.x][new_point.y] == 0): 100 | self._available_points.append(new_point) 101 | 102 | new_point = Point(origin_point.x, origin_point.y, origin_point.z+cargo.height) 103 | #if not (new_point.z > 0 and self._H_matrix[new_point.x][new_point.y] == 0): 104 | self._available_points.append(new_point) 105 | 106 | # update H_matrix 107 | for x in range(origin_point.x, origin_point.x + cargo.length): 108 | for y in range(origin_point.y, origin_point.y + cargo.width): 109 | self._H_matrix[x][y] += cargo.height 110 | 111 | 112 | def maximalSquare(self) -> int: 113 | matrix = self._H_matrix 114 | #长度为0,直接返回0 115 | if len(matrix) == 0 or len(matrix[0]) == 0: 116 | return 0 117 | 118 | maxSide0 = 0 119 | rows, columns = len(matrix), len(matrix[0]) 120 | 121 | #新增二个全为0的数组 122 | dp0 = [[0] * columns for _ in range(rows)] 123 | 124 | for i in range(rows): 125 | for j in range(columns): 126 | 127 | # 取0中的最大正方形 128 | if matrix[i][j] == 0: 129 | # 第一行和第一列为0时,新数组值为1 130 | if i == 0 or j == 0: 131 | dp0[i][j] = 1 132 | # 取改值左边、上边、左上中的最小值+1 133 | else: 134 | dp0[i][j] = min(dp0[i - 1][j], dp0[i][j - 1], dp0[i - 1][j - 1]) + 1 135 | maxSide0 = max(maxSide0, dp0[i][j]) 136 | 137 | maxSide = maxSide0 138 | maxSquare = maxSide * maxSide 139 | return maxSquare 140 | 141 | # obtain the reward R=D+w(V1+V2) 142 | def reward(self): 143 | # lines = self._H_matrix.norm(0, dim=0) 144 | # none_zeros = sum(lines) 145 | # zeros = self._H_matrix.size(0) * self._H_matrix.size(1) - none_zeros 146 | # maxSquare = self.maximalSquare() 147 | # V1_2 = zeros.item() + maxSquare 148 | 149 | length = self._length 150 | width = self._width 151 | for l in range(self._length - 1, -1, -1): 152 | if not (torch.equal(self._H_matrix[l], torch.zeros(self._H_matrix[l].shape))): 153 | length = l 154 | break 155 | 156 | for w in range(self._width - 1, -1, -1): 157 | if not (torch.equal(self._H_matrix[:, w], torch.zeros(self._H_matrix[:, w].shape))): 158 | width = w 159 | break 160 | 161 | length = self._length - (length + 1) 162 | width = self._width - (width + 1) 163 | V1_2 = length * width + self._width * length + self._length * width 164 | 165 | maxHeight = self._H_matrix.max() 166 | D = self._height - maxHeight.item() 167 | w = 256 168 | reward = D / H + w * V1_2 / (2 * L * W) 169 | return reward 170 | 171 | @property 172 | def volume(self) -> int: 173 | return self._height * self._length * self._width 174 | 175 | def occupy_volume(self): 176 | v_sum = 0 177 | for cargo in self._setted_cargos: 178 | v_sum += cargo.volume 179 | return v_sum 180 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | import numpy as np 7 | import random 8 | import copy 9 | import time 10 | 11 | # import draw 12 | from container import * 13 | from data import * 14 | # from draw import * 15 | 16 | # 1. Define some Hyper Parameters 17 | EPSILON = 0.9 # epsilon used for epsilon greedy approach 18 | BATCH_SIZE = 16 # batch size of sampling process from buffer 19 | LR = 0.0001 # learning rate 20 | GAMMA = 0.9 # discount factor 21 | TARGET_NETWORK_REPLACE_FREQ = 100 # How frequently target netowrk updates 22 | MEMORY_CAPACITY = 2000 # The capacity of experience replay buffer 23 | device=torch.device('cuda' if torch.cuda.is_available() else 'cpu') 24 | 25 | # 2. Random generate box data 26 | solution = [[(91, 54, 45, 32), (105, 77, 72, 24), (79, 78, 48, 30)], 27 | [(108, 76, 30, 24), (110, 43, 25, 7), (92, 81, 55, 22), (81, 33, 28, 13), (120, 99, 73, 15)], 28 | [(88, 54, 39, 16), (94, 54, 36, 14), (87, 77, 43, 20), (100, 80, 72, 16), (83, 40, 36, 6),(91, 54, 22, 15), (109, 58, 54, 17), (94, 55, 30, 9)], 29 | [(86, 84, 45, 18), (81, 45, 34, 19), (70, 54, 37, 13), (71, 61, 52, 16), (78, 73, 40, 10),(69, 63, 46, 13), (72, 67, 56, 10), (75, 75, 36, 8), (94, 88, 50, 12), (65, 51, 50, 13)], 30 | [(108, 76, 30, 12), (110, 43, 25, 12), (92, 81, 55, 6), (81, 33, 28, 9), (120, 99, 73, 5), (111, 70, 48, 12), (98, 72, 46, 9), (95, 66, 31, 10), (85, 84, 30, 8), (71, 32, 25, 3), (36, 34, 25, 10), (97, 67, 62, 7), (33, 25, 23, 7), (95, 27, 26, 10), (94, 81, 44, 9)]] 31 | 32 | def random_generate(): 33 | idx = np.random.randint(0,len(solution)) 34 | box_list = solution[idx] 35 | gen_box_order = [] 36 | while True: 37 | index = np.random.randint(0, len(box_list)) 38 | box_list[index] = (box_list[index][0], box_list[index][1], box_list[index][2], box_list[index][3]-1) 39 | gen_box_order.append((box_list[index][0], box_list[index][1], box_list[index][2])) 40 | if box_list[index][3] == 0: 41 | box_list.pop(index) 42 | if len(box_list) == 0: 43 | break 44 | return gen_box_order 45 | 46 | 47 | def normalization(data): 48 | _range = np.max(data) - np.min(data) 49 | return (data - np.min(data)) / _range 50 | 51 | 52 | def standardization(data): 53 | mu = np.mean(data) 54 | sigma = np.std(data) 55 | return (data - mu) / sigma 56 | 57 | # 3. Define the network used in both target net and the net for training 58 | class CNN(nn.Module): 59 | def __init__(self): 60 | super(CNN, self).__init__() # 继承__init__功能 61 | ## 第一层卷积 62 | self.conv1 = nn.Sequential( 63 | # 输入[2,587,233] 64 | nn.Conv2d( 65 | in_channels=2, # 输入图片的高度 66 | out_channels=16, # 输出图片的高度 67 | kernel_size=3, # 5x5的卷积核,相当于过滤器 68 | stride=1, # 卷积核在图上滑动,每隔一个扫一次 69 | padding=1, # 给图外边补上0 70 | ), 71 | # 经过卷积层 输出[16,28,28] 传入池化层 72 | nn.ReLU(), 73 | nn.MaxPool2d(kernel_size=2) # 经过池化 输出[16,14,14] 传入下一个卷积 74 | ) 75 | ## 第二层卷积 76 | self.conv2 = nn.Sequential( 77 | nn.Conv2d( 78 | in_channels=16, # 同上 79 | out_channels=32, 80 | kernel_size=3, 81 | stride=1, 82 | padding=1 83 | ), 84 | # 经过卷积 输出[32, 14, 14] 传入池化层 85 | nn.ReLU(), 86 | nn.MaxPool2d(kernel_size=2) # 经过池化 输出[32,7,7] 传入输出层 87 | ) 88 | ## 第三层卷积 89 | self.conv3 = nn.Sequential( 90 | nn.Conv2d( 91 | in_channels=32, # 同上 92 | out_channels=64, 93 | kernel_size=3, 94 | stride=1, 95 | padding=1 96 | ), 97 | # 经过卷积 输出[32, 14, 14] 传入池化层 98 | nn.ReLU(), 99 | nn.MaxPool2d(kernel_size=2) # 经过池化 输出[32,7,7] 传入输出层 100 | ) 101 | ## 第四层卷积 102 | self.conv4 = nn.Sequential( 103 | nn.Conv2d( 104 | in_channels=64, # 同上 105 | out_channels=128, 106 | kernel_size=3, 107 | stride=1, 108 | padding=1 109 | ), 110 | # 经过卷积 输出[32, 14, 14] 传入池化层 111 | nn.ReLU(), 112 | nn.MaxPool2d(kernel_size=2) # 经过池化 输出[32,7,7] 传入输出层 113 | ) 114 | ## 第五层卷积 115 | self.conv5 = nn.Sequential( 116 | nn.Conv2d( 117 | in_channels=128, # 同上 118 | out_channels=256, 119 | kernel_size=3, 120 | stride=1, 121 | padding=1 122 | ), 123 | # 经过卷积 输出[32, 14, 14] 传入池化层 124 | nn.ReLU(), 125 | nn.MaxPool2d(kernel_size=2) # 经过池化 输出[32,7,7] 传入输出层 126 | ) 127 | ## 第六层卷积 128 | self.conv6 = nn.Sequential( 129 | nn.Conv2d( 130 | in_channels=256, # 同上 131 | out_channels=512, 132 | kernel_size=3, 133 | stride=1, 134 | padding=1 135 | ), 136 | # 经过卷积 输出[32, 14, 14] 传入池化层 137 | nn.ReLU(), 138 | nn.MaxPool2d(kernel_size=2) # 经过池化 输出[32,7,7] 传入输出层 139 | ) 140 | ## 输出层 141 | self.output = nn.Linear(in_features=512*9*3, out_features=1) 142 | 143 | def forward(self, x): 144 | x = self.conv1(x) 145 | x = self.conv2(x) # [batch, 32,7,7] 146 | x = self.conv3(x) # [batch, 32,7,7] 147 | x = self.conv4(x) # [batch, 32,7,7] 148 | x = self.conv5(x) # [batch, 32,7,7] 149 | x = self.conv6(x) # [batch, 32,7,7] 150 | x = x.view(x.size(0), -1) # 保留batch, 将后面的乘到一起 [batch, 32*7*7] 151 | output = self.output(x) # 输出[50,10] 152 | return output 153 | 154 | 155 | class DQN(object): 156 | def __init__(self): 157 | # -----------Define 2 networks (target and training)------# 158 | self.eval_net, self.target_net = CNN(), CNN() 159 | # Define counter, memory size and loss function 160 | self.learn_step_counter = 0 # count the steps of learning process 161 | self.memory: List = [None] * MEMORY_CAPACITY 162 | self.memory_counter = 0 # counter used for experience replay buffer 163 | # ------- Define the optimizer------# 164 | self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR) 165 | 166 | # ------Define the loss function-----# 167 | self.loss_func = nn.MSELoss() 168 | 169 | def choose_action(self, state:Container, cargo): 170 | # 可行点取最大的 171 | is_encase, inputs, points, poses = state.encase(cargo) 172 | 173 | # torch.set_printoptions(profile="full") 174 | # print(inputs) 175 | if is_encase == False: 176 | return is_encase, is_encase, is_encase 177 | 178 | if np.random.uniform() < EPSILON: # greedy 179 | data = inputs.data.cpu().numpy() 180 | data = normalization(data) 181 | data = standardization(data) 182 | inputs = torch.tensor(data) 183 | with torch.no_grad(): 184 | actions_value = self.target_net.forward(inputs) 185 | action = torch.max(actions_value, 0)[1].data.numpy() 186 | action = action[0] 187 | point = points[action] 188 | pose = poses[action] 189 | else: 190 | action = np.random.randint(0, high=len(points)) 191 | point = points[action] 192 | pose = poses[action] 193 | return point, pose 194 | 195 | def store_transition(self, s:torch.Tensor, a:Cargo, r, s_:Container, a_:Cargo): 196 | 197 | transition = [s, a.matrix(), r, s_, a_] 198 | # if the capacity is full, then use index to replace the old memory with new one 199 | index = self.memory_counter % MEMORY_CAPACITY 200 | self.memory[index] = transition 201 | self.memory_counter += 1 202 | 203 | def learn(self): 204 | 205 | # update the target network every fixed steps 206 | if self.learn_step_counter % TARGET_NETWORK_REPLACE_FREQ == 0: 207 | # Assign the parameters of eval_net to target_net 208 | self.target_net.load_state_dict(self.eval_net.state_dict()) 209 | self.learn_step_counter += 1 210 | 211 | # Determine the index of Sampled batch from buffer 212 | sample_index = np.random.choice(MEMORY_CAPACITY, BATCH_SIZE) # randomly select some data from buffer 213 | 214 | b_s_a_list = torch.zeros(1, 2, L, W) 215 | q_next_max = [] 216 | b_r = [] 217 | for index in sample_index: 218 | b_memory = self.memory[index] 219 | b_s = b_memory[0] 220 | b_a = b_memory[1] 221 | b_r.append(b_memory[2]) 222 | b_s_a = torch.stack((b_s, b_a), dim=0) 223 | b_s_a = b_s_a.unsqueeze(0) 224 | b_s_a_list = torch.cat((b_s_a_list, b_s_a), dim=0) 225 | state_next = b_memory[-2] 226 | cargo_next = b_memory[-1] 227 | 228 | # calculate the q value of next state 229 | is_encasable, inputs, points, poses = state_next.encase(cargo_next) 230 | if is_encasable: 231 | data = inputs.data.cpu().numpy() 232 | data = normalization(data) 233 | data = standardization(data) 234 | inputs = torch.tensor(data) 235 | actions_value = self.target_net.forward(inputs) 236 | q_next = torch.max(actions_value, 0)[0] 237 | q_next_max.append(q_next) 238 | else: 239 | q_next = torch.zeros(size=[1]) 240 | q_next_max.append(q_next) 241 | 242 | b_s_a = b_s_a_list[1:] 243 | data = b_s_a.data.cpu().numpy() 244 | data = normalization(data) 245 | data = standardization(data) 246 | b_s_a = torch.tensor(data).to(device) 247 | 248 | 249 | self.eval_net.to(device) 250 | q_eval = self.eval_net(b_s_a) 251 | 252 | # q_next = torch.stack(q_next_max,0) 253 | # b_r = torch.stack(b_r, 0).unsqueeze(1) 254 | 255 | q_next = torch.tensor(q_next_max).unsqueeze(1) 256 | b_r = torch.tensor(b_r).unsqueeze(1) 257 | 258 | q_target = (b_r + q_next * GAMMA).to(device) 259 | q_target.detach() 260 | 261 | self.loss_func.to(device) 262 | 263 | loss = self.loss_func(q_eval, q_target) 264 | print('loss: %f iters:%d cargo_id: %d' %(loss.item(), i_episode, c_i)) 265 | 266 | self.optimizer.zero_grad() # reset the gradient to zero 267 | loss.backward() 268 | self.optimizer.step() # execute back propagation for one step 269 | 270 | # for parameters in self.eval_net.parameters(): 271 | # print(parameters) 272 | 273 | q_eval_again = self.eval_net(b_s_a) 274 | loss_again = self.loss_func(q_eval_again, q_target) 275 | print('loss_again: %f iters:%d cargo_id: %d' % (loss_again.item(), i_episode, c_i)) 276 | 277 | 278 | def evaluate(): 279 | start = time.time() 280 | c_index = 0 281 | state = Container(L, W, H) 282 | while True: 283 | cargo = Cargo(cargo_list[c_index][0], cargo_list[c_index][1], cargo_list[c_index][2]) 284 | action = dqn.choose_action(state, cargo) 285 | if(len(action)==3): 286 | c_index += 1 287 | if c_index >= len(cargo_list): 288 | break 289 | else: 290 | continue 291 | else: 292 | cargo.pose = action[1] 293 | cargo.point = action[0] 294 | state.update_state(cargo) 295 | 296 | if c_index == len(cargo_list)-1: 297 | break 298 | c_index += 1 299 | 300 | end = time.time() 301 | last_time = end - start 302 | print("time cost: %f s" % last_time) 303 | occupy = state.occupy_volume() / state.volume 304 | return occupy 305 | 306 | 307 | if __name__ == '__main__': 308 | 309 | ''' 310 | --------------Procedures of DQN Algorithm------------------ 311 | ''' 312 | # create the object of DQN class 313 | dqn = DQN() 314 | 315 | # Start training 316 | print("\nCollecting experience...") 317 | 318 | cargo_list = random_generate() 319 | print('cargo list length: %d' % len(cargo_list)) 320 | 321 | for i_episode in range(500): 322 | # refresh 323 | 324 | state = Container(L, W, H) 325 | c_i = 0 326 | ep_r = 0 327 | done = False 328 | while True: 329 | # print('episode: %d box id: %d' % (i_episode, c_i)) 330 | # take action based on the current state 331 | cargo = Cargo(cargo_list[c_i][0], cargo_list[c_i][1], cargo_list[c_i][2]) 332 | 333 | # take action based on the current state 334 | action = dqn.choose_action(state, cargo) 335 | 336 | if len(action) == 3: 337 | done = True 338 | else: 339 | pose = action[1] 340 | point = action[0] 341 | 342 | if done: 343 | c_i += 1 344 | if c_i >= len(cargo_list): 345 | break 346 | else: 347 | continue 348 | 349 | pre_s = copy.deepcopy(state._H_matrix) 350 | # obtain the reward and next state and some other information 351 | cargo.pose = pose 352 | cargo.point = point 353 | state.update_state(cargo) 354 | # obtain the reward 355 | r = state.reward() 356 | 357 | # store the transitions of states 358 | cargo_next = Cargo(cargo_list[c_i+1][0], cargo_list[c_i+1][1], cargo_list[c_i+1][2]) 359 | dqn.store_transition(pre_s, cargo, r, state, cargo_next) 360 | # memory = dqn.memory 361 | # print("store finished") 362 | 363 | ep_r += r 364 | # if the experience repaly buffer is filled, DQN begins to learn or update 365 | # its parameters. 366 | if dqn.memory_counter > MEMORY_CAPACITY: 367 | dqn.learn() 368 | if done: 369 | print('Ep: ', i_episode, ' |', 'Ep_r: ', round(ep_r, 2)) 370 | 371 | if c_i+1 == len(cargo_list)-1: 372 | break 373 | c_i += 1 374 | 375 | occupy_rate = evaluate() 376 | print('occupy_rate: %f' %occupy_rate) 377 | 378 | torch.save(dqn.target_net.state_dict(), 'cnn.pth') 379 | 380 | 381 | 382 | 383 | 384 | --------------------------------------------------------------------------------