├── images
    ├── fig1
    ├── 图片1.png
    └── 图片2.png
├── cnn.pth
├── eval.py
├── README.md
├── draw.py
├── data.py
├── container.py
└── train.py


/images/fig1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/cnn.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/1024-program/RF_binbox/HEAD/cnn.pth


--------------------------------------------------------------------------------
/images/图片1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/1024-program/RF_binbox/HEAD/images/图片1.png


--------------------------------------------------------------------------------
/images/图片2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/1024-program/RF_binbox/HEAD/images/图片2.png


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from train import DQN, solution
 3 | import time
 4 | from draw import *
 5 | 
 6 | # create the object of DQN class
 7 | dqn = DQN()
 8 | 
 9 | bn_state_dict = torch.load('cnn.pth')
10 | dqn.target_net.load_state_dict(bn_state_dict)
11 | 
def random_generate():
    """Pick one box set at random and shuffle its boxes into an arrival order.

    A box set is chosen uniformly from ``solution``. Boxes are then emitted
    one at a time, each time choosing uniformly among the box *types* that
    still have copies left, until every copy has been emitted.

    Returns:
        tuple: ``(gen_box_order, idx)`` where ``gen_box_order`` is a list of
        ``(length, width, height)`` tuples and ``idx`` is the index of the
        chosen box set inside ``solution``.
    """
    idx = np.random.randint(0, len(solution))
    # Work on a copy. Aliasing solution[idx] and popping from it emptied the
    # shared data set, so a second draw of the same index crashed.
    box_list = list(solution[idx])
    gen_box_order = []
    while box_list:
        index = np.random.randint(0, len(box_list))
        length, width, height, remaining = box_list[index]
        gen_box_order.append((length, width, height))
        if remaining <= 1:
            # Last copy of this box type has just been emitted.
            box_list.pop(index)
        else:
            box_list[index] = (length, width, height, remaining - 1)
    return gen_box_order, idx
25 | 
def evaluate_draw(cargo_list):
    """Pack ``cargo_list`` in order with the loaded agent, then plot the result.

    Each entry of ``cargo_list`` is indexed as ``(length, width, height)``.
    A cargo for which the agent finds no feasible placement is skipped.
    Prints the elapsed time and the final occupancy ratio, then draws the
    container via ``draw_reslut``.

    Uses the module-level ``dqn`` agent.
    """
    start = time.time()
    state = Container(L, W, H)
    # A plain for-loop visits every cargo exactly once, like the original
    # index bookkeeping did, and also copes with an empty list.
    for dims in cargo_list:
        cargo = Cargo(dims[0], dims[1], dims[2])
        action = dqn.choose_action(state, cargo)
        # choose_action returns a 3-tuple of False when nothing fits and a
        # (point, pose) pair otherwise.
        if len(action) == 3:
            continue
        cargo.pose = action[1]
        cargo.point = action[0]
        state.update_state(cargo)

    end = time.time()
    last_time = end - start
    print("time cost: %f s" % last_time)
    occupy = state.occupy_volume() / state.volume
    print('final occupy rate: %f'% occupy)
    draw_reslut(state)
54 | 
# Draw a random instance, report it, then pack and plot it.
cargo_list, idx = random_generate()

# Number of distinct box types per data-set index; index 0 (and any index not
# listed) reports 3.
_KINDS_BY_INDEX = {1: 5, 2: 8, 3: 10, 4: 15}
kind_num = _KINDS_BY_INDEX.get(idx, 3)

for line in (
    '%d kinds of cargos' % kind_num,
    'cargo list length: %d' % len(cargo_list),
    "cargo list:",
    cargo_list,
):
    print(line)
evaluate_draw(cargo_list)
71 | 
72 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RF_binbox
 2 | DQN深度强化学习解决三维在线装箱问题
 3 | 
 4 | ## 问题描述
 5 | 物流公司在流通过程中，需要将打包完毕的箱子装入到一个货车的车厢中，为了提高物流效率，需要将车厢尽量填满，显然，车厢如果能被100%填满是最优的，但通常认为，车厢能够填满85%，可认为装箱是比较优化的。
 6 | 设车厢为长方形，其长宽高分别为L，W，H；共有n个箱子，箱子也为长方形，第i个箱子的长宽高为li，wi，hi（n个箱子的体积总和是要远远大于车厢的体积），做以下假设和要求：
 7 | 1. 长方形的车厢共有8个角，并设靠近驾驶室并位于下端的一个角的坐标为（0,0,0），车厢共6个面，其中长的4个面，以及靠近驾驶室的面是封闭的，只有一个面是开着的，用于工人搬运箱子；
 8 | 2. 需要计算出每个箱子在车厢中的坐标，即每个箱子摆放后，其和车厢坐标为（0,0,0）的角相对应的角在车厢中的坐标，并计算车厢的填充率。
 9 | 
10 | ## 运行环境
11 | 
12 |  主机 |内存 | 显卡 | IDE | Python | torch 
13 | -----|------|------|-----|--------|-----
14 | CPU：12th Gen Intel(R) Core (TM) i7-12700H  2.30 GHz | 6GB RAM | NVIDIA GEFORCE RTX 3050 | Pycharm2022.2.1 | python3.8 | 1.13.0
15 | 
16 | ## 思路
17 | 
18 | （1）箱子到来后，根据车厢的实际空间情况，按照策略选择放置点；
19 | 
20 | （2）当摆放箱子时，以6种姿态摆放，并对其进行评估，使用评估值最高的姿态将箱子摆放在选中的角点上；
21 | 
22 | （3）重复以上步骤，直到摆放完毕。
23 | 
24 | ## 建立模型
25 | 在车厢内部设置坐标系，靠近驾驶室并位于下端的一个角的坐标为（0,0,0），相交于原点的车厢长边、宽边和高边分别为x轴，y轴和z轴方向，L、W、H分别为车厢的长、宽、高。箱子具有六种摆放姿态，分别以箱子的长宽、长高、宽高平面为底，旋转90°可以得到另外三种摆放姿态。
26 | 
27 | ## 核心
28 | ### 箱子放置策略
29 | 本算法将角点作为车厢内部空间中箱子的摆放位置，每次放入新箱子后搜索新生成的角点，当向车厢中放入第一个箱子时，假设车厢中只有原点一个角点，当一个箱子放入后，会产生新的角点，再放置箱子后，又会产生新的角点。
30 | 建立箱子可放置点列表，表示箱子i到来时，车厢内部所有可选的摆放位置，在放置新箱子后更新可放置点列表，并记录已放置箱子到车厢顶部距离，用于后续的奖励函数。
31 | 
32 | ### DQN
33 | 
34 | （1）设置一些超参数，包括ε-greedy使用的ε，折扣因子γ，目标网络更新频率，经验池容量等。
35 | 
36 | （2）由于给定的箱子数据较少，为了增加模型训练数据数量，将给定的箱子数据打乱，以随机的形式生成并保存，作为训练数据，训练网络模型。
37 | 
38 | （3）奖励函数
39 | 使用x-y平面中两个最大剩余矩形面积（如下图）之和与箱子到车厢顶部的距离作为奖励值R，奖励函数表示如下：
40 | 
41 | 
42 | ![image](https://github.com/1024-program/RF_binbox/blob/main/images/%E5%9B%BE%E7%89%872.png)
43 | 
44 | ![image](https://github.com/1024-program/RF_binbox/blob/main/images/%E5%9B%BE%E7%89%871.png)
45 | 
46 | （4）动作-价值函数网络和目标动作-价值函数网络设置为包含6层卷积层的CNN。对当前状态和动作建模，使其能够输入到价值网络Q和Q’中。以车厢的底面为基准，建模L*W的矩阵，每个元素代表该点放置的箱子最大高度。
47 | 
48 | （5）动作选择
49 | 根据当前的状态（当前车厢的属性，包括尺寸、放置的所有箱子、H矩阵、可放置点列表等），使用ε-greedy方法选择具有最大Q值的动作或随机选择动作（动作是箱子的放置点和摆放姿态）。
50 | 
51 | （6）经验重放
52 | 
53 | ## 说明
54 | 将所有文件夹放置在同一目录下，train.py用于模型训练，cnn.pth是已经训练好的模型，在eval.py中导入后直接运行eval.py即可。
55 | 
56 | ## 不足
57 | 1、填充率
58 | 
59 | 一般认为车厢填充率高于85%时，装箱算法是较优的。本实验设计的装箱方案填充率较低，在60%-80%之间，原因可能在于强化学习网络的参数不够合适，算法有待优化。
60 | 改进的方向：调整强化学习网络的参数，选择更加合适的参数。
61 | 
62 | 2、运行时间
63 | 
64 | 本实验的代码时间消耗较高，难以满足实时性要求。该算法在放置货物时需要遍历每个可放置点，每个可放置点需要进行碰撞检测，时间复杂度很高，导致代码运行时间较长。
65 | 后续通过改进代码或者更换编程语言，减少时间复杂度以提高运行速度，改进算法，减少遍历箱子的数量，提高运行速度。
66 | 


--------------------------------------------------------------------------------
/draw.py:
--------------------------------------------------------------------------------
 1 | from matplotlib import pyplot as plt
 2 | from matplotlib.figure import Figure
 3 | import numpy as np
 4 | from data import *
 5 | from container import *
 6 | 
 7 | plt.rcParams['axes.unicode_minus'] = False
 8 | plt.rcParams['font.sans-serif'] = ['SimHei']
 9 | fig:Figure = plt.figure()
10 | ax = fig.add_subplot(1, 1, 1, projection='3d')
11 | ax.view_init(elev=20, azim=40)
12 | plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
13 | 
def _plot_opaque_cube(x=10, y=20, z=30, dx=40, dy=50, dz=60):
    """Draw the six faces of an axis-aligned box on the module-level ``ax``.

    ``(x, y, z)`` is the box corner and ``(dx, dy, dz)`` its extents along
    the three axes. Faces are drawn in the order: two constant-z faces, two
    constant-x faces, two constant-y faces.
    """
    x_span = np.linspace(x, x + dx, 2)
    y_span = np.linspace(y, y + dy, 2)
    z_span = np.linspace(z, z + dz, 2)

    # Faces at constant z.
    grid_x, grid_y = np.meshgrid(x_span, y_span)
    for level in (z, z + dz):
        ax.plot_surface(grid_x, grid_y, np.full_like(grid_x, level))

    # Faces at constant x.
    grid_y, grid_z = np.meshgrid(y_span, z_span)
    for level in (x, x + dx):
        ax.plot_surface(np.full_like(grid_y, level), grid_y, grid_z)

    # Faces at constant y.
    grid_x, grid_z = np.meshgrid(x_span, z_span)
    for level in (y, y + dy):
        ax.plot_surface(grid_x, np.full_like(grid_x, level), grid_z)
27 | 
def _plot_linear_cube(x, y, z, dx, dy, dz, color='red'):
    """Draw the wireframe of an axis-aligned box on the module-level ``ax``.

    ``(x, y, z)`` is the box corner, ``(dx, dy, dz)`` its extents, and
    ``color`` the line colour of all twelve edges.
    """
    # Closed outline of the rectangle in the x-y plane.
    outline_x = [x, x, x + dx, x + dx, x]
    outline_y = [y, y + dy, y + dy, y, y]
    for level in (z, z + dz):
        ax.plot3D(outline_x, outline_y, [level] * 5, alpha=1, color=color)

    # Four vertical edges, one per corner of the outline.
    corners = ((x, y), (x, y + dy), (x + dx, y + dy), (x + dx, y))
    for corner_x, corner_y in corners:
        ax.plot3D(
            [corner_x, corner_x],
            [corner_y, corner_y],
            [z, z + dz],
            alpha=1,
            color=color,
        )
38 | 
def _draw_container(container:Container):
    """Draw the container as a wireframe box anchored at the origin."""
    extents = (container._length, container._width, container._height)
    _plot_linear_cube(0, 0, 0, *extents)
46 | 
def _draw_cargo(cargo:Cargo):
    """Draw one placed cargo as an opaque box at its position and size."""
    corner = (cargo.x, cargo.y, cargo.z)
    extents = (cargo.length, cargo.width, cargo.height)
    _plot_opaque_cube(*corner, *extents)
52 | 
def draw_reslut(setted_container:Container):
    """Render the container outline and every cargo placed in it, then show.

    The axes aspect ratio is set to the container's length : width : height.
    """
    aspect = (
        setted_container._length,
        setted_container._width,
        setted_container._height,
    )
    plt.gca().set_box_aspect(aspect)

    _draw_container(setted_container)
    for placed in setted_container._setted_cargos:
        _draw_cargo(placed)
    plt.show()


--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
  1 | from enum import Enum
  2 | 
  3 | import torch
  4 | 
# Container length, width and height. L and W are also used as the size of
# the L x W height-map and cargo-footprint tensors.
L = 587
W = 233
H = 220
  8 | 
class CargoPose(Enum):
    """The six axis-aligned orientations of a cargo.

    As mapped in ``Cargo._shape_swiche``: ``tall`` / ``mid`` / ``short`` put
    the largest / middle / smallest edge along the height; of the two
    remaining edges, ``thin`` puts the smaller one along the width and
    ``wide`` puts the larger one along the width.
    """
    tall_wide  = 0
    tall_thin  = 1
    mid_wide   = 2
    mid_thin   = 3
    short_wide = 4
    short_thin = 5
 16 | 
 17 | 
 18 | class Point(object):
 19 |     def __init__(self, x: int, y: int, z: int) -> None:
 20 |         self.x = x
 21 |         self.y = y
 22 |         self.z = z
 23 | 
 24 |     def __repr__(self) -> str:
 25 |         return f"({self.x},{self.y},{self.z})"
 26 | 
 27 |     def __eq__(self, _o: object) -> bool:
 28 |         return self.x == _o.x and self.y == _o.y and self.z == _o.z
 29 | 
 30 |     @property
 31 |     def is_valid(self) -> bool:
 32 |         return self.x >= 0 and self.y >= 0 and self.z >= 0
 33 | 
 34 |     @property
 35 |     def tuple(self) -> tuple:
 36 |         return (self.x, self.y, self.z)
 37 | 
 38 | 
 39 | class Cargo(object):
 40 |     def __init__(self, length: int, width: int, height: int) -> None:
 41 |         self._point = Point(-1, -1, -1)
 42 |         self._shape = {length, width, height}
 43 |         self._pose = CargoPose.tall_thin
 44 | 
 45 |     def __repr__(self) -> str:
 46 |         return f"{self._point} {self.shape}"
 47 | 
 48 |     @property
 49 |     def pose(self) -> CargoPose:
 50 |         return self._pose
 51 | 
 52 |     @pose.setter
 53 |     def pose(self, new_pose: CargoPose):
 54 |         self._pose = new_pose
 55 | 
 56 |     @property
 57 |     def _shape_swiche(self) -> dict:
 58 |         edges = sorted(self._shape)
 59 |         return {
 60 |             CargoPose.tall_thin: (edges[1], edges[0], edges[-1]),
 61 |             CargoPose.tall_wide: (edges[0], edges[1], edges[-1]),
 62 |             CargoPose.mid_thin: (edges[-1], edges[0], edges[1]),
 63 |             CargoPose.mid_wide: (edges[0], edges[-1], edges[1]),
 64 |             CargoPose.short_thin: (edges[-1], edges[1], edges[0]),
 65 |             CargoPose.short_wide: (edges[1], edges[-1], edges[0])
 66 |         }
 67 | 
 68 |     @property
 69 |     def shape(self) -> tuple:
 70 |         return self._shape_swiche[self._pose]
 71 | 
 72 |     @shape.setter
 73 |     def shape(self, length, width, height):
 74 |         self._shape = {length, width, height}
 75 | 
 76 |     @property
 77 |     def length(self) -> int:
 78 |         return self.shape[0] # 宽、高类似
 79 | 
 80 |     @property
 81 |     def width(self) -> int:
 82 |         return self.shape[1]  # 宽、高类似
 83 | 
 84 |     @property
 85 |     def height(self) -> int:
 86 |         return self.shape[-1]  # 宽、高类似
 87 | 
 88 |     @property
 89 |     def point(self):
 90 |         return self._point
 91 | 
 92 |     @point.setter
 93 |     def point(self, new_point:Point):
 94 |         self._point = new_point
 95 | 
 96 |     @property
 97 |     def x(self) -> int:
 98 |         return self._point.x
 99 | 
100 |     @property
101 |     def y(self) -> int:
102 |         return self._point.y
103 | 
104 |     @property
105 |     def z(self) -> int:
106 |         return self._point.z
107 | 
108 |     @x.setter
109 |     def x(self, new_x: int):
110 |         self._point = Point(new_x, self.y, self.z)
111 | # y、z 类似
112 | 
113 |     @property
114 |     def volume(self) -> int:
115 |         reslut = 1
116 |         for i in self._shape:
117 |             reslut *= i
118 |         return reslut
119 | 
120 |     def get_shadow_of(self, planar: str) -> tuple:
121 |         if planar in ("xy", "yx"):
122 |             x0, y0 = self.x, self.y
123 |             x1, y1 = self.x + self.length, self.y + self.width
124 |         elif planar in ("xz", "zx"):
125 |             x0, y0 = self.x, self.z
126 |             x1, y1 = self.x + self.length, self.z + self.height
127 |         elif planar in ("yz", "zy"):
128 |             x0, y0 = self.y, self.z
129 |             x1, y1 = self.y + self.width, self.z + self.height
130 |         return (x0, y0, x1, y1)
131 | 
132 |     def matrix(self):
133 |         cargo_matrix = torch.zeros(L, W)
134 |         # cargo_matrix[point.x: point.x+cargo.length][point.y, point.y+cargo.width] += cargo.height
135 |         for x in range(self.x, self.x + self.length):
136 |             for y in range(self.y, self.y + self.width):
137 |                 cargo_matrix[x][y] += self.height
138 | 
139 |         return cargo_matrix
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 


--------------------------------------------------------------------------------
/container.py:
--------------------------------------------------------------------------------
  1 | from typing import List
  2 | from copy import deepcopy
  3 | from data import *
  4 | 
  5 | def _is_rectangles_overlap(rec1:tuple, rec2:tuple) -> bool:
  6 |     return not (
  7 |         rec1[0] >= rec2[2] or rec1[1] >= rec2[3] or
  8 |         rec2[0] >= rec1[2] or rec2[1] >= rec1[3]
  9 |     )
 10 | 
 11 | def _is_cargos_collide(cargo0: Cargo, cargo1: Cargo) -> bool:
 12 |     return (
 13 |         _is_rectangles_overlap(cargo0.get_shadow_of("xy"), cargo1.get_shadow_of("xy")) and
 14 |         _is_rectangles_overlap(cargo0.get_shadow_of("yz"), cargo1.get_shadow_of("yz")) and
 15 |         _is_rectangles_overlap(cargo0.get_shadow_of(
 16 |             "xz"), cargo1.get_shadow_of("xz"))
 17 |     )
 18 | 
 19 | 
 20 | class Container(object):
 21 |     def __init__(self, length: int, width: int, height: int) -> None:
 22 |         self._length = length
 23 |         self._width = width
 24 |         self._height = height
 25 |         self._refresh()
 26 | 
 27 |     def __repr__(self) -> str:
 28 |         return f"{self._length}, {self._width}, {self._height}"
 29 | 
 30 |     def _refresh(self):
 31 |         # self._horizontal_planar = 0  # 水平放置参考面
 32 |         # self._vertical_planar = 0  # 垂直放置参考面
 33 |         self._available_points = [Point(0, 0, 0)]  # 可放置点有序列表
 34 |         self._setted_cargos: List[Cargo] = []
 35 |         self._H_matrix = torch.zeros(L, W)
 36 | 
 37 |     def is_encasable(self, site: Point, cargo: Cargo) -> bool:
 38 |         encasable = True
 39 |         temp = deepcopy(cargo)
 40 |         temp.point = site
 41 |         if (
 42 |             temp.x + temp.length > self._length or
 43 |             temp.y + temp.width > self._width or
 44 |             temp.z + temp.height > self._height
 45 |         ):
 46 |             encasable = False
 47 |         for setted_cargo in self._setted_cargos:
 48 |             if _is_cargos_collide(temp, setted_cargo):
 49 |                 encasable = False
 50 |         return encasable
 51 | 
 52 | 
 53 |     # 所有可放置的点及生成的转换
 54 |     def encase(self, cargo: Cargo):
 55 |         temp = cargo
 56 |         input = torch.zeros(1, 2, L, W)
 57 |         points = []
 58 |         poses = []
 59 |         is_encase = False
 60 |         for point in self._available_points:
 61 |             for tmp_pose in CargoPose:
 62 |                 temp.pose = tmp_pose
 63 |                 # 可以放置
 64 |                 if self.is_encasable(point, temp):
 65 |                     # self._setted_cargos.append(cargo)
 66 |                     # cargo.point(point)
 67 |                     is_encase = True
 68 |                     cargo_matrix = torch.zeros(L, W)
 69 |                     # cargo_matrix[point.x: point.x+cargo.length][point.y, point.y+cargo.width] += cargo.height
 70 |                     for x in range(point.x, point.x+cargo.length):
 71 |                         for y in range(point.y, point.y+cargo.width):
 72 |                             cargo_matrix[x][y] += cargo.height
 73 | 
 74 |                     new_input = torch.stack((self._H_matrix, cargo_matrix), dim=0)
 75 |                     new_input = new_input.unsqueeze(0)
 76 |                     input = torch.cat((input, new_input), dim=0)
 77 |                     points.append(point)
 78 |                     poses.append(tmp_pose)
 79 | 
 80 |         if is_encase:
 81 |             return is_encase, input[1:], points, poses
 82 |         else:
 83 |             return is_encase, input, points, poses
 84 | 
 85 | 
 86 |     # 放置新的箱子
 87 |     def update_state(self, cargo:Cargo):
 88 |         # update settled cargos
 89 |         self._setted_cargos.append(cargo)
 90 | 
 91 |         # update available points
 92 |         origin_point = cargo.point
 93 |         self._available_points.remove(origin_point)
 94 |         new_point = Point(origin_point.x+cargo.length, origin_point.y, origin_point.z)
 95 |         # if not (new_point.z > 0 and self._H_matrix[new_point.x][new_point.y] == 0):
 96 |         self._available_points.append(new_point)
 97 | 
 98 |         new_point = Point(origin_point.x, origin_point.y+cargo.width, origin_point.z)
 99 |         # if not (new_point.z > 0 and self._H_matrix[new_point.x][new_point.y] == 0):
100 |         self._available_points.append(new_point)
101 | 
102 |         new_point = Point(origin_point.x, origin_point.y, origin_point.z+cargo.height)
103 |         #if not (new_point.z > 0 and self._H_matrix[new_point.x][new_point.y] == 0):
104 |         self._available_points.append(new_point)
105 | 
106 |         # update H_matrix
107 |         for x in range(origin_point.x, origin_point.x + cargo.length):
108 |             for y in range(origin_point.y, origin_point.y + cargo.width):
109 |                 self._H_matrix[x][y] += cargo.height
110 | 
111 | 
112 |     def maximalSquare(self) -> int:
113 |         matrix = self._H_matrix
114 |         #长度为0，直接返回0
115 |         if len(matrix) == 0 or len(matrix[0]) == 0:
116 |             return 0
117 | 
118 |         maxSide0 = 0
119 |         rows, columns = len(matrix), len(matrix[0])
120 | 
121 |         #新增二个全为0的数组
122 |         dp0 = [[0] * columns for _ in range(rows)]
123 | 
124 |         for i in range(rows):
125 |             for j in range(columns):
126 | 
127 |                 # 取0中的最大正方形
128 |                 if matrix[i][j] == 0:
129 |                     # 第一行和第一列为0时，新数组值为1
130 |                     if i == 0 or j == 0:
131 |                         dp0[i][j] = 1
132 |                     # 取改值左边、上边、左上中的最小值+1
133 |                     else:
134 |                         dp0[i][j] = min(dp0[i - 1][j], dp0[i][j - 1], dp0[i - 1][j - 1]) + 1
135 |                     maxSide0 = max(maxSide0, dp0[i][j])
136 | 
137 |         maxSide = maxSide0
138 |         maxSquare = maxSide * maxSide
139 |         return maxSquare
140 | 
141 |     # obtain the reward R=D+w(V1+V2)
142 |     def reward(self):
143 |         # lines = self._H_matrix.norm(0, dim=0)
144 |         # none_zeros = sum(lines)
145 |         # zeros = self._H_matrix.size(0) * self._H_matrix.size(1) - none_zeros
146 |         # maxSquare = self.maximalSquare()
147 |         # V1_2 = zeros.item() + maxSquare
148 | 
149 |         length = self._length
150 |         width = self._width
151 |         for l in range(self._length - 1, -1, -1):
152 |             if not (torch.equal(self._H_matrix[l], torch.zeros(self._H_matrix[l].shape))):
153 |                 length = l
154 |                 break
155 | 
156 |         for w in range(self._width - 1, -1, -1):
157 |             if not (torch.equal(self._H_matrix[:, w], torch.zeros(self._H_matrix[:, w].shape))):
158 |                 width = w
159 |                 break
160 | 
161 |         length = self._length - (length + 1)
162 |         width = self._width - (width + 1)
163 |         V1_2 = length * width + self._width * length + self._length * width
164 | 
165 |         maxHeight = self._H_matrix.max()
166 |         D = self._height - maxHeight.item()
167 |         w = 256
168 |         reward = D / H + w * V1_2 / (2 * L * W)
169 |         return reward
170 | 
171 |     @property
172 |     def volume(self) -> int:
173 |         return self._height * self._length * self._width
174 | 
175 |     def occupy_volume(self):
176 |         v_sum = 0
177 |         for cargo in self._setted_cargos:
178 |             v_sum += cargo.volume
179 |         return v_sum
180 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import torch
  3 | import torch.nn as nn
  4 | from torch.autograd import Variable
  5 | import torch.nn.functional as F
  6 | import numpy as np
  7 | import random
  8 | import copy
  9 | import time
 10 | 
 11 | # import draw
 12 | from container import *
 13 | from data import *
 14 | # from draw import *
 15 | 
# 1. Define some Hyper Parameters
EPSILON = 0.9  # probability of taking the greedy action in choose_action (random otherwise)
BATCH_SIZE = 16  # number of transitions sampled from the buffer per learn() call
LR = 0.0001  # learning rate of the Adam optimizer
GAMMA = 0.9  # discount factor
TARGET_NETWORK_REPLACE_FREQ = 100  # learn() calls between target-network syncs
MEMORY_CAPACITY = 2000  # capacity of the experience replay buffer
# Device used for eval_net and the training batch in learn().
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 24 | 
 25 | # 2. Random generate box data
 26 | solution = [[(91, 54, 45, 32), (105, 77, 72, 24), (79, 78, 48, 30)],
 27 | [(108, 76, 30, 24), (110, 43, 25, 7), (92, 81, 55, 22), (81, 33, 28, 13), (120, 99, 73, 15)],
 28 | [(88, 54, 39, 16), (94, 54, 36, 14), (87, 77, 43, 20), (100, 80, 72, 16), (83, 40, 36, 6),(91, 54, 22, 15), (109, 58, 54, 17), (94, 55, 30, 9)],
 29 | [(86, 84, 45, 18), (81, 45, 34, 19), (70, 54, 37, 13), (71, 61, 52, 16), (78, 73, 40, 10),(69, 63, 46, 13), (72, 67, 56, 10), (75, 75, 36, 8), (94, 88, 50, 12), (65, 51, 50, 13)],
 30 | [(108, 76, 30, 12), (110, 43, 25, 12), (92, 81, 55, 6), (81, 33, 28, 9), (120, 99, 73, 5), (111, 70, 48, 12), (98, 72, 46, 9), (95, 66, 31, 10), (85, 84, 30, 8), (71, 32, 25, 3), (36, 34, 25, 10), (97, 67, 62, 7), (33, 25, 23, 7), (95, 27, 26, 10), (94, 81, 44, 9)]]
 31 | 
 32 | def random_generate():
 33 |     idx = np.random.randint(0,len(solution))
 34 |     box_list = solution[idx]
 35 |     gen_box_order = []
 36 |     while True:
 37 |         index = np.random.randint(0, len(box_list))
 38 |         box_list[index] = (box_list[index][0], box_list[index][1], box_list[index][2], box_list[index][3]-1)
 39 |         gen_box_order.append((box_list[index][0], box_list[index][1], box_list[index][2]))
 40 |         if box_list[index][3] == 0:
 41 |             box_list.pop(index)
 42 |         if len(box_list) == 0:
 43 |             break
 44 |     return gen_box_order
 45 | 
 46 | 
 47 | def normalization(data):
 48 |     _range = np.max(data) - np.min(data)
 49 |     return (data - np.min(data)) / _range
 50 | 
 51 | 
 52 | def standardization(data):
 53 |     mu = np.mean(data)
 54 |     sigma = np.std(data)
 55 |     return (data - mu) / sigma
 56 | 
 57 | # 3. Define the network used in both target net and the net for training
 58 | class CNN(nn.Module):
 59 |     def __init__(self):
 60 |         super(CNN, self).__init__()   # 继承__init__功能
 61 |         ## 第一层卷积
 62 |         self.conv1 = nn.Sequential(
 63 |             # 输入[2,587,233]
 64 |             nn.Conv2d(
 65 |                 in_channels=2,    # 输入图片的高度
 66 |                 out_channels=16,  # 输出图片的高度
 67 |                 kernel_size=3,    # 5x5的卷积核，相当于过滤器
 68 |                 stride=1,         # 卷积核在图上滑动，每隔一个扫一次
 69 |                 padding=1,        # 给图外边补上0
 70 |             ),
 71 |             # 经过卷积层 输出[16,28,28] 传入池化层
 72 |             nn.ReLU(),
 73 |             nn.MaxPool2d(kernel_size=2)   # 经过池化 输出[16,14,14] 传入下一个卷积
 74 |         )
 75 |         ## 第二层卷积
 76 |         self.conv2 = nn.Sequential(
 77 |             nn.Conv2d(
 78 |                 in_channels=16,    # 同上
 79 |                 out_channels=32,
 80 |                 kernel_size=3,
 81 |                 stride=1,
 82 |                 padding=1
 83 |             ),
 84 |             # 经过卷积 输出[32, 14, 14] 传入池化层
 85 |             nn.ReLU(),
 86 |             nn.MaxPool2d(kernel_size=2)  # 经过池化 输出[32,7,7] 传入输出层
 87 |         )
 88 |         ## 第三层卷积
 89 |         self.conv3 = nn.Sequential(
 90 |             nn.Conv2d(
 91 |                 in_channels=32,  # 同上
 92 |                 out_channels=64,
 93 |                 kernel_size=3,
 94 |                 stride=1,
 95 |                 padding=1
 96 |             ),
 97 |             # 经过卷积 输出[32, 14, 14] 传入池化层
 98 |             nn.ReLU(),
 99 |             nn.MaxPool2d(kernel_size=2)  # 经过池化 输出[32,7,7] 传入输出层
100 |         )
101 |         ## 第四层卷积
102 |         self.conv4 = nn.Sequential(
103 |             nn.Conv2d(
104 |                 in_channels=64,  # 同上
105 |                 out_channels=128,
106 |                 kernel_size=3,
107 |                 stride=1,
108 |                 padding=1
109 |             ),
110 |             # 经过卷积 输出[32, 14, 14] 传入池化层
111 |             nn.ReLU(),
112 |             nn.MaxPool2d(kernel_size=2)  # 经过池化 输出[32,7,7] 传入输出层
113 |         )
114 |         ## 第五层卷积
115 |         self.conv5 = nn.Sequential(
116 |             nn.Conv2d(
117 |                 in_channels=128,  # 同上
118 |                 out_channels=256,
119 |                 kernel_size=3,
120 |                 stride=1,
121 |                 padding=1
122 |             ),
123 |             # 经过卷积 输出[32, 14, 14] 传入池化层
124 |             nn.ReLU(),
125 |             nn.MaxPool2d(kernel_size=2)  # 经过池化 输出[32,7,7] 传入输出层
126 |         )
127 |         ## 第六层卷积
128 |         self.conv6 = nn.Sequential(
129 |             nn.Conv2d(
130 |                 in_channels=256,  # 同上
131 |                 out_channels=512,
132 |                 kernel_size=3,
133 |                 stride=1,
134 |                 padding=1
135 |             ),
136 |             # 经过卷积 输出[32, 14, 14] 传入池化层
137 |             nn.ReLU(),
138 |             nn.MaxPool2d(kernel_size=2)  # 经过池化 输出[32,7,7] 传入输出层
139 |         )
140 |         ## 输出层
141 |         self.output = nn.Linear(in_features=512*9*3, out_features=1)
142 | 
143 |     def forward(self, x):
144 |         x = self.conv1(x)
145 |         x = self.conv2(x)           # [batch, 32,7,7]
146 |         x = self.conv3(x)  # [batch, 32,7,7]
147 |         x = self.conv4(x)  # [batch, 32,7,7]
148 |         x = self.conv5(x)  # [batch, 32,7,7]
149 |         x = self.conv6(x)  # [batch, 32,7,7]
150 |         x = x.view(x.size(0), -1)   # 保留batch, 将后面的乘到一起 [batch, 32*7*7]
151 |         output = self.output(x)     # 输出[50,10]
152 |         return output
153 | 
154 | 
class DQN(object):
    """DQN agent: an online net (``eval_net``), a target net and a replay buffer."""

    def __init__(self):
        # Two networks with the same architecture: eval_net is trained,
        # target_net is periodically overwritten with eval_net's weights.
        self.eval_net, self.target_net = CNN(), CNN()
        self.learn_step_counter = 0  # number of learn() calls that ran an update
        self.memory: List = [None] * MEMORY_CAPACITY
        self.memory_counter = 0  # total number of transitions stored so far
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
        self.loss_func = nn.MSELoss()

    @staticmethod
    def _preprocess(batch):
        """Min-max normalise, then standardise a tensor; returns a CPU tensor."""
        data = batch.data.cpu().numpy()
        data = normalization(data)
        data = standardization(data)
        return torch.tensor(data)

    def choose_action(self, state:Container, cargo):
        """Choose where and how to place ``cargo`` in ``state``.

        Returns:
            ``(point, pose)`` for the chosen placement, or the 3-tuple
            ``(False, False, False)`` when no placement is feasible. Callers
            tell the two cases apart by the tuple length.
        """
        is_encase, inputs, points, poses = state.encase(cargo)
        if not is_encase:
            return is_encase, is_encase, is_encase

        if np.random.uniform() < EPSILON:  # greedy
            with torch.no_grad():
                actions_value = self.target_net.forward(self._preprocess(inputs))
            # Index of the candidate with the highest predicted value.
            action = int(torch.max(actions_value, 0)[1][0])
        else:
            action = np.random.randint(0, high=len(points))
        return points[action], poses[action]

    def store_transition(self, s:torch.Tensor, a:Cargo, r, s_:Container, a_:Cargo):
        """Store one transition, overwriting the oldest slot once full.

        The action is stored as ``a.matrix()``; ``s_`` and ``a_`` are kept as
        objects so the next-state candidates can be rebuilt in ``learn``.
        """
        transition = [s, a.matrix(), r, s_, a_]
        index = self.memory_counter % MEMORY_CAPACITY
        self.memory[index] = transition
        self.memory_counter += 1

    def learn(self, i_episode=None, c_i=None):
        """Run one optimisation step on a batch sampled from the buffer.

        Args:
            i_episode: episode number shown in the log line. When omitted it
                is read from the module global of the same name, which the
                training script sets; -1 if that global does not exist.
            c_i: cargo index shown in the log line; same fallback.

        Does nothing while the replay buffer is empty.
        """
        if i_episode is None:
            i_episode = globals().get('i_episode', -1)
        if c_i is None:
            c_i = globals().get('c_i', -1)

        # Only slots that were actually written may be sampled; unfilled
        # slots hold None and used to crash the unpacking below.
        filled = min(self.memory_counter, MEMORY_CAPACITY)
        if filled == 0:
            return

        # Sync the target network every TARGET_NETWORK_REPLACE_FREQ steps.
        if self.learn_step_counter % TARGET_NETWORK_REPLACE_FREQ == 0:
            self.target_net.load_state_dict(self.eval_net.state_dict())
        self.learn_step_counter += 1

        sample_index = np.random.choice(filled, BATCH_SIZE)

        state_actions = []
        q_next_max = []
        b_r = []
        for index in sample_index:
            b_s, b_a, reward, state_next, cargo_next = self.memory[index]
            b_r.append(reward)
            state_actions.append(torch.stack((b_s, b_a), dim=0))

            # Best target-network value over all feasible next placements;
            # zero when the next cargo cannot be placed at all.
            is_encasable, inputs, points, poses = state_next.encase(cargo_next)
            if is_encasable:
                with torch.no_grad():
                    actions_value = self.target_net.forward(self._preprocess(inputs))
                q_next_max.append(torch.max(actions_value, 0)[0])
            else:
                q_next_max.append(torch.zeros(size=[1]))

        # Stack once; repeated torch.cat recopied the growing batch each time.
        b_s_a = self._preprocess(torch.stack(state_actions, dim=0)).to(device)

        self.eval_net.to(device)
        q_eval = self.eval_net(b_s_a)

        q_next = torch.cat(q_next_max).unsqueeze(1)
        b_r = torch.tensor(b_r).unsqueeze(1)

        # detach() returns a new tensor; the original discarded that result.
        q_target = (b_r + q_next * GAMMA).to(device).detach()

        self.loss_func.to(device)

        loss = self.loss_func(q_eval, q_target)
        print('loss: %f iters:%d cargo_id: %d' %(loss.item(), i_episode, c_i))

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Diagnostic only: loss on the same batch after the update.
        with torch.no_grad():
            q_eval_again = self.eval_net(b_s_a)
            loss_again = self.loss_func(q_eval_again, q_target)
        print('loss_again: %f iters:%d cargo_id: %d' % (loss_again.item(), i_episode, c_i))
276 | 
277 | 
def evaluate():
    """Pack the global ``cargo_list`` into a fresh container and report the fill ratio.

    Each cargo is offered once, in list order, to ``dqn.choose_action``.
    A result of length 3 is treated as "no placement" and that cargo is
    skipped; otherwise ``action[0]`` is used as the placement point and
    ``action[1]`` as the pose, and the container state is updated.

    Reads the module-level names ``dqn``, ``cargo_list``, ``L``, ``W`` and
    ``H``. Prints the elapsed time of the packing pass.

    Returns:
        ``state.occupy_volume() / state.volume`` for the packed container.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments.
    start = time.perf_counter()
    state = Container(L, W, H)

    # Iterating directly visits every cargo exactly once and is a no-op for
    # an empty list (the previous index-based loop raised IndexError there).
    for dims in cargo_list:
        cargo = Cargo(dims[0], dims[1], dims[2])
        action = dqn.choose_action(state, cargo)
        if len(action) == 3:
            # No placement was chosen for this cargo: move on to the next one.
            continue
        cargo.pose = action[1]
        cargo.point = action[0]
        state.update_state(cargo)

    elapsed = time.perf_counter() - start
    print("time cost: %f s" % elapsed)
    return state.occupy_volume() / state.volume
305 | 
306 | 
if __name__ == '__main__':
    # -------------- Procedures of the DQN algorithm ------------------
    # Train for a fixed number of episodes on one randomly generated cargo
    # sequence, evaluate after every episode, then save the target network.
    #
    # NOTE: `dqn`, `cargo_list`, `i_episode` and `c_i` must remain
    # module-level names: evaluate() reads `dqn` and `cargo_list`, and
    # dqn.learn() prints `i_episode` and `c_i` as globals.

    # create the object of DQN class
    dqn = DQN()

    # Start training
    print("\nCollecting experience...")

    cargo_list = random_generate()
    print('cargo list length: %d' % len(cargo_list))

    for i_episode in range(500):
        # Every episode starts from an empty container.
        state = Container(L, W, H)
        c_i = 0
        ep_r = 0
        done = False
        # Bounding the loop by the list length also covers an empty list.
        while c_i < len(cargo_list):
            cargo = Cargo(cargo_list[c_i][0], cargo_list[c_i][1], cargo_list[c_i][2])

            # take action based on the current state
            action = dqn.choose_action(state, cargo)

            # A length-3 action means no placement was chosen for this cargo.
            if len(action) == 3:
                done = True

            if done:
                # NOTE(review): `done` is never reset inside an episode, so
                # after the first unplaceable cargo every remaining cargo is
                # only passed through choose_action and then skipped, whereas
                # evaluate() keeps placing later cargo. Behaviour is kept
                # as-is here; confirm which semantics are intended.
                c_i += 1
                continue

            if c_i + 1 >= len(cargo_list):
                # A transition needs a successor cargo; without one (only
                # possible for a single-item list) indexing c_i + 1 below
                # would raise IndexError, so end the episode instead.
                break

            point = action[0]
            pose = action[1]

            # Snapshot the height map before the container is mutated.
            pre_s = copy.deepcopy(state._H_matrix)
            # obtain the reward and next state and some other information
            cargo.pose = pose
            cargo.point = point
            state.update_state(cargo)
            # obtain the reward
            r = state.reward()

            # store the transitions of states
            # NOTE(review): `state` is passed without copying and keeps being
            # mutated by later update_state calls; unless store_transition
            # copies it, stored "next states" alias the live container -
            # TODO confirm.
            cargo_next = Cargo(cargo_list[c_i + 1][0], cargo_list[c_i + 1][1], cargo_list[c_i + 1][2])
            dqn.store_transition(pre_s, cargo, r, state, cargo_next)

            ep_r += r
            # if the experience replay buffer is filled, DQN begins to learn
            # or update its parameters.
            if dqn.memory_counter > MEMORY_CAPACITY:
                dqn.learn()

            # The last cargo only ever serves as `cargo_next`, so stop once
            # the second-to-last cargo has been placed.
            if c_i + 1 == len(cargo_list) - 1:
                break
            c_i += 1

        # Report the episode return here: the previous `if done:` print sat
        # on a path that is only reached while `done` is False, so it never ran.
        print('Ep: ', i_episode, ' |', 'Ep_r: ', round(float(ep_r), 2))

        occupy_rate = evaluate()
        print('occupy_rate: %f' % occupy_rate)

    torch.save(dqn.target_net.state_dict(), 'cnn.pth')
379 | 
380 | 
381 | 
382 | 
383 | 
384 | 


--------------------------------------------------------------------------------