├── .gitignore
├── README.md
├── game_thread.py
├── main.py
├── net.py
└── reinforce_learning.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | target
3 | /__pycache__
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 用深度学习+强化学习编写的一个五子棋人工智障，所有基本功能已实现。无人类知识，全靠左右互搏积累经验。深度学习框架为 TensorFlow，UI 为 pygame，棋盘大小为 15*15。训练了一个 V 网络，全靠 V 网络对胜率的估计下棋，没有蒙特卡洛树搜索。
2 | 


--------------------------------------------------------------------------------
/game_thread.py:
--------------------------------------------------------------------------------
  1 | import pygame
  2 | from pygame.locals import *
  3 | from sys import exit
  4 | import threading
  5 | import time
  6 | import reinforce_learning as rl
  7 | import random
  8 | import math
  9 | 
 10 | random.seed(time.time())
 11 | 
 12 | 
class GameThread(threading.Thread):
    """Gomoku self-play trainer with a pygame board view.

    ``run()`` (the thread body) alternates between generating self-play
    games and training the value network in ``reinforce_learning``;
    ``loop()`` redraws the board window and handles the quit event.
    """

    # Window size in pixels, passed to pygame.display.set_mode below.
    screen_width = 640
    screen_height = 560

    # Number of grid lines per side and the pixel spacing between them.
    line_num = 15
    width = 36

    # Radius, in pixels, used when drawing a stone.
    piece_width = 18

    # Pixel position of the top-left grid intersection.
    board_offset = [screen_width / 40, screen_height / 40]

    # The display surface is created as soon as this class body executes,
    # i.e. when the module is imported.
    screen = pygame.display.set_mode((screen_width, screen_height), 0, 32)

    # Stone value -> RGB colour: -1 is drawn white, 1 is drawn black.
    color_dict = {-1: (255, 255, 255), 1: (0, 0, 0)}

    # Side to move (1 or -1); init_board() resets it to 1, so 1 moves first.
    now_color = 1
    # Board cells: 0 empty, 1 / -1 stones. This list is a class attribute,
    # so it is shared by every instance that does not rebind it.
    chess_board = [[0 for col in range(15)] for row in range(15)]

    # Board snapshots of the current game, one per move (see place_pieces).
    history = []

    # Number of moves played in the current game.
    step_num = 0

    # Only reset (generate_data) and printed (win) by the methods of this class.
    explore = 0.0005

    # Upper bound of the uniform noise added to each candidate move's value
    # in get_next_move.
    explore_value = 0.5

    # Not referenced by any method of this class.
    is_explore = 0.8

    # Per-move decay base used by add_train_data when labelling positions.
    alpha = 0.9
 43 | 
 44 |     def __init__(self, thread_id):
 45 |         threading.Thread.__init__(self)
 46 |         self.id = thread_id
 47 | 
 48 |     def loop(self):
 49 |         self.display()
 50 | 
 51 |         mouse_pos = [-1, -1]
 52 | 
 53 |         while True:
 54 |             for event in pygame.event.get():
 55 |                 if event.type == QUIT:
 56 |                     exit()
 57 |                 # if event.type == MOUSEBUTTONUP:
 58 |                 #     if event.button == 1:
 59 |                 #         mouse_pos2 = self.get_xy(event.pos)
 60 |                 #         if mouse_pos2 == mouse_pos and self.chess_board[mouse_pos[0]][mouse_pos[1]] == 0:
 61 |                 #             self.chess_board[mouse_pos[0]][mouse_pos[1]] = self.now_color
 62 |                 #             if self.is_win(mouse_pos[0], mouse_pos[1], self.now_color):
 63 |                 #                 self.add_train_data()
 64 |                 #                 self.init_board()
 65 |                 #             self.history.append(self.copy_self())
 66 |                 #             self.now_color = -self.now_color
 67 |                 #             self.step_num += 1
 68 |                 # if event.type == MOUSEBUTTONDOWN:
 69 |                 #     if event.button == 1:
 70 |                 #         mouse_pos = self.get_xy(event.pos)
 71 |             self.display()
 72 |             time.sleep(0.2)
 73 | 
 74 |     def run(self):
 75 |         time.sleep(1)
 76 |         num = 0
 77 |         while True:
 78 |             self.generate_data()
 79 |             print(rl.train_data['y'])
 80 |             rl.train()
 81 |             num += 1
 82 |             print("train num %d " % (num, ))
 83 | 
 84 |     def place_pieces(self, x, y):
 85 |         self.chess_board[x][y] = self.now_color
 86 |         self.history.append(self.copy_self())
 87 |         self.step_num += 1
 88 |         if self.is_win(x, y, self.now_color):
 89 |             self.win()
 90 |             return
 91 |         self.now_color = -self.now_color
 92 | 
 93 |     def win(self):
 94 |         print("step_num %d" % self.step_num)
 95 |         self.add_train_data()
 96 |         print("len(x) %d" % len(rl.train_data['x']))
 97 |         self.explore_value = 0.01
 98 |         print("explore %f, explore_value %f" % (self.explore, self.explore_value))
 99 |         self.init_board()
100 | 
101 |     def copy_self(self):
102 |         board_copy = [[0 for col in range(self.line_num)] for row in range(self.line_num)]
103 |         length = len(self.chess_board)
104 |         side = self.now_color
105 |         for i in range(length):
106 |             for j in range(length):
107 |                 board_copy[i][j] = side*self.chess_board[i][j]
108 | 
109 |         return board_copy
110 | 
111 |     def get_xy(self, pos):
112 |         x = (pos[0] - self.board_offset[0] + self.width/2) / self.width
113 |         y = (pos[1] - self.board_offset[1] + self.width/2) / self.width
114 |         xy = (int(x), int(y))
115 |         return xy
116 | 
117 |     def draw_piece(self, chess_color, pos):
118 |         x = self.board_offset[0] + pos[0]*self.width
119 |         y = self.board_offset[1] + pos[1]*self.width
120 |         xy = (int(x), int(y))
121 |         color_num = self.color_dict[chess_color]
122 |         pygame.draw.circle(self.screen, color_num, xy, self.piece_width)
123 | 
124 |     def draw_board(self):
125 |         self.screen.fill((100, 255, 100))
126 |         for i in range(0, self.line_num):
127 |             pygame.draw.line(self.screen, (0, 0, 0), (self.board_offset[0] + i * self.width, self.board_offset[1]),
128 |                              (self.board_offset[0] + i * self.width,
129 |                               self.board_offset[1] + (self.line_num - 1) * self.width))
130 |             pygame.draw.line(self.screen, (0, 0, 0), (self.board_offset[0], self.board_offset[1] + i * self.width),
131 |                              (self.board_offset[0] + (self.line_num - 1) * self.width,
132 |                               self.board_offset[1] + i * self.width))
133 | 
134 |     def init_board(self):
135 |         length = len(self.chess_board)
136 |         for i in range(length):
137 |             for j in range(length):
138 |                 self.chess_board[i][j] = 0
139 | 
140 |         self.history = []
141 |         self.now_color = 1
142 |         self.step_num = 0
143 | 
144 |     def display(self):
145 |         pygame.init()
146 |         self.draw_board()
147 |         for i in range(len(self.chess_board)):
148 |             for j in range(len(self.chess_board[i])):
149 |                 if self.chess_board[i][j] != 0:
150 |                     self.draw_piece(self.chess_board[i][j], (i, j))
151 |         pygame.display.update()
152 | 
153 |     def add_train_data(self):
154 |         y = 0.5
155 |         side = self.now_color
156 |         for i in range(self.step_num):
157 |             a = math.pow(self.alpha, self.step_num - i - 1)/2
158 |             y2 =0.5 + a
159 |             if side == -1:
160 |                 y2 = 1 - y2
161 |             side = -side
162 |             if random.random() > 2*a:
163 |                 continue
164 |             rl.train_data['x'].append(self.to_input(self.history[i]))
165 |             rl.train_data['y'].append([y2, 1 - y2])
166 | 
167 |     def to_input(self, board):
168 |         c = [[[0.0 for col in range(2)] for col in range(self.line_num)] for row in range(self.line_num)]
169 |         length = len(board)
170 |         for i in range(length):
171 |             for j in range(length):
172 |                 if board[i][j] == 1:
173 |                     c[i][j][0] = 1.0
174 |                 elif board[i][j] == -1:
175 |                     c[i][j][1] = 1.0
176 |         return c
177 | 
178 |     def is_win(self, i, j, color):
179 |         length = len(self.chess_board)
180 |         a = 5
181 |         count = 1
182 |         for x in range(1, a):
183 |             tx = i - x
184 |             ty = j
185 |             if tx < 0 or tx >= length:
186 |                 break
187 |             if self.chess_board[tx][ty] == color:
188 |                 count += 1
189 |             else:
190 |                 break
191 | 
192 |         for x in range(1, a):
193 |             tx = i + x
194 |             ty = j
195 |             if tx < 0 or tx >= length:
196 |                 break
197 |             if self.chess_board[tx][ty] == color:
198 |                 count += 1
199 |             else:
200 |                 break
201 | 
202 |         if count >= 5:
203 |             return True
204 | 
205 |         count = 1
206 |         for x in range(1, a):
207 |             tx = i - x
208 |             ty = j - x
209 |             if tx < 0 or tx >= length:
210 |                 break
211 |             if ty < 0 or ty >= length:
212 |                 break
213 |             if self.chess_board[tx][ty] == color:
214 |                 count += 1
215 |             else:
216 |                 break
217 | 
218 |         for x in range(1, a):
219 |             tx = i + x
220 |             ty = j + x
221 |             if tx < 0 or tx >= length:
222 |                 break
223 |             if ty < 0 or ty >= length:
224 |                 break
225 |             if self.chess_board[tx][ty] == color:
226 |                 count += 1
227 |             else:
228 |                 break
229 | 
230 |         if count >= 5:
231 |             return True
232 | 
233 |         count = 1
234 |         for x in range(1, a):
235 |             tx = i
236 |             ty = j - x
237 |             if ty < 0 or ty >= length:
238 |                 break
239 |             if self.chess_board[tx][ty] == color:
240 |                 count += 1
241 |             else:
242 |                 break
243 | 
244 |         for x in range(1, a):
245 |             tx = i
246 |             ty = j + x
247 |             if ty < 0 or ty >= length:
248 |                 break
249 |             if self.chess_board[tx][ty] == color:
250 |                 count += 1
251 |             else:
252 |                 break
253 | 
254 |         if count >= 5:
255 |             return True
256 | 
257 |         count = 1
258 |         for x in range(1, a):
259 |             tx = i - x
260 |             ty = j + x
261 |             if tx < 0 or tx >= length:
262 |                 break
263 |             if ty < 0 or ty >= length:
264 |                 break
265 |             if self.chess_board[tx][ty] == color:
266 |                 count += 1
267 |             else:
268 |                 break
269 | 
270 |         for x in range(1, a):
271 |             tx = i + x
272 |             ty = j - x
273 |             if tx < 0 or tx >= length:
274 |                 break
275 |             if ty < 0 or ty >= length:
276 |                 break
277 |             if self.chess_board[tx][ty] == color:
278 |                 count += 1
279 |             else:
280 |                 break
281 | 
282 |         if count >= 5:
283 |             return True
284 |         return False
285 | 
286 |     def generate_data(self, ):
287 |         rl.train_data = {"x": [], "y": []}
288 |         num = 100
289 |         self.explore = 0
290 |         self.explore_value = 0
291 | 
292 |         while len(rl.train_data['x']) < num:
293 |             self.next_move()
294 | 
295 |     def next_move(self):
296 |         p = self.get_next_move()
297 |         self.place_pieces(p[0], p[1])
298 | 
299 |     def get_next_move(self, ):
300 |         board = self.copy_self()
301 |         board2 = self.to_input(board)
302 |         index = 0
303 |         max_value = -2
304 |         max_position = [0, 0]
305 |         for i in range(len(board)):
306 |             for j in range(len(board[i])):
307 |                 if board[i][j] == 0:
308 |                     board2[i][j][index] = 1
309 |                     value = rl.get_value(board2)
310 |                     value += random.random()*self.explore_value
311 |                     if value > max_value:
312 |                         max_value = value
313 |                         max_position = [i, j]
314 |                     board2[i][j][index] = 0
315 |         print(max_position[0], max_position[1], self.now_color, max_value)
316 |         return max_position
317 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import game_thread
 4 | 
# Build the game object; importing game_thread has already opened the window.
game = game_thread.GameThread(1)
# Start the background thread, which runs GameThread.run (self-play + training).
game.start()
# Keep the pygame redraw/event loop on the main thread; this call does not return.
game.loop()
 8 | 
 9 | 
10 | 
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/net.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def weight_variable(shape):
 5 |     initial = tf.truncated_normal(shape, stddev=0.1)
 6 |     return tf.Variable(initial)
 7 | 
 8 | 
 9 | def bias_variable(shape):
10 |     initial = tf.constant(0.1, shape=shape)
11 |     return tf.Variable(initial)
12 | 
13 | 
def conv2d(x, w):
    """Convolve ``x`` with filter ``w`` using stride 1 and no padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='VALID')
16 | 
17 | 
def max_pool_2x2(x):
    """Apply 2x2 max pooling with stride 2 and no padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='VALID')
21 | 
22 | 
def convolutional_neural_network(input):
    """Build the value network and return its softmax output tensor.

    Four unpadded ReLU convolutions are followed by two ReLU dense layers
    and a two-way softmax. The flatten step expects the last convolution to
    produce a 2x2x256 map, which is what a 15x15x2 input yields.

    input: image-like tensor with 2 channels in the last dimension.
    Returns a tensor with 2 values per example that sum to 1.
    """
    # (kernel size, input channels, output channels) per convolution.
    # Without padding each layer shrinks the map by kernel - 1, so a 15x15
    # input becomes 13x13 -> 10x10 -> 6x6 -> 2x2.
    conv_specs = ((3, 2, 64), (4, 64, 128), (5, 128, 128), (5, 128, 256))
    activation = input
    for kernel, in_channels, out_channels in conv_specs:
        weights = weight_variable([kernel, kernel, in_channels, out_channels])
        biases = bias_variable([out_channels])
        activation = tf.nn.relu(conv2d(activation, weights) + biases)

    # Flatten the 2x2x256 feature map into one vector per example.
    flat_size = 2 * 2 * 256
    activation = tf.reshape(activation, [-1, flat_size])

    # Hidden dense layers: 1024 -> 128 -> 32.
    for in_units, out_units in ((flat_size, 128), (128, 32)):
        weights = weight_variable([in_units, out_units])
        biases = bias_variable([out_units])
        activation = tf.nn.relu(tf.matmul(activation, weights) + biases)

    # Output layer: two-way softmax.
    weights = weight_variable([32, 2])
    biases = bias_variable([2])
    return tf.nn.softmax(tf.matmul(activation, weights) + biases)
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/reinforce_learning.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import tensorflow as tf
 4 | import net
 5 | 
 6 | history_data = []
 7 | 
 8 | train_data = {"x": [], "y": []}
 9 | sess = tf.Session()
10 | x = tf.placeholder(tf.float32, [None, 15, 15, 2])
11 | y_ = tf.placeholder("float", shape=[None, 2])
12 | 
13 | x_board = tf.reshape(x, [-1, 15, 15, 2])
14 | y_conv = net.convolutional_neural_network(x_board)
15 | 
16 | # cost = -tf.reduce_sum(tf.square(y_ - y_conv))
17 | cost = -tf.reduce_sum(y_*tf.log(y_conv))
18 | train_step = tf.train.AdamOptimizer(1e-3).minimize(cost)
19 | 
20 | sess.run(tf.initialize_all_variables())
21 | 
22 | 
def train():
    """Run 50 optimiser steps on the whole of ``train_data`` as one batch,
    printing a progress line every 10 steps."""
    feed = {x: train_data['x'], y_: train_data['y']}
    for step in range(50):
        if step % 10 == 0:
            print("step %d " % step)
        sess.run(train_step, feed_dict=feed)
28 | 
29 | 
def get_value(board):
    """Score a single encoded board with the network.

    The board is wrapped into a batch of one, and the first of the two
    softmax outputs is returned.
    """
    outputs = sess.run(y_conv, feed_dict={x: [board]})
    return outputs[0][0]
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------