├── .gitignore ├── README.md ├── game_thread.py ├── main.py ├── net.py └── reinforce_learning.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | target 3 | /__pycache__ 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 用深度学习+强化学习编写的一个五子棋人工智障,所有基本功能已实现。无人类知识,全靠左右互搏积累经验。深度学习框架为 Tensorflow ,ui 为 pygame,棋盘大小为15*15。训练了一个 V 网络,全靠 V 网络对胜率的估计下棋,没有蒙塔卡罗树搜索。 2 | -------------------------------------------------------------------------------- /game_thread.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | from pygame.locals import * 3 | from sys import exit 4 | import threading 5 | import time 6 | import reinforce_learning as rl 7 | import random 8 | import math 9 | 10 | random.seed(time.time()) 11 | 12 | 13 | class GameThread(threading.Thread): 14 | 15 | screen_width = 640 16 | screen_height = 560 17 | 18 | line_num = 15 19 | width = 36 20 | 21 | piece_width = 18 22 | 23 | board_offset = [screen_width / 40, screen_height / 40] 24 | 25 | screen = pygame.display.set_mode((screen_width, screen_height), 0, 32) 26 | 27 | color_dict = {-1: (255, 255, 255), 1: (0, 0, 0)} 28 | 29 | now_color = 1 30 | chess_board = [[0 for col in range(15)] for row in range(15)] 31 | 32 | history = [] 33 | 34 | step_num = 0 35 | 36 | explore = 0.0005 37 | 38 | explore_value = 0.5 39 | 40 | is_explore = 0.8 41 | 42 | alpha = 0.9 43 | 44 | def __init__(self, thread_id): 45 | threading.Thread.__init__(self) 46 | self.id = thread_id 47 | 48 | def loop(self): 49 | self.display() 50 | 51 | mouse_pos = [-1, -1] 52 | 53 | while True: 54 | for event in pygame.event.get(): 55 | if event.type == QUIT: 56 | exit() 57 | # if event.type == MOUSEBUTTONUP: 58 | # if event.button == 1: 59 | # mouse_pos2 = self.get_xy(event.pos) 60 | # if mouse_pos2 == mouse_pos and self.chess_board[mouse_pos[0]][mouse_pos[1]] == 0: 61 | # self.chess_board[mouse_pos[0]][mouse_pos[1]] = self.now_color 62 | # if self.is_win(mouse_pos[0], mouse_pos[1], self.now_color): 63 | # self.add_train_data() 64 | # self.init_board() 65 | # self.history.append(self.copy_self()) 66 | # self.now_color = -self.now_color 67 | # self.step_num += 1 68 | # if event.type == MOUSEBUTTONDOWN: 69 | # if event.button == 1: 70 | # mouse_pos = self.get_xy(event.pos) 71 | self.display() 72 | time.sleep(0.2) 73 | 74 | def run(self): 75 | time.sleep(1) 76 | num = 0 77 | while True: 78 | self.generate_data() 79 | print(rl.train_data['y']) 80 | rl.train() 81 | num += 1 82 | print("train num %d " % (num, )) 83 | 84 | def place_pieces(self, x, y): 85 | self.chess_board[x][y] = self.now_color 86 | self.history.append(self.copy_self()) 87 | self.step_num += 1 88 | if self.is_win(x, y, self.now_color): 89 | self.win() 90 | return 91 | self.now_color = -self.now_color 92 | 93 | def win(self): 94 | print("step_num %d" % self.step_num) 95 | self.add_train_data() 96 | print("len(x) %d" % len(rl.train_data['x'])) 97 | self.explore_value = 0.01 98 | print("explore %f, explore_value %f" % (self.explore, self.explore_value)) 99 | self.init_board() 100 | 101 | def copy_self(self): 102 | board_copy = [[0 for col in range(self.line_num)] for row in range(self.line_num)] 103 | length = len(self.chess_board) 104 | side = self.now_color 105 | for i in range(length): 106 | for j in range(length): 107 | board_copy[i][j] = side*self.chess_board[i][j] 108 | 109 | return board_copy 110 | 111 | def get_xy(self, pos): 112 | x = (pos[0] - self.board_offset[0] + self.width/2) / self.width 113 | y = (pos[1] - self.board_offset[1] + self.width/2) / self.width 114 | xy = (int(x), int(y)) 115 | return xy 116 | 117 | def draw_piece(self, chess_color, pos): 118 | x = self.board_offset[0] + pos[0]*self.width 119 | y = self.board_offset[1] + pos[1]*self.width 120 | xy = (int(x), int(y)) 121 | color_num = self.color_dict[chess_color] 122 | pygame.draw.circle(self.screen, color_num, xy, self.piece_width) 123 | 124 | def draw_board(self): 125 | self.screen.fill((100, 255, 100)) 126 | for i in range(0, self.line_num): 127 | pygame.draw.line(self.screen, (0, 0, 0), (self.board_offset[0] + i * self.width, self.board_offset[1]), 128 | (self.board_offset[0] + i * self.width, 129 | self.board_offset[1] + (self.line_num - 1) * self.width)) 130 | pygame.draw.line(self.screen, (0, 0, 0), (self.board_offset[0], self.board_offset[1] + i * self.width), 131 | (self.board_offset[0] + (self.line_num - 1) * self.width, 132 | self.board_offset[1] + i * self.width)) 133 | 134 | def init_board(self): 135 | length = len(self.chess_board) 136 | for i in range(length): 137 | for j in range(length): 138 | self.chess_board[i][j] = 0 139 | 140 | self.history = [] 141 | self.now_color = 1 142 | self.step_num = 0 143 | 144 | def display(self): 145 | pygame.init() 146 | self.draw_board() 147 | for i in range(len(self.chess_board)): 148 | for j in range(len(self.chess_board[i])): 149 | if self.chess_board[i][j] != 0: 150 | self.draw_piece(self.chess_board[i][j], (i, j)) 151 | pygame.display.update() 152 | 153 | def add_train_data(self): 154 | y = 0.5 155 | side = self.now_color 156 | for i in range(self.step_num): 157 | a = math.pow(self.alpha, self.step_num - i - 1)/2 158 | y2 =0.5 + a 159 | if side == -1: 160 | y2 = 1 - y2 161 | side = -side 162 | if random.random() > 2*a: 163 | continue 164 | rl.train_data['x'].append(self.to_input(self.history[i])) 165 | rl.train_data['y'].append([y2, 1 - y2]) 166 | 167 | def to_input(self, board): 168 | c = [[[0.0 for col in range(2)] for col in range(self.line_num)] for row in range(self.line_num)] 169 | length = len(board) 170 | for i in range(length): 171 | for j in range(length): 172 | if board[i][j] == 1: 173 | c[i][j][0] = 1.0 174 | elif board[i][j] == -1: 175 | c[i][j][1] = 1.0 176 | return c 177 | 178 | def is_win(self, i, j, color): 179 | length = len(self.chess_board) 180 | a = 5 181 | count = 1 182 | for x in range(1, a): 183 | tx = i - x 184 | ty = j 185 | if tx < 0 or tx >= length: 186 | break 187 | if self.chess_board[tx][ty] == color: 188 | count += 1 189 | else: 190 | break 191 | 192 | for x in range(1, a): 193 | tx = i + x 194 | ty = j 195 | if tx < 0 or tx >= length: 196 | break 197 | if self.chess_board[tx][ty] == color: 198 | count += 1 199 | else: 200 | break 201 | 202 | if count >= 5: 203 | return True 204 | 205 | count = 1 206 | for x in range(1, a): 207 | tx = i - x 208 | ty = j - x 209 | if tx < 0 or tx >= length: 210 | break 211 | if ty < 0 or ty >= length: 212 | break 213 | if self.chess_board[tx][ty] == color: 214 | count += 1 215 | else: 216 | break 217 | 218 | for x in range(1, a): 219 | tx = i + x 220 | ty = j + x 221 | if tx < 0 or tx >= length: 222 | break 223 | if ty < 0 or ty >= length: 224 | break 225 | if self.chess_board[tx][ty] == color: 226 | count += 1 227 | else: 228 | break 229 | 230 | if count >= 5: 231 | return True 232 | 233 | count = 1 234 | for x in range(1, a): 235 | tx = i 236 | ty = j - x 237 | if ty < 0 or ty >= length: 238 | break 239 | if self.chess_board[tx][ty] == color: 240 | count += 1 241 | else: 242 | break 243 | 244 | for x in range(1, a): 245 | tx = i 246 | ty = j + x 247 | if ty < 0 or ty >= length: 248 | break 249 | if self.chess_board[tx][ty] == color: 250 | count += 1 251 | else: 252 | break 253 | 254 | if count >= 5: 255 | return True 256 | 257 | count = 1 258 | for x in range(1, a): 259 | tx = i - x 260 | ty = j + x 261 | if tx < 0 or tx >= length: 262 | break 263 | if ty < 0 or ty >= length: 264 | break 265 | if self.chess_board[tx][ty] == color: 266 | count += 1 267 | else: 268 | break 269 | 270 | for x in range(1, a): 271 | tx = i + x 272 | ty = j - x 273 | if tx < 0 or tx >= length: 274 | break 275 | if ty < 0 or ty >= length: 276 | break 277 | if self.chess_board[tx][ty] == color: 278 | count += 1 279 | else: 280 | break 281 | 282 | if count >= 5: 283 | return True 284 | return False 285 | 286 | def generate_data(self, ): 287 | rl.train_data = {"x": [], "y": []} 288 | num = 100 289 | self.explore = 0 290 | self.explore_value = 0 291 | 292 | while len(rl.train_data['x']) < num: 293 | self.next_move() 294 | 295 | def next_move(self): 296 | p = self.get_next_move() 297 | self.place_pieces(p[0], p[1]) 298 | 299 | def get_next_move(self, ): 300 | board = self.copy_self() 301 | board2 = self.to_input(board) 302 | index = 0 303 | max_value = -2 304 | max_position = [0, 0] 305 | for i in range(len(board)): 306 | for j in range(len(board[i])): 307 | if board[i][j] == 0: 308 | board2[i][j][index] = 1 309 | value = rl.get_value(board2) 310 | value += random.random()*self.explore_value 311 | if value > max_value: 312 | max_value = value 313 | max_position = [i, j] 314 | board2[i][j][index] = 0 315 | print(max_position[0], max_position[1], self.now_color, max_value) 316 | return max_position 317 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import game_thread 4 | 5 | game = game_thread.GameThread(1) 6 | game.start() 7 | game.loop() 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /net.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def weight_variable(shape): 5 | initial = tf.truncated_normal(shape, stddev=0.1) 6 | return tf.Variable(initial) 7 | 8 | 9 | def bias_variable(shape): 10 | initial = tf.constant(0.1, shape=shape) 11 | return tf.Variable(initial) 12 | 13 | 14 | def conv2d(x, w): 15 | return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='VALID') 16 | 17 | 18 | def max_pool_2x2(x): 19 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], 20 | strides=[1, 2, 2, 1], padding='VALID') 21 | 22 | 23 | def convolutional_neural_network(input): 24 | 25 | #9*9*2 26 | W_conv1 = weight_variable([3, 3, 2, 64]) 27 | b_conv1 = bias_variable([64]) 28 | 29 | h_conv1 = tf.nn.relu(conv2d(input, W_conv1) + b_conv1) 30 | 31 | #9*9*2 32 | W_conv2 = weight_variable([4, 4, 64, 128]) 33 | b_conv2 = bias_variable([128]) 34 | 35 | h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2) 36 | 37 | #9*9*2 38 | W_conv3 = weight_variable([5, 5, 128, 128]) 39 | b_conv3 = bias_variable([128]) 40 | 41 | h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3) + b_conv3) 42 | 43 | #9*9*2 44 | W_conv4 = weight_variable([5, 5, 128, 256]) 45 | b_conv4 = bias_variable([256]) 46 | 47 | h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4) + b_conv4) 48 | 49 | h_pool1_flat = tf.reshape(h_conv4, [-1, 2 * 2 * 256]) 50 | 51 | #2*2*128 52 | W_fc1 = weight_variable([2 * 2 * 256, 128]) 53 | b_fc1 = bias_variable([128]) 54 | 55 | h_fc1 = tf.nn.relu(tf.matmul(h_pool1_flat, W_fc1) + b_fc1) 56 | 57 | #2*2*128 58 | W_fc2 = weight_variable([128, 32]) 59 | b_fc2 = bias_variable([32]) 60 | 61 | h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2) 62 | 63 | #2*2*128 64 | W_fc3 = weight_variable([32, 2]) 65 | b_fc3 = bias_variable([2]) 66 | 67 | h_fc3 = tf.nn.softmax(tf.matmul(h_fc2, W_fc3) + b_fc3) 68 | 69 | return h_fc3 70 | 71 | 72 | -------------------------------------------------------------------------------- /reinforce_learning.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import tensorflow as tf 4 | import net 5 | 6 | history_data = [] 7 | 8 | train_data = {"x": [], "y": []} 9 | sess = tf.Session() 10 | x = tf.placeholder(tf.float32, [None, 15, 15, 2]) 11 | y_ = tf.placeholder("float", shape=[None, 2]) 12 | 13 | x_board = tf.reshape(x, [-1, 15, 15, 2]) 14 | y_conv = net.convolutional_neural_network(x_board) 15 | 16 | # cost = -tf.reduce_sum(tf.square(y_ - y_conv)) 17 | cost = -tf.reduce_sum(y_*tf.log(y_conv)) 18 | train_step = tf.train.AdamOptimizer(1e-3).minimize(cost) 19 | 20 | sess.run(tf.initialize_all_variables()) 21 | 22 | 23 | def train(): 24 | for i in range(50): 25 | if i % 10 == 0: 26 | print("step %d " % (i, )) 27 | sess.run(train_step, feed_dict={x: train_data['x'], y_: train_data['y']}) 28 | 29 | 30 | def get_value(board): 31 | board2 = [board, ] 32 | value = sess.run(y_conv, feed_dict={x: board2}) 33 | return value[0][0] 34 | 35 | 36 | --------------------------------------------------------------------------------