├── .gitignore ├── AlphaGomoku ├── __init__.py ├── agent │ ├── __init__.py │ ├── agent.py │ ├── ai.py │ ├── human.py │ ├── mcts.py │ └── node.py ├── config.py ├── dataset │ ├── __init__.py │ ├── dataset.py │ ├── generator.py │ ├── human_play_data │ │ ├── human_15_col.npy │ │ ├── human_15_last_move.npy │ │ ├── human_15_obs.npy │ │ ├── human_15_pi.npy │ │ ├── human_15_z.npy │ │ └── save human play data here.txt │ └── self_play_data │ │ └── save self play data here.txt ├── env.py ├── network │ ├── __init__.py │ ├── history │ │ ├── log_15_20_512.txt │ │ ├── log_15_2_512.txt │ │ ├── log_8_20_512.txt │ │ └── save training history here.txt │ ├── model │ │ ├── log.txt │ │ ├── model_b_15.h5 │ │ └── model_w_15.h5 │ └── network.py ├── rules.py ├── ui │ ├── __init__.py │ ├── board.py │ ├── image │ │ ├── black.png │ │ ├── desk.jpg │ │ └── white.png │ └── renderer.py └── utils.py ├── LICENSE ├── README.md ├── requirements.txt └── run.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea/ 3 | __pycache__/ 4 | *.pyc 5 | workspace.xml 6 | venv/ 7 | gen_col.npy 8 | gen_last_move.npy 9 | gen_obs.npy 10 | gen_pi.npy 11 | gen_z.npy 12 | self_play_15_0_temp_col.npy 13 | self_play_15_0_temp_last_move.npy 14 | self_play_15_0_temp_obs.npy 15 | self_play_15_0_temp_pi.npy 16 | self_play_15_0_temp_z.npy 17 | utils.py -------------------------------------------------------------------------------- /AlphaGomoku/__init__.py: -------------------------------------------------------------------------------- 1 | from .agent import * 2 | from .dataset import * 3 | from .network import * 4 | from .ui import * 5 | from .config import * 6 | from .env import * 7 | from .rules import * 8 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/__init__.py: -------------------------------------------------------------------------------- 1 | from .human import HumanAgent 2 | from .ai import * 3 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/agent.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from ..rules import * 3 | 4 | 5 | class Agent: 6 | def __init__(self, color): 7 | if color != BLACK or color != WHITE: 8 | self._color = BLACK 9 | else: 10 | self._color = color 11 | 12 | @abstractmethod 13 | def play(self, *args, **kwargs): 14 | pass 15 | 16 | @property 17 | def color(self): 18 | return self._color 19 | 20 | @color.setter 21 | def color(self, value): 22 | if value != BLACK and value != WHITE: 23 | self._color = BLACK 24 | else: 25 | self._color = value 26 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/ai.py: -------------------------------------------------------------------------------- 1 | from .agent import Agent 2 | from ..network.network import * 3 | from .mcts import * 4 | from ..utils import * 5 | 6 | MIN = -99999999 7 | MAX = 99999999 8 | 9 | score_5 = 5 10 | score_4_live = 4.5 11 | score_4_and_3_live = 4.3 12 | score_4 = 4 13 | score_double_3_live = 3.8 14 | score_3_live = 3.5 15 | score_3 = 3 16 | score_double_2_live = 3 17 | score_2_live = 2.5 18 | score_2 = 2 19 | 20 | 21 | class AI(Agent): 22 | def __init__(self, color): 23 | Agent.__init__(self, color) 24 | 25 | def play(self, *args, **kwargs): 26 | pass 27 | 28 | 29 | class MCTSAgent(AI): 30 | def __init__(self, conf, color, use_stochastic_policy, specify_model_ver=-1): 31 | AI.__init__(self, color) 32 | black_model_path = 'AlphaGomoku/network/model/model_b_' + str(conf['board_size']) 33 | white_model_path = 'AlphaGomoku/network/model/model_w_' + str(conf['board_size']) 34 | if specify_model_ver != -1: 35 | black_model_path = black_model_path + '_ver_' + str(specify_model_ver) 36 | white_model_path = white_model_path + '_ver_' + str(specify_model_ver) 37 | black_model_path = black_model_path + '.h5' 38 | white_model_path = white_model_path + '.h5' 39 | 40 | conf.update(net_para_file=black_model_path) 41 | black_net = Network(conf) 42 | conf.update(net_para_file=white_model_path) 43 | white_net = Network(conf) 44 | 45 | self._mcts = MCTS(conf, black_net, white_net, color, use_stochastic_policy) 46 | self._black_net = black_net 47 | self._white_net = white_net 48 | self._board_size = conf['board_size'] 49 | 50 | def play(self, obs, action, stone_num): 51 | act_ind, pi, prior_prob, value = self._mcts.action(obs, action, stone_num) 52 | act_cor = index2coordinate(act_ind, self._board_size) 53 | return act_cor, pi, prior_prob, value 54 | 55 | def set_self_play(self, is_self_play): 56 | self._mcts.set_self_play(is_self_play) 57 | 58 | def set_stochastic_policy(self, use_stochastic_policy): 59 | self._mcts.set_stochastic_policy(use_stochastic_policy) 60 | 61 | def reset_mcts(self): 62 | self._mcts.reset() 63 | 64 | @log 65 | def train(self, obs, color, last_move, pi, z): 66 | obs_b, obs_w = obs[0::2], obs[1::2] 67 | color_b, color_w = color[0::2], color[1::2] 68 | last_move_b, last_move_w = last_move[0::2], last_move[1::2] 69 | pi_b, pi_w = pi[0::2], pi[1::2] 70 | z_b, z_w = z[0::2], z[1::2] 71 | 72 | loss_b = self._black_net.train(obs_b, color_b, last_move_b, pi_b, z_b) 73 | loss_w = self._white_net.train(obs_w, color_w, last_move_w, pi_w, z_w) 74 | return loss_b, loss_w 75 | 76 | def save_model(self): 77 | self._black_net.save_model() 78 | self._white_net.save_model() 79 | print('> model saved') 80 | 81 | def load_model(self): 82 | self._black_net.load_model() 83 | self._white_net.load_model() 84 | 85 | 86 | class FastAgent(AI): 87 | def __init__(self, color, depth=1): # depth must be even 88 | AI.__init__(self, color) 89 | self._action_list = [] 90 | self._score_list = [] 91 | self._depth = depth 92 | self._cut_count = 0 93 | self._last_move_list = [] 94 | self._atk_def_ratio = 0.1 95 | self._show_info = False 96 | 97 | def play(self, obs, action, stone_num, *args): 98 | self._action_list = [] 99 | self._score_list = [] 100 | if action is not None: 101 | self._last_move_list.append(action) 102 | 103 | size = obs.shape[0] 104 | if sum(sum(abs(obs))) == 0: # 若AI执黑,第一步一定下在棋盘中央位置 105 | pi = [0 for _ in range(size * size)] 106 | pi[int((size * size) / 2)] = 1 107 | self._last_move_list.append((7, 7)) 108 | return (7, 7), pi, None, None 109 | 110 | pos_list = self.generate(obs, all=True) 111 | if self._show_info: 112 | print('position generated: ', pos_list) 113 | alpha, beta = MIN, MAX 114 | score_dict = dict() 115 | thread_list = [] 116 | 117 | for i, j in pos_list: 118 | new_obs = obs.copy() 119 | new_obs[i][j] = self.color 120 | target = self._get_thread_target(obs=new_obs, last_move=(i, j), alpha=alpha, beta=beta, 121 | depth=self._depth - 1, score_dict=score_dict) 122 | thr = threading.Thread(target=target, name='thread ' + str((i, j))) 123 | thread_list.append(thr) 124 | thr.start() 125 | 126 | for thr in thread_list: 127 | thr.join() 128 | 129 | best_action_list = get_best_action_list(score_dict) 130 | if self._show_info: 131 | print('best action list:', best_action_list, ' score = ', score_dict[best_action_list[0]]) 132 | 133 | ind = np.random.choice([i for i in range(len(best_action_list))]) 134 | action = best_action_list[ind] 135 | 136 | pi = [0 for _ in range(size * size)] 137 | pi[coordinate2index(action, size)] = 1 138 | 139 | self._last_move_list.append(action) 140 | return action, pi, best_action_list, score_dict 141 | 142 | def _get_thread_target(self, obs, last_move, alpha, beta, depth, score_dict): 143 | def _min(): 144 | _beta = beta 145 | self._last_move_list.append(last_move) 146 | if depth == 0: 147 | score_atk, score_def = self.evaluate(obs) 148 | self._last_move_list.pop() 149 | # 对于只搜一层的情况下,必须要教会AI防守活三和冲四。这里的做法是手动提高对方活三和冲四的分数 150 | if score_def < score_3_live: 151 | if score_atk > score_def: 152 | score = score_atk - self._atk_def_ratio * score_def 153 | else: 154 | score = -score_def + self._atk_def_ratio * score_atk 155 | else: 156 | if score_def == score_3_live: 157 | if score_atk >= score_4: 158 | score = score_atk - self._atk_def_ratio * score_def 159 | else: 160 | score = -score_4 161 | else: 162 | # 为了防止AI在对方有活四的情况下放弃治疗 163 | if score_def >= score_4_live: 164 | score = score_5 if score_atk == score_5 else -score_5 165 | else: 166 | score = score_5 if score_atk == score_5 else -score_4_live 167 | x, y = int(last_move[0]), int(last_move[1]) 168 | score_dict[(x, y)] = score 169 | if self._show_info: 170 | print((x, y), 'atk=', score_atk, 'def=', score_def, 'total=', score) 171 | return score 172 | 173 | pos_list = self.generate(obs) 174 | for i, j in pos_list: 175 | obs[i][j] = -self.color 176 | value = self._max(obs, (i, j), alpha, _beta, depth - 1) 177 | if value < _beta: 178 | _beta = value 179 | obs[i][j] = 0 180 | if alpha > _beta: 181 | break 182 | # this indicates that the parent node (belongs to max layer) will select a node with value 183 | # no less than alpha, however, the value of child selected in this node (belongs to min layer) 184 | # will no more than beta <= alpha, so there is no need to search this node 185 | 186 | self._last_move_list.pop() 187 | x, y = int(last_move[0]), int(last_move[1]) 188 | score_dict[(x, y)] = _beta 189 | self._action_list.append((x, y)) 190 | 191 | return _min 192 | 193 | # if an obs is in max layer, then the agent is supposed to select the action with max score 194 | # alpha represents the lower bound of the value of this node 195 | def _max(self, obs, last_move, alpha, beta, depth): 196 | self._last_move_list.append(last_move) 197 | if depth == 0: 198 | score_atk, score_def = self.evaluate(obs) 199 | self._last_move_list.pop() 200 | score = score_atk if score_atk > score_def else -score_def 201 | return score 202 | 203 | pos_list = self.generate(obs) 204 | 205 | for i, j in pos_list: 206 | obs[i][j] = self.color 207 | value = self._min(obs, (i, j), alpha, beta, depth - 1) 208 | if value > alpha: 209 | alpha = value 210 | obs[i][j] = 0 211 | if alpha > beta: 212 | break 213 | 214 | self._last_move_list.pop() 215 | return alpha 216 | 217 | # if an obs is in min layer, then the agent is supposed to select the action with min scores 218 | # beta represents the upper bound of the value of this node 219 | def _min(self, obs, last_move, alpha, beta, depth): 220 | self._last_move_list.append(last_move) 221 | if depth == 0: 222 | score_atk, score_def = self.evaluate(obs) 223 | self._last_move_list.pop() 224 | score = score_atk if score_atk > score_def else -score_def 225 | return score 226 | 227 | pos_list = self.generate(obs) 228 | 229 | for i, j in pos_list: 230 | obs[i][j] = -self.color 231 | value = self._max(obs, (i, j), alpha, beta, depth - 1) 232 | # print((i, j), value) 233 | if value < beta: 234 | beta = value 235 | obs[i][j] = 0 236 | if alpha > beta: 237 | break 238 | # this indicates that the parent node (belongs to max layer) will select a node with value 239 | # no less than alpha, however, the value of child selected in this node (belongs to min layer) 240 | # will no more than beta <= alpha, so there is no need to search this node 241 | 242 | self._last_move_list.pop() 243 | return beta 244 | 245 | def evaluate(self, obs): 246 | pos_ind = np.where(obs) 247 | pos_set = [(pos_ind[0][i], pos_ind[1][i]) for i in range(len(pos_ind[0]))] 248 | 249 | score_atk, score_def = 0, 0 250 | for x, y in pos_set: 251 | c = obs[x][y] 252 | pt_score = self.evaluate_point(obs, (x, y)) 253 | if c != self.color: 254 | score_def = max(score_def, pt_score) 255 | else: 256 | score_atk = max(score_atk, pt_score) 257 | 258 | return score_atk, score_def 259 | 260 | def evaluate_point(self, obs, pos): 261 | i, j = pos[0], pos[1] 262 | color = obs[i][j] 263 | dir_set = [(1, 0), (0, 1), (1, 1), (1, -1)] 264 | max_count = 0 265 | max_consecutive_count = 0 266 | max_score = 0 267 | 268 | for dir in dir_set: 269 | score = 0 270 | count_1, count_2 = 1, 1 271 | consecutive_count_1, consecutive_count_2 = 1, 1 272 | space_1, space_2 = 0, 0 273 | block_1, block_2 = 0, 0 274 | consecutive_flag = True 275 | 276 | for k in range(1, 5): 277 | if i + k * dir[0] in range(0, 15) and j + k * dir[1] in range(0, 15): 278 | if obs[i + k * dir[0]][j + k * dir[1]] == color: 279 | if space_1 == 2: 280 | break 281 | count_1 += 1 282 | if consecutive_flag: 283 | consecutive_count_1 += 1 284 | if obs[i + k * dir[0]][j + k * dir[1]] == -color: 285 | block_1 = 1 286 | break 287 | if obs[i + k * dir[0]][j + k * dir[1]] == 0: 288 | space_1 += 1 289 | consecutive_flag = False 290 | if space_1 == 3: 291 | break 292 | else: 293 | block_1 = 1 294 | break 295 | 296 | consecutive_flag = True 297 | 298 | for k in range(1, 5): 299 | if i - k * dir[0] in range(0, 15) and j - k * dir[1] in range(0, 15): 300 | if obs[i - k * dir[0]][j - k * dir[1]] == color: 301 | if space_2 == 2: 302 | break 303 | count_2 += 1 304 | if consecutive_flag: 305 | consecutive_count_2 += 1 306 | if obs[i - k * dir[0]][j - k * dir[1]] == -color: 307 | block_2 = 1 308 | break 309 | if obs[i - k * dir[0]][j - k * dir[1]] == 0: 310 | space_2 += 1 311 | consecutive_flag = False 312 | if space_2 == 3: 313 | break 314 | else: 315 | block_2 = 1 316 | break 317 | 318 | # there are several cases: 319 | # 1. ooox: block=1, space=0, count=consecutive_count 320 | # 2. ooo__: block=0, space=2, count=consecutive_count 321 | # 3. ooo_x: block=1, space=1, count=consecutive_count 322 | # 4. oo_ox: block=1, space=1, count>consecutive_count 323 | 324 | count = max(count_1 + consecutive_count_2, count_2 + consecutive_count_1) - 1 325 | 326 | consecutive_count = consecutive_count_1 + consecutive_count_2 - 1 327 | 328 | if consecutive_count >= 5: 329 | return score_5 330 | 331 | if count == 4: 332 | if consecutive_count == 4: # ??oooo?? 333 | if space_1 >= 1 and space_2 >= 1: # ?_oooo_? 334 | score = score_4_live 335 | else: 336 | if space_1 == 0 and space_2 == 0: # xoooox 337 | pass 338 | else: # xoooo_ 339 | score = score_4 340 | else: 341 | if consecutive_count == 3: # ??ooo_o?? 342 | score = score_4 343 | else: # (consecutive_count == 2) ??oo_oo?? 344 | score = score_4 345 | 346 | if count == 3: 347 | if consecutive_count == 3: # ??ooo?? 348 | if space_1 >= 1 and space_2 >= 1: # ?_ooo_? 349 | score = score_3_live 350 | else: 351 | if space_1 == 0 and space_2 == 0: # xooox 352 | pass 353 | else: # xooo_ 354 | score = score_3 355 | else: # (consecutive_count == 2) ??oo_o?? 356 | if consecutive_count_1 == 2: 357 | if space_1 >= 1 and space_2 >= 2: # ?_oo_o_? 358 | score = score_3_live 359 | else: 360 | if space_1 == 0 and space_2 == 1: # xoo_ox 361 | pass 362 | else: 363 | score = score_3 364 | else: # (consecutive_count_2 == 2) 365 | if space_2 >= 1 and space_1 >= 2: # ?_o_oo_? 366 | score = score_3_live 367 | else: 368 | if space_1 == 1 and space_2 == 0: # xo_oox 369 | pass 370 | else: 371 | score = score_3 372 | 373 | if count == 2: 374 | if consecutive_count == 2: # ??oo?? 375 | if space_1 <= 1 and space_2 <= 1: # x?oo?x 376 | pass 377 | else: 378 | if space_1 == 0 or space_2 == 0: # xoo__? 379 | if space_1 == 3 or space_2 == 3: # xoo___ 380 | score = score_2 381 | else: 382 | pass 383 | else: # ?__oo_?? 384 | score = score_2_live 385 | 386 | else: # ??o_o?? 387 | if space_1 + space_2 < 3: 388 | pass 389 | else: 390 | if count_1 == 2: 391 | if space_2 == 0: # (space_1 == 3) __o_ox 392 | score = score_2 393 | else: 394 | score = score_2_live 395 | else: # (count_2 == 2) 396 | if space_1 == 0: # (space_2 == 3) xo_o__ 397 | score = score_2 398 | else: 399 | score = score_2_live 400 | 401 | # bonus 402 | if max_score == score_2_live and score == score_2_live: 403 | score = score_double_2_live 404 | if max_score == score_3_live and score == score_3_live: 405 | score = score_double_3_live 406 | if max_score == score_4 and score == score_3_live: 407 | score = score_4_and_3_live 408 | if max_score == score_3_live and score == score_4: 409 | score = score_4_and_3_live 410 | 411 | if count > max_count: 412 | max_count = count 413 | if consecutive_count > max_consecutive_count: 414 | max_consecutive_count = consecutive_count 415 | 416 | if score > max_score: 417 | max_score = score 418 | 419 | return max_score 420 | 421 | def generate(self, obs, all=False): 422 | good_pts = [] 423 | good_scores = [] 424 | pts = [] 425 | scores = [] 426 | dir_set = [(1, 0), (1, -1), (0, -1), (-1, -1), (-1, 0), (-1, 1), (0, 1), (1, 1)] 427 | 428 | if all: 429 | indices = np.where(obs) 430 | check_list = [(indices[0][i], indices[1][i]) for i in range(len(indices[0]))] 431 | else: 432 | if len(self._last_move_list) > 7: 433 | check_list = self._last_move_list[-7:] 434 | else: 435 | check_list = self._last_move_list 436 | 437 | for x0, y0 in check_list: 438 | for dir in dir_set: 439 | if x0 + dir[0] in range(0, 15) and y0 + dir[1] in range(0, 15): 440 | pos = (x0 + dir[0], y0 + dir[1]) 441 | if obs[pos[0]][pos[1]] == 0 and pos not in pts: 442 | obs[pos[0]][pos[1]] = self.color 443 | score_atk = self.evaluate_point(obs, pos) 444 | obs[pos[0]][pos[1]] = -self.color 445 | score_def = self.evaluate_point(obs, pos) 446 | score = max(score_atk, score_def) 447 | if score >= score_3_live: 448 | good_pts.append(pos) 449 | good_scores.append(score) 450 | if score_atk == score_5: 451 | break 452 | pts.append(pos) 453 | scores.append(score) 454 | obs[pos[0]][pos[1]] = 0 455 | 456 | if len(good_pts) > 0 and max(good_scores) >= score_4: 457 | # print('good') 458 | pts = good_pts 459 | scores = good_scores 460 | lst = np.array([pts, scores]) 461 | pts = lst[:, lst[1].argsort()][0] 462 | pos_list = list(pts) 463 | 464 | pos_list.reverse() 465 | return pos_list 466 | 467 | 468 | def get_best_action_list(score_dict): 469 | best_action_list = [] 470 | max_score = MIN 471 | for key in score_dict: 472 | if max_score < score_dict[key]: 473 | best_action_list = [key] 474 | max_score = score_dict[key] 475 | elif max_score == score_dict[key]: 476 | best_action_list.append(key) 477 | return best_action_list 478 | 479 | 480 | def print_score_dict(score_dict): 481 | for key in score_dict: 482 | print(str(key) + ': ' + str(score_dict[key])) 483 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/human.py: -------------------------------------------------------------------------------- 1 | from .agent import Agent 2 | from .mcts import coordinate2index 3 | import numpy as np 4 | 5 | 6 | class HumanAgent(Agent): 7 | def __init__(self, renderer, color, board_size): 8 | self._renderer = renderer 9 | self._color = color 10 | self._board_size = board_size 11 | 12 | def set_renderer(self, renderer): 13 | self._renderer = renderer 14 | 15 | def play(self, obs, action, stone_num, *args): 16 | x, y = self._renderer.ask_for_click() 17 | ind = coordinate2index((x, y), self._board_size) 18 | pi = np.zeros(self._board_size * self._board_size) 19 | pi[ind] = 1 20 | return (x, y), pi, None, None 21 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/mcts.py: -------------------------------------------------------------------------------- 1 | from .node import Node 2 | import numpy as np 3 | from ..rules import * 4 | from ..utils import * 5 | import time 6 | import threading 7 | from ..config import tau_decay 8 | 9 | 10 | class MCTS: 11 | def __init__(self, conf, black_net, white_net, color, use_stochastic_policy): 12 | # hyperparameters 13 | self._c_puct = conf['c_puct'] # PUCT 14 | self._simulation_times = conf['simulation_times'] # number of simulation 15 | self._initial_tau = conf['initial_tau'] # temperature parameter 16 | self._tau = self._initial_tau 17 | self._epsilon = conf['epsilon'] # proportion of dirichlet noise 18 | self._use_dirichlet = conf['use_dirichlet'] 19 | self._alpha = conf['alpha'] 20 | self._board_size = conf['board_size'] 21 | self._color = color # MCTS Agent's color ( 1 for black; -1 for white) 22 | 23 | self._root = Node(1.0, None, BLACK, conf['virtual_loss']) # Monte Carlo tree 24 | 25 | self._black_net = black_net 26 | self._white_net = white_net 27 | 28 | self._is_self_play = conf['is_self_play'] 29 | self._use_stochastic_policy = use_stochastic_policy 30 | self._careful_stage = conf['careful_stage'] 31 | 32 | self._threading_num = conf['threading_num'] 33 | self._virtual_loss = conf['virtual_loss'] 34 | self._expanding_list = [] 35 | 36 | def set_self_play(self, is_self_play): 37 | self._is_self_play = is_self_play 38 | 39 | def set_stochastic_policy(self, use_stochastic_policy): 40 | self._use_stochastic_policy = use_stochastic_policy 41 | 42 | def reset(self): 43 | self._root = Node(1.0, None, BLACK, self._virtual_loss) 44 | 45 | def action(self, board, last_action, stage): 46 | # step 1: rebase tree 47 | # so far the root corresponds to the last board = board - last_action 48 | # thus we need to find out the node that correspond to the argument [board] 49 | 50 | # if the current root is a leaf node, then we should simulate in advance 51 | if self._root.is_leaf(): 52 | last_board = np.copy(board) 53 | # A special case: if the board is empty, then last_action is None 54 | if last_action is not None: 55 | row, col = last_action[0], last_action[1] 56 | last_board[row][col] = 0 57 | 58 | # now the last_board is correspond to the root 59 | self._simulate(last_board, last_action) 60 | 61 | # if the current root is not a leaf, then we can move the root to the child node correspond 62 | # to the board directly 63 | if last_action is not None: 64 | # last action might be None (when the board is empty) 65 | last_action_ind = coordinate2index(last_action, self._board_size) 66 | self._root = self._root.children()[last_action_ind] 67 | 68 | # now the root corresponds to the board 69 | # update tau 70 | if self._tau < 0.04: 71 | self._careful_stage = 0 72 | else: 73 | self._tau = self._initial_tau * (tau_decay ** int(stage / 2)) 74 | 75 | original_pi, pi = self._predict(board, last_action) 76 | 77 | # action decision 78 | if self._use_stochastic_policy and stage <= self._careful_stage: # stochastic policy 79 | position_list = [i for i in range(self._board_size * self._board_size)] 80 | action = np.random.choice(position_list, p=pi) 81 | else: # deterministic policy 82 | action = np.argmax(pi) 83 | 84 | next_node = self._root.children()[action] 85 | prior_prob = next_node.P() 86 | value = next_node.value 87 | 88 | # adjust the root node and discard the remainder of the tree 89 | if not self._is_self_play: 90 | self._root = self._root.children()[action] 91 | 92 | return action, original_pi, prior_prob, value 93 | # return pi for training use 94 | 95 | def _predict(self, board, last_move): 96 | # now board correspond to the root, last_move is the last move of the board 97 | self._simulate(board, last_move) 98 | # generate the action distribution 99 | original_pi = np.array([node.N * 1.0 for node in self._root.children()]) 100 | try: 101 | pi = np.array([node.N ** (1 / self._tau) for node in self._root.children()]) 102 | except: 103 | pi = original_pi 104 | self._tau = 0.03 105 | 106 | if len(pi) != len(board) ** 2: 107 | print('>> error: MCTS._predict') 108 | print(len(pi)) 109 | return 110 | original_pi /= sum(original_pi) 111 | pi /= sum(pi) 112 | 113 | return original_pi, pi 114 | 115 | # ROOT BOARD MUST CORRESPOND TO THE ROOT NODE!!! 116 | def _get_simulate_thread_target(self, root_board, last_move): 117 | def _simulate_thread(): 118 | legal_vec_root = board2legalvec(root_board) 119 | each_simulation_times = int(self._simulation_times / self._threading_num) 120 | 121 | for epoch in range(each_simulation_times): 122 | # initiate the current node as the root node and initiate the current color as the color of the root 123 | current_node = self._root 124 | current_color = self._root.color 125 | 126 | legal_vec_current = np.copy(legal_vec_root) # deep copy 127 | current_board = np.copy(root_board) 128 | 129 | # initiate select_action as last_move 130 | select_action = last_move 131 | 132 | # so far, root node might be a leaf (eg: root_board is empty) 133 | 134 | # if the root node is not a leaf, then it will enter the following loop 135 | while not current_node.is_leaf(): 136 | current_node, select_action_ind = current_node.select(self._c_puct, legal_vec_current) 137 | 138 | # add virtual loss in order to make other threads avoid this node 139 | current_node.select_num += 1 140 | current_node.N += self._virtual_loss 141 | 142 | # update legal vector 143 | legal_vec_current[select_action_ind] = 0 144 | 145 | # update current board 146 | row, col = index2coordinate(select_action_ind, self._board_size) 147 | current_board[row][col] = current_color 148 | select_action = (row, col) 149 | 150 | # update current color 151 | current_color = -current_color 152 | 153 | # if current node is not a leaf node, then it can't be in expanding list. 154 | # if current node is a leaf node, it may be expanding in other thread, so here we wait until it 155 | # is expanded (so that it is no longer a leaf node) 156 | while current_node in self._expanding_list: 157 | time.sleep(1e-4) 158 | 159 | # so far, current node must be a leaf node (including end node) 160 | if current_node.is_end: 161 | current_node.backup(-current_node.value) 162 | continue 163 | 164 | # add current node to expanding list 165 | if current_node not in self._expanding_list: 166 | self._expanding_list.append(current_node) 167 | else: 168 | continue 169 | 170 | # calculate the prior probabilities and value 171 | if current_color is BLACK: 172 | net = self._black_net 173 | else: 174 | net = self._white_net 175 | p, v = net.predict(board=current_board, 176 | color=current_color, 177 | last_move=select_action) 178 | current_node.value = v 179 | prior_prob = p[0] 180 | 181 | if self._use_dirichlet: 182 | alpha = [self._alpha] * (self._board_size * self._board_size) 183 | noise = np.random.dirichlet(alpha) 184 | prior_prob = (1 - self._epsilon) * prior_prob + self._epsilon * noise 185 | 186 | # now check whether this leaf node is an end node or not 187 | if select_action is not None: 188 | end_flag = check_rules(current_board, select_action, -current_color) 189 | if end_flag == 'blackwins' or end_flag == 'whitewins' or end_flag == 'full': 190 | current_node.is_end = True 191 | if end_flag == 'full': 192 | current_node.value = 0 193 | else: 194 | current_node.value = -1 195 | else: 196 | current_node.expand(prior_prob, self._board_size) 197 | else: 198 | # if action is None, then the root node must be a leaf 199 | current_node.expand(prior_prob, self._board_size) 200 | 201 | self._expanding_list.remove(current_node) 202 | 203 | # backup 204 | current_node.backup(-current_node.value) 205 | 206 | return _simulate_thread 207 | 208 | def _simulate(self, root_board, last_move): 209 | target = self._get_simulate_thread_target(root_board, last_move) 210 | thread_list = [] 211 | for i in range(self._threading_num): 212 | thr = threading.Thread(target=target, name='thread_' + str(i + 1)) 213 | thr.start() 214 | thread_list.append(thr) 215 | time.sleep(1e-3) 216 | for thr in thread_list: 217 | thr.join() 218 | 219 | 220 | def check_rules(board, action_cor, color): 221 | stone_num = sum(sum(np.abs(board))) 222 | if stone_num <= 8: # Impossible to end since the maximal length of consecutive lines with the same color is four. 223 | return 'continue' 224 | else: 225 | if stone_num == board.shape[0] * board.shape[0]: 226 | return 'full' 227 | else: # Greedy Match 228 | # cor = index2coordinate(action, board.shape[0]) 229 | # Horizontal Check 230 | count = 1 231 | for i in range(1, 5): 232 | if action_cor[1] + i <= board.shape[0] - 1: 233 | if board[action_cor[0]][action_cor[1] + i] == color: 234 | count += 1 235 | else: 236 | break 237 | else: 238 | break 239 | for i in range(1, 5): 240 | if action_cor[1] - i >= 0: 241 | if board[action_cor[0]][action_cor[1] - i] == color: 242 | count += 1 243 | else: 244 | break 245 | else: 246 | break 247 | if count >= 5: 248 | if color == 1: 249 | return 'blackwins' 250 | else: 251 | return 'whitewins' 252 | # Vertical Check 253 | count = 1 254 | for i in range(1, 5): 255 | if action_cor[0] + i <= board.shape[0] - 1: 256 | if board[action_cor[0] + i][action_cor[1]] == color: 257 | count += 1 258 | else: 259 | break 260 | else: 261 | break 262 | for i in range(1, 5): 263 | if action_cor[0] - i >= 0: 264 | if board[action_cor[0] - i][action_cor[1]] == color: 265 | count += 1 266 | else: 267 | break 268 | else: 269 | break 270 | if count >= 5: 271 | if color == 1: 272 | return 'blackwins' 273 | else: 274 | return 'whitewins' 275 | # Diagonal Check 276 | count = 1 277 | for i in range(1, 5): 278 | if (action_cor[0] + i <= board.shape[0] - 1) and (action_cor[1] + i <= board.shape[0] - 1): 279 | if board[action_cor[0] + i][action_cor[1] + i] == color: 280 | count += 1 281 | else: 282 | break 283 | else: 284 | break 285 | for i in range(1, 5): 286 | if (action_cor[0] - i >= 0) and (action_cor[1] - i >= 0): 287 | if board[action_cor[0] - i][action_cor[1] - i] == color: 288 | count += 1 289 | else: 290 | break 291 | else: 292 | break 293 | if count >= 5: 294 | if color == 1: 295 | return 'blackwins' 296 | else: 297 | return 'whitewins' 298 | # Anti-Diagonal Check 299 | count = 1 300 | for i in range(1, 5): 301 | if (action_cor[0] + i <= board.shape[0] - 1) and (action_cor[1] - i >= 0): 302 | if board[action_cor[0] + i][action_cor[1] - i] == color: 303 | count += 1 304 | else: 305 | break 306 | else: 307 | break 308 | for i in range(1, 5): 309 | if (action_cor[0] - i >= 0) and (action_cor[1] + i <= board.shape[0] - 1): 310 | if board[action_cor[0] - i][action_cor[1] + i] == color: 311 | count += 1 312 | else: 313 | break 314 | else: 315 | break 316 | if count >= 5: 317 | if color == 1: 318 | return 'blackwins' 319 | else: 320 | return 'whitewins' 321 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/node.py: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | import numpy as np 3 | from ..config import * 4 | 5 | 6 | class Node: 7 | count = 0 8 | backup_count = 0 9 | conflict_count = 0 10 | 11 | def __init__(self, prior_prob, parent, color, virtual_loss): 12 | 13 | # actually N, Q, W, U are properties of edge 14 | self.N = 0 # Number of visits 15 | self._Q = 0 # Quality of the edge 16 | self.W = 0 # Intermediate value for Q update 17 | self._P = prior_prob # Prior probability predicted by network 18 | self._U = 0 19 | 20 | self._virtual_loss = virtual_loss 21 | self.select_num = 0 22 | 23 | self._parent = parent 24 | self._children = [] # if self._children is an empty list, it is viewed as a leaf node 25 | 26 | # when it is an end leaf 27 | self.is_end = False 28 | self.value = 0 29 | 30 | self.color = color # color of next player 31 | self.num = Node.count 32 | Node.count += 1 33 | 34 | def P(self): 35 | return self._P 36 | 37 | def Q(self): 38 | return self._Q 39 | 40 | def U(self): 41 | return self._U 42 | 43 | def parent(self): 44 | return self._parent 45 | 46 | def children(self): 47 | return self._children 48 | 49 | def is_root(self): 50 | return self._parent is None 51 | 52 | def is_leaf(self): 53 | return self._children == [] 54 | 55 | def upper_confidence_bound(self, c_puct): 56 | try: 57 | self._U = c_puct * self._P * sqrt(self._parent.N) / (1 + self.N) 58 | except ValueError: 59 | print('> valueError: Node.upper_confidence_bound') 60 | print(self._U, self._P, self._parent.N, self.N) 61 | return self._U + self._Q 62 | 63 | def select(self, c_puct, legal_vec_current): 64 | ucb_list = np.array([node.upper_confidence_bound(c_puct) for node in self._children]) 65 | ind = np.argsort(ucb_list) 66 | for i in range(len(ind)): 67 | if legal_vec_current[ind[-(i + 1)]] == 1: 68 | action = ind[-(i + 1)] 69 | break 70 | next_node = self._children[action] 71 | return next_node, action 72 | 73 | def expand(self, prior_prob, board_size=15): 74 | if not self.is_leaf(): 75 | print('> error: node.expand') 76 | return 77 | for i in range(board_size * board_size): 78 | prob = prior_prob[i] 79 | self._children.append(Node(prob, self, -self.color, self._virtual_loss)) 80 | 81 | def backup(self, value): 82 | # remove virtual loss 83 | if self.select_num > 0: 84 | self.select_num -= 1 85 | self.N -= self._virtual_loss 86 | if self.N < 0: 87 | self.N += self._virtual_loss 88 | 89 | self.N += 1 90 | self.W += value 91 | self._Q = self.W / self.N 92 | if not self.is_root(): 93 | self._parent.backup(-value_decay * value) 94 | -------------------------------------------------------------------------------- /AlphaGomoku/config.py: -------------------------------------------------------------------------------- 1 | value_decay = 0.95 2 | 3 | tau_decay = 0.8 4 | 5 | 6 | class Config(dict): 7 | def __init__(self, **kwargs): 8 | # mode 1: training mode, 2: AI vs Human, 3: Human vs Human, 0: Debug 9 | self['mode'] = 1 10 | 11 | # display mode 12 | self['display'] = False 13 | 14 | # screen size of renderer 15 | self['screen_size'] = (720, 720) 16 | 17 | # self play mode 18 | self['is_self_play'] = True 19 | 20 | # true: 3-3, 4-4, 6+ are not allowed for black 21 | self['forbidden_moves'] = False 22 | 23 | # PUCT: when c_puct gets smaller, the simulation becomes deeper 24 | self['c_puct'] = 5 25 | 26 | # simulation times 27 | self['simulation_times'] = 400 28 | 29 | # initial tau 30 | self['initial_tau'] = 1 31 | 32 | # proportion of dirichlet noise 33 | self['epsilon'] = 0.25 34 | 35 | # coef of dirichlet noise 36 | self['alpha'] = 0.03 37 | 38 | # use dirichlet 39 | self['use_dirichlet'] = False 40 | 41 | # board size 42 | self['board_size'] = 15 43 | 44 | # epoch: number of games played to train 45 | self['epoch'] = 20 46 | 47 | # sample percentage 48 | self['sample_percentage'] = 1 49 | 50 | # number of games in each training epoch 51 | self['games_num'] = 30 52 | 53 | # learning rate 54 | self['learning_rate'] = 2e-3 55 | 56 | # momentum 57 | self['momentum'] = 9e-1 58 | 59 | # coefficient of l2 penalty 60 | self['l2'] = 1e-4 61 | 62 | # path of network parameters 63 | self['net_para_file'] = 'AlphaGomoku/network/model/model_' + str(self['board_size']) + '.h5' 64 | 65 | # path of history of fitting 66 | self['fit_history_file'] = 'AlphaGomoku/network/history/log_' + str(self['board_size']) 67 | 68 | # human play data path 69 | self['human_play_data_path'] = 'AlphaGomoku/dataset/human_play_data/human_' + str(self['board_size']) + '_' 70 | 71 | # self play data path 72 | self['self_play_data_path'] = 'AlphaGomoku/dataset/self_play_data/self_play_' + str( 73 | self['board_size']) + '_' 74 | 75 | # generated data path 76 | self['generated_data_path'] = 'AlphaGomoku/dataset/generated_data/gen_' 77 | 78 | # use previous model 79 | self['use_previous_model'] = True 80 | 81 | # number of games played for evaluation, must be an even number!!! 82 | self['evaluate_games_num'] = 20 83 | 84 | # epoch from which evaluation starts 85 | self['evaluate_start_epoch'] = 1 86 | 87 | # Mini-Batch Size 88 | self['mini_batch_size'] = 512 89 | 90 | # fit epochs, number of each sample used 91 | self['fit_epochs'] = 10 92 | 93 | # use supervised learning 94 | self['is_supervised'] = False 95 | 96 | # careful stage 97 | self['careful_stage'] = 6 98 | 99 | # number of threads 100 | self['threading_num'] = 8 101 | 102 | # virtual loss 103 | self['virtual_loss'] = 10 104 | 105 | # show evaluation score given by agent 106 | self['show_score'] = True 107 | 108 | self.update(**kwargs) 109 | 110 | def update(self, **kwargs): 111 | for key in kwargs: 112 | self[key] = kwargs[key] 113 | 114 | def set_mode(self, mode): 115 | if mode not in [1, 2, 2.5, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0]: 116 | print('> Error: mode not found!') 117 | mode = 1 118 | if mode == 1: 119 | self['display'] = False 120 | self['is_self_play'] = True 121 | self['mode'] = 1 122 | self['show_score'] = False 123 | print('> Training mode') 124 | if mode == 2: 125 | self['display'] = True 126 | self['is_self_play'] = False 127 | self['mode'] = 2 128 | self['simulation_times'] = 800 129 | self['show_score'] = False 130 | print('> AI vs Human mode') 131 | if mode == 2.5: 132 | self['display'] = True 133 | self['is_self_play'] = False 134 | self['mode'] = 2.5 135 | self['simulation_times'] = 800 136 | self['show_score'] = False 137 | print('> AI vs Human mode') 138 | if mode == 3: 139 | self['display'] = True 140 | self['is_self_play'] = False 141 | self['mode'] = 3 142 | print('> Human vs Human mode') 143 | if mode == 4: 144 | self['display'] = False 145 | self['is_self_play'] = False 146 | self['show_score'] = False 147 | self['mode'] = 4 148 | self['simulation_times'] = 400 149 | print('> AI vs AI mode') 150 | if mode == 5: 151 | self['display'] = True 152 | self['is_self_play'] = False 153 | self['mode'] = 5 154 | self['games_num'] = 100 155 | print('> Collect human play data mode') 156 | if mode == 6: 157 | self['display'] = False 158 | self['is_self_play'] = True 159 | self['mode'] = 6 160 | self['games_num'] = 20 161 | self['epoch'] = 10 162 | self['simulation_times'] = 1600 163 | self['careful_stage'] = 226 164 | self['show_score'] = False 165 | print('> Collect self play data mode') 166 | if mode == 7: 167 | self['display'] = False 168 | self['is_self_play'] = True 169 | self['mode'] = 7 170 | self['is_supervised'] = True 171 | self['show_score'] = False 172 | print('> Train on external data mode') 173 | if mode == 8: 174 | self['display'] = True 175 | self['is_self_play'] = False 176 | self['mode'] = 8 177 | print('> Collect human vs AI play data mode') 178 | if mode == 9: 179 | self['display'] = True 180 | self['is_self_play'] = False 181 | self['mode'] = 9 182 | print('> AI(NaiveAgent) vs Human mode') 183 | if mode == 10: 184 | self['display'] = False 185 | self['is_self_play'] = False 186 | self['mode'] = 10 187 | self['show_score'] = False 188 | print('> AI vs AI(NaiveAgent) mode') 189 | if mode == 11: 190 | self['display'] = False 191 | self['is_self_play'] = False 192 | self['mode'] = 11 193 | print('> Train on generated data mode') 194 | self['simulation_times'] = 1600 195 | self['games_num'] = 50 196 | self['epoch'] = 100 197 | self['show_score'] = False 198 | if mode == 12: 199 | self['display'] = False 200 | self['is_self_play'] = False 201 | self['mode'] = 12 202 | self['games_num'] = 100 203 | self['epoch'] = 20 204 | self['show_score'] = True 205 | print('> Collect self play data mode') 206 | if mode == 13: 207 | self['display'] = False 208 | self['is_self_play'] = True 209 | self['show_score'] = False 210 | self['epoch'] = 10 211 | self['games_num'] = 60 212 | self['simulation_times'] = 1600 213 | self['careful_stage'] = 226 # disable careful stage 214 | self['mode'] = 13 215 | print('> Self play and train mode') 216 | if mode == 0: 217 | self['display'] = True 218 | self['is_self_play'] = True 219 | self['mode'] = 0 220 | self['simulation_times'] = 100 221 | self['games_num'] = 3 222 | self['epoch'] = 2 223 | self['show_score'] = True 224 | print('> Debug mode') 225 | 226 | def print_current_config(self): 227 | print('------------------') 228 | print('> CURRENT CONFIG:') 229 | for key in self: 230 | print('{}: {}'.format(key, self[key])) 231 | print('------------------') 232 | -------------------------------------------------------------------------------- /AlphaGomoku/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import * 2 | from .generator import * -------------------------------------------------------------------------------- /AlphaGomoku/dataset/dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import os 4 | from ..config import value_decay 5 | 6 | 7 | class DataSet: 8 | def __init__(self): 9 | self._game_record = [] 10 | 11 | def clear(self): 12 | self._game_record = [] 13 | 14 | def add_record(self, record): 15 | self._game_record.append(record) 16 | 17 | def get_sample(self, percentage, shuffle=True): 18 | obs = [] 19 | col = [] 20 | last_move = [] 21 | pi = [] 22 | z = [] 23 | for record in self._game_record: 24 | a, b, c, d, e = record.get_sample(percentage, shuffle) 25 | obs.extend(a) 26 | col.extend(b) 27 | last_move.extend(c) 28 | pi.extend(d) 29 | z.extend(e) 30 | return obs, col, last_move, pi, z 31 | 32 | def record_num(self): 33 | return len(self._game_record) 34 | 35 | def save(self, path): 36 | obs, col, last_move, pi, z = self.get_sample(1) 37 | 38 | obs_path = path + 'obs' 39 | np.save(obs_path, obs) 40 | 41 | col_path = path + 'col' 42 | np.save(col_path, col) 43 | 44 | last_move_path = path + 'last_move' 45 | np.save(last_move_path, last_move) 46 | 47 | pi_path = path + 'pi' 48 | np.save(pi_path, pi) 49 | 50 | z_path = path + 'z' 51 | np.save(z_path, z) 52 | 53 | print('> ' + str(len(z)) + ' positions of data saved') 54 | 55 | def load(self, path): 56 | if not os.path.exists(path + 'obs.npy'): 57 | print('> error: model ' + path + 'obs.npy' + 'not found') 58 | return 59 | 60 | obs_path = path + 'obs.npy' 61 | obs = np.load(obs_path) 62 | 63 | col_path = path + 'col.npy' 64 | col = np.load(col_path) 65 | 66 | last_move_path = path + 'last_move.npy' 67 | last_move = np.load(last_move_path) 68 | 69 | pi_path = path + 'pi.npy' 70 | pi = np.load(pi_path) 71 | 72 | z_path = path + 'z.npy' 73 | z = np.load(z_path) 74 | 75 | size = len(z) 76 | record = GameRecord() 77 | for i in range(size): 78 | record.add(obs[i], col[i], last_move[i], pi[i], z[i]) 79 | self.add_record(record) 80 | 81 | 82 | class GameRecord: 83 | def __init__(self): 84 | self._obs_list = [] 85 | self._color_list = [] 86 | self._last_move_list = [] 87 | self._pi_list = [] 88 | self._z_list = [] 89 | self._total_num = 0 90 | 91 | def add(self, obs, color, last_move, pi, z=None): 92 | self._obs_list.append(obs) 93 | self._color_list.append(color) 94 | self._last_move_list.append(last_move) 95 | self._pi_list.append(pi) 96 | self._z_list.append(z) 97 | self._total_num += 1 98 | 99 | def add_list(self, obs, color, last_move, pi, z): 100 | self._obs_list.extend(obs) 101 | self._color_list.extend(color) 102 | self._last_move_list.extend(last_move) 103 | self._pi_list.extend(pi) 104 | self._z_list.extend(z) 105 | self._total_num += len(z) 106 | 107 | # the method to define the value of z 108 | def set_z(self, result): 109 | if result == 0: 110 | self._z_list = [0 for _ in range(self._total_num)] 111 | return 112 | for i in range(self._total_num): 113 | if result == self._color_list[i]: 114 | self._z_list[i] = 1 * value_decay ** (self._total_num - i - 1) 115 | else: 116 | self._z_list[i] = -1 * value_decay ** (self._total_num - i - 1) 117 | 118 | def get_sample(self, percentage, shuffle=True): 119 | if shuffle: 120 | sample_num = int(self._total_num * percentage) 121 | indices = random.sample([i for i in range(self._total_num)], sample_num) 122 | obs_sample = [self._obs_list[index] for index in indices] 123 | color_sample = [self._color_list[index] for index in indices] 124 | last_move_sample = [self._last_move_list[index] for index in indices] 125 | pi_sample = [self._pi_list[index] for index in indices] 126 | z_sample = [self._z_list[index] for index in indices] 127 | return obs_sample, color_sample, last_move_sample, pi_sample, z_sample 128 | else: 129 | return self._obs_list, self._color_list, self._last_move_list, self._pi_list, self._z_list 130 | -------------------------------------------------------------------------------- /AlphaGomoku/dataset/generator.py: -------------------------------------------------------------------------------- 1 | from ..ui import * 2 | from ..config import * 3 | from ..utils import * 4 | from .dataset import * 5 | 6 | 7 | class Generator: 8 | def __init__(self, board_size, max_noise_stone_num): 9 | self._board_size = board_size 10 | self._max_noise_stone_num = max_noise_stone_num 11 | 12 | @log 13 | def generate_live_4_attack(self, sample_num=10000): 14 | record = GameRecord() 15 | i = 0 16 | while i < sample_num: 17 | color = np.random.random_integers(0, 1) * 2 - 1 18 | board = self._empty_board() 19 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=4) 20 | if len(fix_pos_list) == 0: 21 | continue 22 | 23 | for x, y in pos_list: 24 | board[x][y] = color 25 | 26 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 27 | if len(fix_pos_list) == 2: 28 | ind_1 = coordinate2index(fix_pos_list[0], self._board_size) 29 | ind_2 = coordinate2index(fix_pos_list[1], self._board_size) 30 | pi[ind_1], pi[ind_2] = 0.5, 0.5 31 | if len(fix_pos_list) == 1: 32 | ind = coordinate2index(fix_pos_list[0], self._board_size) 33 | pi[ind] = 1 34 | 35 | self._add_noise(board=board, next_player=color, max_stone_num=self._max_noise_stone_num, 36 | fix_pos_list=fix_pos_list) 37 | 38 | record.add(obs=board, color=color, last_move=pos_list[0], pi=pi, z=1) 39 | i += 1 40 | return record 41 | 42 | @log 43 | def generate_live_4_defend(self, sample_num=10000): 44 | record = GameRecord() 45 | i = 0 46 | while i < sample_num: 47 | color = np.random.random_integers(0, 1) * 2 - 1 48 | board = self._empty_board() 49 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=4) 50 | if len(fix_pos_list) == 0: 51 | continue 52 | 53 | for x, y in pos_list: 54 | board[x][y] = color 55 | 56 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 57 | if len(fix_pos_list) == 2: 58 | ind_1 = coordinate2index(fix_pos_list[0], self._board_size) 59 | ind_2 = coordinate2index(fix_pos_list[1], self._board_size) 60 | pi[ind_1], pi[ind_2] = 0.5, 0.5 61 | if len(fix_pos_list) == 1: 62 | ind = coordinate2index(fix_pos_list[0], self._board_size) 63 | pi[ind] = 1 64 | 65 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 66 | fix_pos_list=fix_pos_list) 67 | 68 | record.add(obs=board, color=-color, last_move=pos_list[0], pi=pi, z=-1) 69 | i += 1 70 | return record 71 | 72 | @log 73 | def generate_dead_4_oooo_defend(self, sample_num=10000): 74 | record = GameRecord() 75 | i = 0 76 | while i < sample_num: 77 | color = np.random.random_integers(0, 1) * 2 - 1 78 | board = self._empty_board() 79 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=4) 80 | if len(fix_pos_list) == 0: 81 | continue 82 | 83 | for x, y in pos_list: 84 | board[x][y] = color 85 | 86 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 87 | if len(fix_pos_list) == 2: 88 | ind = coordinate2index(fix_pos_list[0], self._board_size) 89 | pi[ind] = 1 90 | fx, fy = fix_pos_list[1][0], fix_pos_list[1][1] 91 | board[fx][fy] = -color 92 | if len(fix_pos_list) == 1: 93 | ind = coordinate2index(fix_pos_list[0], self._board_size) 94 | pi[ind] = 1 95 | 96 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 97 | fix_pos_list=fix_pos_list) 98 | 99 | record.add(obs=board, color=-color, last_move=pos_list[0], pi=pi, 100 | z=0) # last move should be next to an empty position 101 | i += 1 102 | return record 103 | 104 | @log 105 | def generate_dead_4_ooo_o_defend(self, sample_num=10000): 106 | record = GameRecord() 107 | for _ in range(sample_num): 108 | color = np.random.random_integers(0, 1) * 2 - 1 109 | board = self._empty_board() 110 | pos_list, _ = self._generate_consecutive_line(consecutive_num=5) 111 | fix_pos_list = [pos_list[3]] 112 | 113 | for x, y in pos_list: 114 | board[x][y] = color 115 | board[pos_list[3][0]][pos_list[3][1]] = 0 116 | 117 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 118 | 119 | ind = coordinate2index(pos_list[3], self._board_size) 120 | pi[ind] = 1 121 | 122 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 123 | fix_pos_list=fix_pos_list) 124 | 125 | record.add(obs=board, color=-color, last_move=pos_list[1], pi=pi, 126 | z=0) # last move should be next to an empty position 127 | return record 128 | 129 | @log 130 | def generate_dead_4_oo_oo_defend(self, sample_num=10000): 131 | record = GameRecord() 132 | for _ in range(sample_num): 133 | color = np.random.random_integers(0, 1) * 2 - 1 134 | board = self._empty_board() 135 | pos_list, _ = self._generate_consecutive_line(consecutive_num=5) 136 | fix_pos_list = [pos_list[2]] 137 | 138 | for x, y in pos_list: 139 | board[x][y] = color 140 | board[pos_list[2][0]][pos_list[2][1]] = 0 141 | 142 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 143 | 144 | ind = coordinate2index(pos_list[2], self._board_size) 145 | pi[ind] = 1 146 | 147 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 148 | fix_pos_list=fix_pos_list) 149 | 150 | record.add(obs=board, color=-color, last_move=pos_list[1], pi=pi, 151 | z=0) # last move should be next to an empty position 152 | return record 153 | 154 | @log 155 | def generate_live_3_ooo_attack(self, sample_num=10000): 156 | record = GameRecord() 157 | i = 0 158 | while i < sample_num: 159 | color = np.random.random_integers(0, 1) * 2 - 1 160 | board = self._empty_board() 161 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=3) 162 | if len(fix_pos_list) == 0 or len(fix_pos_list) == 1: 163 | continue 164 | 165 | for x, y in pos_list: 166 | board[x][y] = color 167 | 168 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 169 | ind_1 = coordinate2index(fix_pos_list[0], self._board_size) 170 | ind_2 = coordinate2index(fix_pos_list[1], self._board_size) 171 | pi[ind_1], pi[ind_2] = 0.5, 0.5 172 | 173 | self._add_noise(board=board, next_player=color, max_stone_num=self._max_noise_stone_num, 174 | fix_pos_list=fix_pos_list) 175 | 176 | record.add(obs=board, color=color, last_move=pos_list[1], pi=pi, z=1) 177 | i += 1 178 | return record 179 | 180 | @log 181 | def generate_live_3_oo_o_attack(self, sample_num=10000): 182 | record = GameRecord() 183 | i = 0 184 | while i < sample_num: 185 | color = np.random.random_integers(0, 1) * 2 - 1 186 | board = self._empty_board() 187 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=4) 188 | if len(fix_pos_list) == 0 or len(fix_pos_list) == 1: 189 | continue 190 | 191 | fix_pos_list.append(list(pos_list[2])) 192 | 193 | for x, y in pos_list: 194 | board[x][y] = color 195 | board[pos_list[2][0]][pos_list[2][1]] = 0 196 | 197 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 198 | ind = coordinate2index(pos_list[2], self._board_size) 199 | pi[ind] = 1 200 | 201 | self._add_noise(board=board, next_player=color, max_stone_num=self._max_noise_stone_num, 202 | fix_pos_list=fix_pos_list) 203 | 204 | record.add(obs=board, color=color, last_move=pos_list[1], pi=pi, z=1) 205 | i += 1 206 | return record 207 | 208 | @log 209 | def generate_live_3_ooo_defend(self, sample_num=10000): 210 | record = GameRecord() 211 | i = 0 212 | while i < sample_num: 213 | color = np.random.random_integers(0, 1) * 2 - 1 214 | board = self._empty_board() 215 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=3) 216 | if len(fix_pos_list) == 0 or len(fix_pos_list) == 1: 217 | continue 218 | 219 | for x, y in pos_list: 220 | board[x][y] = color 221 | 222 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 223 | ind_1 = coordinate2index(fix_pos_list[0], self._board_size) 224 | ind_2 = coordinate2index(fix_pos_list[1], self._board_size) 225 | pi[ind_1], pi[ind_2] = 0.5, 0.5 226 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 227 | fix_pos_list=fix_pos_list) 228 | 229 | record.add(obs=board, color=-color, last_move=pos_list[1], pi=pi, 230 | z=0) # last move should be next to an empty position 231 | i += 1 232 | return record 233 | 234 | @log 235 | def generate_live_3_oo_o_defend(self, sample_num=10000): 236 | record = GameRecord() 237 | i = 0 238 | while i < sample_num: 239 | color = np.random.random_integers(0, 1) * 2 - 1 240 | board = self._empty_board() 241 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=4) 242 | if len(fix_pos_list) == 0 or len(fix_pos_list) == 1: 243 | continue 244 | 245 | fix_pos_list.append(list(pos_list[2])) 246 | 247 | for x, y in pos_list: 248 | board[x][y] = color 249 | board[pos_list[2][0]][pos_list[2][1]] = 0 250 | 251 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 252 | ind_1 = coordinate2index(fix_pos_list[0], self._board_size) 253 | ind_2 = coordinate2index(fix_pos_list[1], self._board_size) 254 | ind_3 = coordinate2index(fix_pos_list[2], self._board_size) 255 | pi[ind_1], pi[ind_2], pi[ind_3] = 0.25, 0.25, 0.5 256 | 257 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 258 | fix_pos_list=fix_pos_list) 259 | 260 | record.add(obs=board, color=-color, last_move=pos_list[1], pi=pi, 261 | z=0) # last move should be next to an empty position 262 | i += 1 263 | return record 264 | 265 | def _generate_consecutive_line(self, consecutive_num): 266 | start_pos = np.random.random_integers(0, self._board_size - 1, 2) 267 | end_pos = [-1, -1] 268 | while end_pos[0] < 0 or end_pos[0] > 14 or end_pos[1] < 0 or end_pos[1] > 14: 269 | dx, dy = list(np.random.random_integers(-1, 1, 2)) 270 | if dx == 0 and dy == 0: 271 | continue 272 | end_pos[0] = start_pos[0] + (consecutive_num - 1) * dx 273 | end_pos[1] = start_pos[1] + (consecutive_num - 1) * dy 274 | fix_pos_list = [] 275 | if dx == 0: 276 | x_list = [start_pos[0]] * consecutive_num 277 | else: 278 | x_list = list(range(start_pos[0], end_pos[0] + dx, dx)) 279 | if dy == 0: 280 | y_list = [start_pos[1]] * consecutive_num 281 | else: 282 | y_list = list(range(start_pos[1], end_pos[1] + dy, dy)) 283 | 284 | fp_1 = [start_pos[0] - dx, start_pos[1] - dy] 285 | if fp_1[0] in list(range(0, self._board_size)) and fp_1[1] in list(range(0, self._board_size)): 286 | fix_pos_list.append(fp_1) 287 | fp_2 = [end_pos[0] + dx, end_pos[1] + dy] 288 | if fp_2[0] in list(range(0, self._board_size)) and fp_2[1] in list(range(0, self._board_size)): 289 | fix_pos_list.append(fp_2) 290 | 291 | pos_list = list(zip(x_list, y_list)) 292 | return pos_list, fix_pos_list 293 | 294 | def _empty_board(self): 295 | empty_board = [[0 for _ in range(self._board_size)] for _ in range(self._board_size)] 296 | return np.array(empty_board) 297 | 298 | def _add_noise(self, board, next_player, max_stone_num, fix_pos_list): 299 | stone_num = np.random.random_integers(30, max_stone_num) 300 | black_stone_ind = np.where(board == BLACK) 301 | white_stone_ind = np.where(board == WHITE) 302 | black_stone_num = len(black_stone_ind[0]) 303 | white_stone_num = len(white_stone_ind[0]) 304 | black_origin, white_origin = black_stone_num, white_stone_num 305 | 306 | delta = black_stone_num - white_stone_num 307 | # 假设下一步轮到黑棋走,要放x个黑棋,y个白棋,则x+b=y+w, x+y=stone_num 308 | # x-y=-delta, 2x=stone_num-delta 309 | # 假设下一步轮到白棋走,要放x个黑棋,y个白棋,则x+b+1=y+w, x+y=stone_num 310 | # x-y=-delta-1, 2x=stone_num-delta-1 311 | 312 | if next_player == BLACK: 313 | black_stone_num = int((stone_num - delta) / 2) 314 | white_stone_num = black_stone_num + delta 315 | if black_stone_num + black_origin > white_stone_num + white_origin: 316 | white_stone_num += 1 317 | else: 318 | black_stone_num = int((stone_num - delta - 1) / 2) 319 | white_stone_num = black_stone_num + delta 320 | if black_stone_num + black_origin == white_stone_num + white_origin: 321 | black_stone_num += 1 322 | 323 | while white_stone_num > 0: 324 | pos = list(np.random.random_integers(0, self._board_size - 1, 2)) 325 | if board[pos[0]][pos[1]] == 0 and pos not in fix_pos_list: 326 | white_stone_num -= 1 327 | board[pos[0]][pos[1]] = WHITE 328 | 329 | while black_stone_num > 0: 330 | pos = list(np.random.random_integers(0, self._board_size - 1, 2)) 331 | if board[pos[0]][pos[1]] == 0 and pos not in fix_pos_list: 332 | black_stone_num -= 1 333 | board[pos[0]][pos[1]] = BLACK 334 | -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/human_15_col.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/human_15_col.npy -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/human_15_last_move.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/human_15_last_move.npy -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/human_15_obs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/human_15_obs.npy -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/human_15_pi.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/human_15_pi.npy -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/human_15_z.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/human_15_z.npy -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/save human play data here.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/save human play data here.txt -------------------------------------------------------------------------------- /AlphaGomoku/dataset/self_play_data/save self play data here.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/self_play_data/save self play data here.txt -------------------------------------------------------------------------------- /AlphaGomoku/env.py: -------------------------------------------------------------------------------- 1 | from . import * 2 | from .dataset.dataset import * 3 | # import matplotlib.pyplot as plt 4 | import os 5 | import re 6 | from .rules import * 7 | from .utils import * 8 | import shutil 9 | 10 | 11 | class Env: 12 | def __init__(self, conf): 13 | if not display_mode: 14 | conf['display'] = False 15 | print('> error: display mode is not available (requires pygame and threading)') 16 | 17 | self._conf = conf 18 | self._is_self_play = conf['is_self_play'] 19 | self._show_score = conf['show_score'] 20 | 21 | self._value_list = [] 22 | self._loss_list = [] 23 | 24 | self._network_version = self._get_model_version() 25 | self._agent_1_ver = 0 26 | self._agent_2_ver = 0 27 | 28 | self._evaluator_agent = FastAgent(color=BLACK) 29 | self._epoch = conf['epoch'] 30 | self._sample_percentage = conf['sample_percentage'] 31 | self._games_num = conf['games_num'] 32 | self._evaluate_games_num = conf['evaluate_games_num'] 33 | self._renderer = None 34 | 35 | # Training 36 | if conf['mode'] in [0, 1, 6, 7, 13]: 37 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=True) 38 | self._agent_2 = None 39 | # AI vs Human 40 | if conf['mode'] == 2: 41 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=False) 42 | self._agent_2 = HumanAgent(self._renderer, color=WHITE, board_size=conf['board_size']) 43 | 44 | if conf['mode'] == 2.5: 45 | self._agent_1 = HumanAgent(self._renderer, color=BLACK, board_size=conf['board_size']) 46 | self._agent_2 = MCTSAgent(conf, color=WHITE, use_stochastic_policy=False) 47 | # Human vs Human 48 | if conf['mode'] == 3 or conf['mode'] == 5: 49 | self._agent_1 = HumanAgent(self._renderer, color=BLACK, board_size=conf['board_size']) 50 | self._agent_2 = HumanAgent(self._renderer, color=WHITE, board_size=conf['board_size']) 51 | 52 | if conf['mode'] == 4: 53 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=False) 54 | # self._agent_2 = MCTSAgent(conf, color=WHITE, use_stochastic_policy=False) 55 | self._agent_2 = MCTSAgent(conf, color=WHITE, use_stochastic_policy=False) 56 | # self._agent_1, self._agent_2 = self._agent_2, self._agent_1 57 | 58 | if conf['mode'] == 8: 59 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=False) 60 | self._agent_2 = HumanAgent(self._renderer, color=WHITE, board_size=conf['board_size']) 61 | 62 | if conf['mode'] == 9: 63 | self._agent_1 = FastAgent(color=BLACK) 64 | self._agent_2 = HumanAgent(self._renderer, color=WHITE, board_size=conf['board_size']) 65 | 66 | if conf['mode'] == 10: 67 | # self._agent_1 = FastAgent(color=BLACK) 68 | # self._agent_2 = MCTSAgent(conf, color=WHITE, use_stochastic_policy=False) 69 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=False) 70 | self._agent_2 = FastAgent(color=WHITE) 71 | 72 | if conf['mode'] == 12: 73 | self._agent_1 = FastAgent(color=BLACK) 74 | self._agent_2 = FastAgent(color=WHITE) 75 | 76 | if conf['mode'] == 11: 77 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=True) 78 | self._agent_2 = FastAgent(color=WHITE) 79 | 80 | if conf['mode'] in [0, 1, 7]: 81 | self._agent_eval = MCTSAgent(conf, color=WHITE, use_stochastic_policy=False) 82 | self._agent_eval.set_self_play(False) 83 | 84 | if self._is_self_play: 85 | self._agent_2 = self._agent_1 86 | 87 | self._rules = Rules(conf) 88 | self._renderer = Renderer(conf['screen_size'], conf['board_size']) if conf['display'] else None 89 | self._board = Board(self._renderer, conf['board_size']) 90 | self._conf = conf 91 | 92 | if type(self._agent_1) == HumanAgent: 93 | self._agent_1.set_renderer(renderer=self._renderer) 94 | if type(self._agent_2) == HumanAgent: 95 | self._agent_2.set_renderer(renderer=self._renderer) 96 | 97 | def start_mode(self): 98 | mode = self._conf['mode'] 99 | if mode == 1 or mode == 0: 100 | self.train() 101 | if mode in [2, 2.5, 3, 9, 10]: 102 | self.run(use_stochastic_policy=False) 103 | if mode == 4: 104 | self.compare(game_num=50) 105 | if mode == 5: 106 | self.collect_human_data() 107 | if mode in [6, 12]: 108 | self.collect_self_play_data() 109 | if mode == 7: 110 | self.train_on_external_data() 111 | if mode == 8: 112 | self.collect_human_vs_ai_data() 113 | if mode == 11: 114 | self.train_on_generated_data() 115 | if mode == 13: 116 | self.self_play_and_train() 117 | 118 | def set_mcts_agent_version(self, agent_1_ver, agent_2_ver): 119 | self._agent_1_ver = agent_1_ver 120 | self._agent_2_ver = agent_2_ver 121 | self._agent_1 = MCTSAgent(self._conf, color=BLACK, use_stochastic_policy=False, specify_model_ver=agent_1_ver) 122 | self._agent_2 = MCTSAgent(self._conf, color=WHITE, use_stochastic_policy=False, specify_model_ver=agent_2_ver) 123 | 124 | @log 125 | def run(self, use_stochastic_policy, record=None): 126 | if type(self._agent_1) == MCTSAgent: 127 | self._agent_1.set_stochastic_policy(use_stochastic_policy) 128 | if type(self._agent_2) == MCTSAgent: 129 | self._agent_2.set_stochastic_policy(use_stochastic_policy) 130 | 131 | self._value_list = [] 132 | Node.count = 0 133 | max_score = 0 134 | 135 | while True: 136 | if self._is_self_play: 137 | self._agent_1.color = self._board.current_player() 138 | 139 | # input.obs: current board 140 | # input.action: the last move of current board 141 | # input.stone_num: the stone num of current board 142 | # output.action: the action given by current agent 143 | # output.pi: the action distribution given by current agent, it will be added in the game record 144 | # output.prior_prob: the prior probability of this action given by the neural network of current agent 145 | # output.value: the winning rate given by the current agent 146 | action, pi, prior_prob, value = self._current_agent().play(obs=self._obs(), action=self._board.last_move(), 147 | stone_num=self._board.stone_num()) 148 | 149 | # show score: an agent will work as an evaluator, giving its evaluation of each possible position 150 | if self._show_score: 151 | try: 152 | legal_moves = list(value.keys()) # here value is score_dict 153 | score_list = [value[legal_moves[i]] for i in range(len(legal_moves))] 154 | self._board.show_scores(action_list=legal_moves, score_list=score_list) 155 | prior_prob, value = None, None 156 | except AttributeError: 157 | print('> using evaluator agent') 158 | legal_moves = self._evaluator_agent.generate(obs=self._obs(), all=True) 159 | score_list = list() 160 | for i in range(len(legal_moves)): 161 | x, y = legal_moves[i] 162 | temp_board = np.copy(self._obs()) 163 | temp_board[x][y] = self._current_agent().color 164 | self._evaluator_agent.color = self._current_agent().color 165 | score_atk, score_def = self._evaluator_agent.evaluate(temp_board) 166 | print('pos:', (x, y), ' atk:', score_atk, ' def:', score_def) 167 | score = score_atk if score_atk > score_def else -score_def 168 | score_list.append(score) 169 | self._board.show_scores(action_list=legal_moves, score_list=score_list) 170 | try: 171 | max_score = max(max(score_list), -min(score_list)) 172 | except: 173 | max_score = 0 174 | else: 175 | if type(value) is dict: 176 | prior_prob, value = None, None 177 | 178 | # show info 179 | if prior_prob is None: 180 | info = '1_2' 181 | else: 182 | prior_prob = str(round(float(prior_prob), 3)) 183 | value = str(round(-value, 3)) 184 | # now value indicates the winning rate of the last player of the current observation 185 | info = prior_prob + '_' + value 186 | 187 | result = self._check_rules(action) 188 | 189 | if self._conf['mode'] == 12 and self._board.stone_num() >= 30 and max_score < score_3_live: 190 | result = 'draw' 191 | 192 | if result == 'continue': 193 | if record is not None: 194 | record.add(self._obs(), self._board.current_player(), self._board.last_move(), pi) 195 | 196 | # self._evaluator_agent.color = self._board.current_player() 197 | self._board.move(self._board.current_player(), action, info) 198 | # print(self._evaluator_agent.evaluate(self._obs())) 199 | 200 | if value is not None: 201 | self._value_list.append(float(value)) 202 | if len(self._value_list) >= 5 and self._board.stone_num() >= 30: 203 | if self._conf['mode'] in [2, 2.5] and sum(list(map(np.abs, self._value_list[-5:]))) < 0.06: 204 | self._value_list = [] 205 | if ask_for_draw() == 1: 206 | show_result(2, 'draw') 207 | time.sleep(20) 208 | break 209 | 210 | if result == 'occupied': 211 | print(result + ': ' + str(action)) 212 | continue 213 | if result == 'blackwins' or result == 'whitewins' or result == 'draw': 214 | if record is not None: 215 | record.add(self._obs(), self._board.current_player(), self._board.last_move(), pi) 216 | self._board.move(self._board.current_player(), action, info) 217 | time.sleep(0.1) 218 | show_result(self._conf['mode'], result) 219 | 220 | if record is not None: 221 | if result == 'blackwins': 222 | flag = BLACK 223 | if result == 'whitewins': 224 | flag = WHITE 225 | if result == 'draw': 226 | flag = 0 227 | record.set_z(flag) 228 | if self._conf['mode'] in [2, 2.5, 3, 4, 9]: 229 | time.sleep(30) 230 | break 231 | self._board.clear() 232 | print('> Node number of game tree = ' + str(Node.count)) 233 | if type(self._agent_1) == MCTSAgent: 234 | self._agent_1.reset_mcts() 235 | if type(self._agent_2) == MCTSAgent: 236 | self._agent_2.reset_mcts() 237 | if result == 'blackwins': 238 | return BLACK 239 | if result == 'whitewins': 240 | return WHITE 241 | return 0 242 | 243 | def train(self): 244 | # use human play data to initialize network 245 | if self._conf['is_supervised']: 246 | human_play_data_set = DataSet() 247 | human_play_data_set.load(self._conf['human_play_data_path']) 248 | obs, col, last_move, pi, z = human_play_data_set.get_sample(1) 249 | print('> ' + str(len(obs)) + ' positions of data loaded') 250 | for i in range(50): 251 | print('supervise stage = ' + str(i + 1)) 252 | new_obs = obs.copy() 253 | new_col = col.copy() 254 | new_last_move = last_move.copy() 255 | new_pi = pi.copy() 256 | new_z = z.copy() 257 | self._agent_1.train(new_obs, new_col, new_last_move, new_pi, new_z) 258 | 259 | self._agent_1.save_model() 260 | 261 | # training based on self-play 262 | data_set = DataSet() 263 | for epoch in range(self._epoch): 264 | print('> epoch = ' + str(epoch + 1)) 265 | 266 | # self-play 267 | for i in range(self._games_num): 268 | record = GameRecord() 269 | print('> game num = ' + str(i + 1)) 270 | self.run(use_stochastic_policy=True, record=record) 271 | data_set.add_record(record) 272 | 273 | # train 274 | obs, col, last_move, pi, z = data_set.get_sample(self._sample_percentage) 275 | loss = self._agent_1.train(obs, col, last_move, pi, z) 276 | self._loss_list.append(loss) 277 | 278 | # evaluate 279 | self.evaluate() 280 | self._agent_1.save_model() 281 | self._network_version += 1 282 | data_set.clear() 283 | print('> network version = ' + str(self._network_version)) 284 | print('*****************************************************') 285 | 286 | # save loss 287 | hist_path = self._conf['fit_history_file'] + '_loss.txt' 288 | with open(hist_path, 'a') as f: 289 | f.write(str(self._loss_list)) 290 | # plot loss 291 | # x = range(1, len(self._loss_list) + 1) 292 | # y = self._loss_list 293 | # plt.plot(x, y) 294 | # plt.xlabel('epoch') 295 | # plt.ylabel('loss') 296 | # plt.savefig(self._conf['fit_history_file'] + str('.png'), dpi=300) 297 | # plt.show() 298 | 299 | def evaluate(self): 300 | print('> Evaluation begins:') 301 | 302 | # switch mode 303 | self._is_self_play = False 304 | self._agent_1.set_self_play(False) 305 | self._agent_2 = self._agent_eval 306 | self._agent_2.load_model() 307 | 308 | new_model_wins_num = 0 309 | old_model_wins_num = 0 310 | draw_num = 0 311 | total_num = self._evaluate_games_num 312 | end = False 313 | 314 | # new model plays BLACK 315 | for i in range(int(total_num / 2)): 316 | result = self.run(use_stochastic_policy=True, record=None) 317 | if result == BLACK: 318 | new_model_wins_num += 1 319 | if result == WHITE: 320 | old_model_wins_num += 1 321 | if result == 0: 322 | draw_num += 1 323 | print('> eval game ' + str(i + 1) + ' , score: ' + str(new_model_wins_num) + ':' + str(old_model_wins_num)) 324 | if new_model_wins_num > (total_num - draw_num) / 2: 325 | pass 326 | # end = True 327 | # break 328 | if old_model_wins_num > (total_num - draw_num) / 2: 329 | pass 330 | # end = True 331 | # break 332 | 333 | # switch agents 334 | self._agent_1, self._agent_2 = self._agent_2, self._agent_1 335 | self._agent_1.color = BLACK 336 | self._agent_2.color = WHITE 337 | 338 | if not end: 339 | for i in range(int(total_num / 2)): 340 | result = self.run(use_stochastic_policy=True, record=None) 341 | if result == BLACK: 342 | old_model_wins_num += 1 343 | if result == WHITE: 344 | new_model_wins_num += 1 345 | if result == 0: 346 | draw_num += 1 347 | print('> eval game ' + str(i + 1 + int(total_num / 2)) + ' , score: ' + str( 348 | new_model_wins_num) + ':' + str(old_model_wins_num)) 349 | if new_model_wins_num > (total_num - draw_num) / 2: 350 | pass 351 | # break 352 | if old_model_wins_num > (total_num - draw_num) / 2: 353 | pass 354 | # break 355 | 356 | # so far self._agent_1 -> self._agent_eval 357 | 358 | self._agent_1 = self._agent_2 359 | self._agent_1.color = BLACK 360 | self._agent_1.set_self_play(True) 361 | self._is_self_play = True 362 | 363 | if new_model_wins_num == 0: 364 | rate = 0 365 | else: 366 | rate = new_model_wins_num / (new_model_wins_num + old_model_wins_num) 367 | print('> winning rate of new model = ' + str(rate)) 368 | if rate > 0.5: 369 | print('> New model adopted') 370 | return True 371 | else: 372 | print('> New model discarded') 373 | return False 374 | 375 | def collect_human_data(self): 376 | human_data_set = DataSet() 377 | human_data_set.load(self._conf['human_play_data_path']) 378 | 379 | for i in range(self._games_num): 380 | record = GameRecord() 381 | print('> game num = ' + str(i + 1)) 382 | self.run(use_stochastic_policy=False, record=record) 383 | human_data_set.add_record(record) 384 | human_data_set.save(self._conf['human_play_data_path']) 385 | 386 | def collect_human_vs_ai_data(self): 387 | data_set = DataSet() 388 | data_set.load(self._conf['human_play_data_path']) 389 | 390 | for i in range(self._games_num): 391 | record = GameRecord() 392 | print('> game num = ' + str(i + 1)) 393 | self.run(use_stochastic_policy=False, record=record) 394 | data_set.add_record(record) 395 | if i % 10 == 0: 396 | data_set.save(self._conf['human_play_data_path']) 397 | 398 | data_set.save(self._conf['human_play_data_path']) 399 | 400 | def collect_self_play_data(self): 401 | name = os.getenv('computername') + str(os.getpid()) 402 | for epoch in range(self._epoch): 403 | print('> epoch = ' + str(epoch + 1)) 404 | data_set = DataSet() 405 | path = self._conf['self_play_data_path'] + str(epoch + 1) + '_' + str(name) + '_' 406 | for i in range(self._games_num): 407 | record = GameRecord() 408 | print('> game num = ' + str(i + 1)) 409 | self.run(use_stochastic_policy=True, record=record) 410 | data_set.add_record(record) 411 | data_set.save(path) 412 | data_set.save(path) 413 | 414 | def pack_external_data_set(self, name): 415 | path = self._conf['self_play_data_path'] + str(0) + '_' + str(name) + '_' 416 | data_set = self.get_external_data_set() 417 | data_set.save(path) 418 | 419 | def get_external_data_set(self): 420 | root, prefix = os.path.split(self._conf['self_play_data_path']) 421 | postfix_pattern = r'self\_play\_15\_\d+\_[0-9a-zA-Z\_\-]+\_col\.npy' 422 | last_path = '' 423 | external_data_set = DataSet() 424 | count = 0 425 | obs_list, col_list, last_move_list, pi_list, z_list = [], [], [], [], [] 426 | for filename in os.listdir(root): 427 | if re.match(postfix_pattern, filename): 428 | path = root + '/' + filename 429 | path = path[0:-7] 430 | if path != last_path: 431 | print('> data no.' + str(count + 1)) 432 | count += 1 433 | print('> external data path = ' + path) 434 | last_path = path 435 | external_data_set.load(path) 436 | new_obs, new_col, new_last_move, new_pi, new_z = external_data_set.get_sample(1) 437 | obs_list.extend(new_obs) 438 | col_list.extend(new_col) 439 | last_move_list.extend(new_last_move) 440 | pi_list.extend(new_pi) 441 | z_list.extend(new_z) 442 | external_data_set.clear() 443 | record = GameRecord() 444 | record.add_list(obs_list, col_list, last_move_list, pi_list, z_list) 445 | external_data_set.add_record(record) 446 | return external_data_set 447 | 448 | def train_on_external_data(self): 449 | external_data_set = self.get_external_data_set() 450 | obs, col, last_move, pi, z = external_data_set.get_sample(1) 451 | self._agent_1.train(obs, col, last_move, pi, z) 452 | self._agent_1.save_model() 453 | latest_version = self.backup_model() 454 | print('> current version: ' + str(latest_version)) 455 | 456 | def _obs(self): 457 | return self._board.board() 458 | 459 | def _current_agent(self): 460 | if self._board.current_player() == BLACK: 461 | return self._agent_1 462 | else: 463 | return self._agent_2 464 | 465 | def _check_rules(self, action): 466 | return self._rules.check_rules(self._board.board(), action, self._board.current_player()) 467 | 468 | # step 1. train on generated game record 469 | # step 2. train on self-play data generated by fast AI 470 | # step 3. if MCTS Agent is stronger than fast AI, then begin to train on self-play games 471 | # if MCTS Agent degenerated, go back to step 2 472 | 473 | def get_generated_data_set(self, sample_num=20000): 474 | gen = Generator(self._conf['board_size'], max_noise_stone_num=64) 475 | gen_data_set = DataSet() 476 | 477 | record_1 = gen.generate_live_3_oo_o_attack(sample_num=sample_num) 478 | gen_data_set.add_record(record_1) 479 | record_2 = gen.generate_live_3_oo_o_defend(sample_num=sample_num) 480 | gen_data_set.add_record(record_2) 481 | record_3 = gen.generate_live_3_ooo_attack(sample_num=sample_num) 482 | gen_data_set.add_record(record_3) 483 | record_4 = gen.generate_live_3_ooo_defend(sample_num=sample_num) 484 | gen_data_set.add_record(record_4) 485 | record_5 = gen.generate_live_4_attack(sample_num=sample_num) 486 | gen_data_set.add_record(record_5) 487 | record_6 = gen.generate_live_4_defend(sample_num=sample_num) 488 | gen_data_set.add_record(record_6) 489 | record_7 = gen.generate_dead_4_oo_oo_defend(sample_num=sample_num) 490 | gen_data_set.add_record(record_7) 491 | record_8 = gen.generate_dead_4_ooo_o_defend(sample_num=sample_num) 492 | gen_data_set.add_record(record_8) 493 | 494 | gen_data_set.save(self._conf['generated_data_path']) 495 | return gen_data_set 496 | 497 | def train_on_generated_data(self): 498 | gen_data_set = DataSet() 499 | # gen_data_set.load(self._conf['generated_data_path']) 500 | gen = Generator(self._conf['board_size'], max_noise_stone_num=128) 501 | record_2 = gen.generate_live_3_oo_o_defend(sample_num=10000) 502 | record_4 = gen.generate_live_3_ooo_defend(sample_num=10000) 503 | gen_data_set.add_record(record_4) 504 | gen_data_set.add_record(record_2) 505 | obs, col, last_move, pi, z = gen_data_set.get_sample(0.1, shuffle=True) 506 | self._agent_1.train(obs, col, last_move, pi, z) 507 | self._agent_1.save_model() 508 | 509 | def compare(self, game_num=20): 510 | agent_1_win_num, agent_2_win_num = 0, 0 511 | 512 | for i in range(int(game_num / 2)): 513 | result = self.run(use_stochastic_policy=False) 514 | if result == BLACK: 515 | agent_1_win_num += 1 516 | if result == WHITE: 517 | agent_2_win_num += 1 518 | 519 | self._agent_1, self._agent_2 = self._agent_2, self._agent_1 520 | 521 | for i in range(int(game_num / 2)): 522 | result = self.run(use_stochastic_policy=False) 523 | if result == WHITE: 524 | agent_2_win_num += 1 525 | if result == BLACK: 526 | agent_1_win_num += 1 527 | 528 | result = "> ver." + str(self._agent_1_ver) + " : ver." + str(self._agent_2_ver) + ' = ' + str( 529 | agent_1_win_num) + " : " + str(agent_2_win_num) 530 | print(result) 531 | send_email_report(to_addr="kennyxz@live.cn", content=result) 532 | 533 | def _get_model_version(self): 534 | root = 'AlphaGomoku/network/model/' 535 | postfix_pattern = r'model\_b\_15\_ver\_[0-9]+\.h5' 536 | max_num = 0 537 | for filename in os.listdir(root): 538 | if re.match(postfix_pattern, filename): 539 | num = int(filename[15:-3]) 540 | if num > max_num: 541 | max_num = num 542 | return max_num 543 | 544 | def backup_model(self): 545 | root = 'AlphaGomoku/network/model/' 546 | max_num = self._get_model_version() 547 | path_ob = root + 'model_b_15.h5' 548 | path_ow = root + 'model_w_15.h5' 549 | path_b = root + 'model_b_15_ver_' + str(max_num + 1) + '.h5' 550 | path_w = root + 'model_w_15_ver_' + str(max_num + 1) + '.h5' 551 | shutil.copy(path_ob, path_b) 552 | shutil.copy(path_ow, path_w) 553 | info = '> model ver. ' + str(max_num + 1) + ' saved.' 554 | print(info) 555 | send_email_report(to_addr="kennyxz@live.cn", content=info) 556 | return max_num + 1 557 | 558 | def self_play_and_train(self): 559 | while True: 560 | self.collect_self_play_data() 561 | self.train_on_external_data() 562 | 563 | def temp(self): 564 | mcts_win_num, fast_win_num = 0, 0 565 | for i in range(50): 566 | result = self.run(use_stochastic_policy=False, record=None) 567 | if result is BLACK: 568 | mcts_win_num += 1 569 | if result is WHITE: 570 | fast_win_num += 1 571 | print("MCTS Agent:Fast Agent = ", mcts_win_num, ":", fast_win_num) 572 | self._agent_1 = FastAgent(color=BLACK) 573 | self._agent_2 = MCTSAgent(self._conf, color=WHITE, use_stochastic_policy=False) 574 | for i in range(50): 575 | result = self.run(use_stochastic_policy=False, record=None) 576 | if result is BLACK: 577 | fast_win_num += 1 578 | if result is WHITE: 579 | mcts_win_num += 1 580 | print("MCTS Agent:Fast Agent = ", mcts_win_num, ":", fast_win_num) 581 | print("MCTS Agent:Fast Agent = ", mcts_win_num, ":", fast_win_num) 582 | -------------------------------------------------------------------------------- /AlphaGomoku/network/__init__.py: -------------------------------------------------------------------------------- 1 | from .network import * 2 | -------------------------------------------------------------------------------- /AlphaGomoku/network/history/log_15_2_512.txt: -------------------------------------------------------------------------------- 1 | {'activation_9_loss': [3.551663814544678, 2.9409815425872803], 'loss': [4.1677450180053714, 3.250189147949219], 'activation_12_loss': [0.5576915459632874, 0.2508242372274399]}{'activation_21_loss': [3.0998087692260743, 1.9231944646835326], 'activation_24_loss': [0.5244228768348694, 0.21378136962652206], 'loss': [3.681065465927124, 2.1938106298446653]} -------------------------------------------------------------------------------- /AlphaGomoku/network/history/save training history here.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/network/history/save training history here.txt -------------------------------------------------------------------------------- /AlphaGomoku/network/model/log.txt: -------------------------------------------------------------------------------- 1 | ver 11: add decay of winning rate -------------------------------------------------------------------------------- /AlphaGomoku/network/model/model_b_15.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/network/model/model_b_15.h5 -------------------------------------------------------------------------------- /AlphaGomoku/network/model/model_w_15.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/network/model/model_w_15.h5 -------------------------------------------------------------------------------- /AlphaGomoku/network/network.py: -------------------------------------------------------------------------------- 1 | from keras.engine.topology import Input 2 | from keras.engine.training import Model 3 | from keras.layers import add 4 | from keras.layers.convolutional import Conv2D 5 | from keras.layers.core import Activation, Dense, Flatten 6 | from keras.layers.normalization import BatchNormalization 7 | from keras.regularizers import l2 8 | from keras.optimizers import SGD 9 | from ..rules import * 10 | from ..utils import * 11 | import numpy as np 12 | import os 13 | 14 | 15 | class Network: 16 | def __init__(self, conf): 17 | # All hyperparameters used in the model 18 | self._board_size = conf['board_size'] # the size of the playing board 19 | self._lr = conf['learning_rate'] # learning rate of SGD (2e-3) 20 | self._momentum = conf['momentum'] # nesterov momentum (1e-1) 21 | self._l2_coef = conf['l2'] # coefficient of L2 penalty (1e-4) 22 | self._mini_batch_size = conf['mini_batch_size'] # the size of batch when training the network 23 | self._fit_epochs = conf['fit_epochs'] # the number of iteration 24 | 25 | # Define Network 26 | self._build_network() 27 | 28 | # the following three lines are for a special bug, see also: https://www.jianshu.com/p/c84ae0527a3f 29 | temp_board = np.array([[0 for _ in range(conf['board_size'])] for _ in range(conf['board_size'])]) 30 | temp_board[0][0] = BLACK 31 | print('> testing network ...') 32 | self.predict(board=temp_board, color=WHITE, last_move=(0, 0)) 33 | print('> test finished.') 34 | 35 | # The location of the file which stores the parameters of the network 36 | self._net_para_file = conf['net_para_file'] 37 | self._fit_history_file = conf['fit_history_file'] 38 | 39 | # Whether we use previous model or not 40 | self._use_previous_model = conf['use_previous_model'] 41 | if self._use_previous_model: 42 | if os.path.exists(self._net_para_file): 43 | self._model.load_weights(self._net_para_file) 44 | else: 45 | print('> error: [use_previous_model] = True, ' + self._net_para_file + ' not found') 46 | 47 | @log 48 | def _build_network(self): 49 | # Input_Layer 50 | init_x = Input((3, self._board_size, self._board_size)) # the input is a tensor with the shape 3*(15*15) 51 | x = init_x 52 | 53 | # First Convolutional Layer with 32 filters 54 | x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', 55 | data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x) 56 | x = BatchNormalization()(x) 57 | x = Activation('relu')(x) 58 | 59 | # Three Residual Blocks 60 | for _ in range(3): 61 | x = self._residual_block(x) 62 | 63 | # Policy Head for generating prior probability vector for each action 64 | policy = Conv2D(filters=2, kernel_size=(1, 1), strides=(1, 1), padding='same', 65 | data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x) 66 | policy = BatchNormalization()(policy) 67 | policy = Activation('relu')(policy) 68 | policy = Flatten()(policy) 69 | policy = Dense(self._board_size * self._board_size, kernel_regularizer=l2(self._l2_coef))(policy) 70 | self._policy = Activation('softmax')(policy) 71 | 72 | # Value Head for generating value of each action 73 | value = Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), padding='same', 74 | data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) 75 | value = BatchNormalization()(value) 76 | value = Activation('relu')(value) 77 | value = Flatten()(value) 78 | value = Dense(32, kernel_regularizer=l2(self._l2_coef))(value) 79 | value = Activation('relu')(value) 80 | value = Dense(1, kernel_regularizer=l2(self._l2_coef))(value) 81 | self._value = Activation('tanh')(value) 82 | 83 | # Define Network 84 | self._model = Model(inputs=init_x, outputs=[self._policy, self._value]) 85 | 86 | # Define the Loss Function 87 | opt = SGD(lr=self._lr, momentum=self._momentum, nesterov=True) # stochastic gradient descend with momentum 88 | losses_type = ['categorical_crossentropy', 'mean_squared_error'] # cross-entrophy and MSE are weighted equally 89 | self._model.compile(optimizer=opt, loss=losses_type) 90 | 91 | def _residual_block(self, x): 92 | x_shortcut = x 93 | x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', 94 | data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) 95 | x = BatchNormalization()(x) 96 | x = Activation('relu')(x) 97 | x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', 98 | data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) 99 | x = BatchNormalization()(x) 100 | x = add([x, x_shortcut]) # Skip Connection 101 | x = Activation('relu')(x) 102 | return x 103 | 104 | def predict(self, board, color, last_move): 105 | if sum(sum(board)) == 0 and color == WHITE: 106 | print('error: network.predict') 107 | if sum(sum(board)) == 1 and color == BLACK: 108 | print('error: network.predict') 109 | tensor = board2tensor(board, color, last_move) 110 | policy, value_tensor = self._model.predict_on_batch(tensor) 111 | value = value_tensor[0][0] 112 | return policy, value 113 | 114 | def train(self, board_list, color_list, last_move_list, pi_list, z_list): 115 | size = len(color_list) 116 | for i in range(size): 117 | if sum(sum(board_list[i])) == 0 and color_list[i] == WHITE: 118 | print('error: network.train') 119 | print(board_list[i]) 120 | print(color_list[i]) 121 | if sum(sum(board_list[i])) == 1 and color_list[i] == BLACK: 122 | print('error: network.train') 123 | print(board_list[i]) 124 | print(color_list[i]) 125 | 126 | # Data Augmentation through symmetric and self-rotation transformation 127 | board_aug = [] 128 | color_aug = [] 129 | last_move_aug = [] 130 | pi_aug = [] 131 | z_aug = [] 132 | for i in range(len(board_list)): 133 | new_board, new_color, new_last_move, new_pi, new_z = \ 134 | data_augmentation(board_list[i], color_list[i], last_move_list[i], pi_list[i], z_list[i]) 135 | board_aug.extend(new_board) 136 | color_aug.extend(new_color) 137 | last_move_aug.extend(new_last_move) 138 | pi_aug.extend(new_pi) 139 | z_aug.extend(new_z) 140 | board_list.extend(board_aug) 141 | color_list.extend(color_aug) 142 | last_move_list.extend(last_move_aug) 143 | pi_list.extend(pi_aug) 144 | z_list.extend(z_aug) 145 | 146 | # Regularize Data 147 | board_list = np.array([board2tensor(board_list[i], color_list[i], last_move_list[i], reshape_flag=False) 148 | for i in range(len(board_list))]) 149 | pi_list = np.array(pi_list) 150 | z_list = np.array(z_list) 151 | 152 | # Training 153 | hist = self._model.fit(board_list, [pi_list, z_list], epochs=self._fit_epochs, batch_size=self._mini_batch_size, 154 | verbose=1) 155 | hist_path = self._fit_history_file + '_' + str(self._fit_epochs) + '_' + str(self._mini_batch_size) + '.txt' 156 | with open(hist_path, 'a') as f: 157 | f.write(str(hist.history)) 158 | return hist.history['loss'][0] # only sample loss of first epoch 159 | 160 | def get_para(self): 161 | net_para = self._model.get_weights() 162 | return net_para 163 | 164 | def save_model(self): 165 | """ save model para to file """ 166 | self._model.save_weights(self._net_para_file) 167 | 168 | def load_model(self): 169 | if os.path.exists(self._net_para_file): 170 | self._model.load_weights(self._net_para_file) 171 | else: 172 | print('> error: ' + self._net_para_file + ' not found') 173 | 174 | 175 | # Transform a board(matrix) to a tensor 176 | def board2tensor(board, color, last_move, reshape_flag=True): 177 | # Current-Stone Layer 178 | cur = np.array(np.array(board) == color, dtype=np.int) 179 | 180 | # Enemy-Stone Layer 181 | e = np.array(np.array(board) == -color, dtype=np.int) 182 | 183 | # Last Step Layer 184 | l = np.zeros((board.shape[0], board.shape[1])) 185 | if last_move is not None: 186 | l[last_move[0]][last_move[1]] = 1 187 | 188 | # Color Layer 189 | # flag = (1 if color == BLACK else 0) 190 | # c = flag * np.ones((board.shape[0], board.shape[1])) 191 | 192 | # Stack cur,e,c into tensor 193 | tensor = np.array([cur, e, l]) 194 | if reshape_flag: 195 | tensor = tensor.reshape(1, tensor.shape[0], tensor.shape[1], tensor.shape[2]) 196 | return tensor 197 | 198 | 199 | # Augment the training data pool through plane transformation 200 | def data_augmentation(board, color, last_move, pi, z): 201 | new_board = [] 202 | new_color = [color] * 7 203 | new_last_move = [] 204 | new_pi = [] 205 | new_z = [z] * 7 206 | for type in range(1, 8): 207 | board_t = board_transform(board, type, flag=1) 208 | last_move_t = coordinate_transform(last_move, type, board.shape[0], flag=1) 209 | pi_t = input_encode(pi, type, board.shape[0]) 210 | new_board.append(board_t) 211 | new_last_move.append(last_move_t) 212 | new_pi.append(pi_t) 213 | return new_board, new_color, new_last_move, new_pi, new_z 214 | 215 | 216 | # Transform the input vector given transformation type 217 | def input_encode(vec, num, size): 218 | mat = np.reshape(vec, (size, size)) # reshape vector into matrix 219 | mat = board_transform(mat, num, flag=1) 220 | vec = np.reshape(mat, (1, size ** 2)) 221 | return vec[0] 222 | 223 | 224 | # Transform the output vector to its initial shape given the transformation type 225 | def output_decode(vec, num, size): 226 | mat = np.reshape(vec, (size, size)) # reshape vector into matrix 227 | inv_mat = board_transform(mat, num, flag=2) 228 | vec = np.reshape(inv_mat, (1, size ** 2)) 229 | return vec[0] 230 | 231 | 232 | def coordinate_transform(move, type, size, flag): 233 | if move is None: 234 | return None 235 | board = np.zeros((size, size)) 236 | board[move[0]][move[1]] = 1 237 | board_t = board_transform(board, type, flag) 238 | temp = np.where(board_t == 1) 239 | new_move = (temp[0][0], temp[1][0]) 240 | return new_move 241 | 242 | 243 | # Transform the input board by simple plane transformation 244 | def board_transform(mat, num, flag=0): 245 | def R0(mat): 246 | return mat 247 | 248 | def R1(mat): 249 | mat = np.rot90(mat, 1) 250 | return mat 251 | 252 | def R2(mat): 253 | mat = np.rot90(mat, 2) 254 | return mat 255 | 256 | def R3(mat): 257 | mat = np.rot90(mat, 3) 258 | return mat 259 | 260 | def S(mat): 261 | mat = R0(np.fliplr(mat)) 262 | return mat 263 | 264 | def SR1(mat): 265 | mat = R1(np.fliplr(mat)) 266 | return mat 267 | 268 | def SR2(mat): 269 | mat = R2(np.fliplr(mat)) 270 | return mat 271 | 272 | def SR3(mat): 273 | mat = R3(np.fliplr(mat)) 274 | return mat 275 | 276 | # Random Transformation 277 | if flag == 0: 278 | num = int(np.random.randint(8, size=1)) 279 | total_type = ['R0', 'R1', 'R2', 'R3', 'S', 'SR1', 'SR2', 'SR3'] 280 | real_type = total_type[num] 281 | return eval(real_type)(mat), num 282 | 283 | # Encode 284 | elif flag == 1: # encode 285 | total_type = ['R0', 'R1', 'R2', 'R3', 'S', 'SR1', 'SR2', 'SR3'] 286 | real_type = total_type[num] 287 | return eval(real_type)(mat) 288 | 289 | # Decode 290 | else: 291 | inv_total_type = ['R0', 'R3', 'R2', 'R1', 'S', 'SR1', 'SR2', 'SR3'] 292 | real_type = inv_total_type[num] 293 | return eval(real_type)(mat) 294 | -------------------------------------------------------------------------------- /AlphaGomoku/rules.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | BLACK = 1 4 | WHITE = -1 5 | 6 | 7 | class Rules: 8 | 9 | def __init__(self, conf): 10 | self._board_size = conf['board_size'] 11 | self._board = [[0 for j in range(conf['board_size'])] for i in range(conf['board_size'])] 12 | self._conf = conf 13 | 14 | # The list that records the locations of all live3 and live4 15 | self._live3_list = [] 16 | self._live4_list = [] 17 | 18 | self._stone_number = 0 19 | 20 | def _read(self, board): 21 | self._board = board 22 | 23 | def board(self): 24 | return self._board 25 | 26 | def _count_on_direction(self, i, j, i_direction, j_direction, color): 27 | # Note: idirection has three options , i.e. -1,0,and 1 ,where -1 for up 28 | # 1 for down and 0 for unchange. jdirection also has three options -1,0,1 29 | # ,where -1 for left, 1 for right and 0 for unchange. We should mention that 30 | # idirection and jdirection can't equal to 0 at the same time. 31 | # We count the number of consecutive stones with the color given to function 32 | # in the given direction. 33 | count = 0 34 | for step in range(1,5): # We only needs to consider stones within 4 steps 35 | if i + step * i_direction < 0 or i + step * i_direction > self._board_size - 1 or j + step * j_direction < 0 or j + step * j_direction > self._board_size - 1: 36 | break 37 | if self._board[i + step * i_direction][j + step * j_direction] == color: 38 | count += 1 39 | else: 40 | break 41 | return count 42 | 43 | # find the maximal number of consecutive stones 44 | def _count_consecutive(self, i, j, color): 45 | # The purpose of the function is to check if the consecutive number reaches 5 so that 46 | # the Renju can end or if it is more than 5 so that the forbidden overline is formed(Black can't overline) 47 | lines = [[[0, 1], [0, -1]], [[1, -1], [-1, 1]], [[1, 0], [-1, 0]], [[-1, -1], [1, 1]]] 48 | counts = [] 49 | for line in lines: 50 | count = 1 51 | for direction in line: 52 | count += self._count_on_direction(i, j, direction[0], direction[1], color) 53 | counts.append(count) 54 | return 5 if 5 in counts else max(counts) 55 | # Note: the reason why we return this is that once 5 is found, then overline is ignored 56 | 57 | def _update_live3_list(self): 58 | M = -10 59 | N = -20 60 | L = -50 61 | feature_A = [M, 1, 1, 1, N] 62 | feature_B = [M, 1, 1, N, 1, L] 63 | feature_C = [M, 1, N, 1, 1, L] 64 | 65 | self._live3_list.clear() 66 | 67 | # A,horizontal and vertical 68 | for i in range(15): 69 | for j in range(11): 70 | u = self._board[i][j : j + 5] 71 | v = [self._board[k][i] for k in range(j, j + 5)] 72 | flag_H = self._dot(u, feature_A) 73 | flag_V = self._dot(v, feature_A) 74 | if flag_H == 3: 75 | pos = [[i, k] for k in range(j + 1, j + 4)] 76 | self._live3_list.append(pos) 77 | if flag_V == 3: 78 | pos = [[k, i] for k in range(j + 1, j + 4)] 79 | self._live3_list.append(pos) 80 | 81 | # A, diagonal 82 | for i in range(11): 83 | for j in range(11): 84 | u = [self._board[i + k][j + k] for k in range(5)] 85 | v = [self._board[i + k][14 - j - k] for k in range(5)] 86 | flag_L = self._dot(u, feature_A) 87 | flag_R = self._dot(v, feature_A) 88 | if flag_L == 3: 89 | pos = [[i + k, j + k] for k in range(1, 4)] 90 | self._live3_list.append(pos) 91 | if flag_R == 3: 92 | pos = [[i + k, 14 - j - k] for k in range(1, 4)] 93 | self._live3_list.append(pos) 94 | 95 | # B, horizontal and vertical 96 | for i in range(15): 97 | for j in range(10): 98 | u = self._board[i][j: j + 6] 99 | v = [self._board[k][i] for k in range(j, j + 6)] 100 | flag_H = self._dot(u, feature_B) 101 | flag_V = self._dot(v, feature_B) 102 | if flag_H == 3: 103 | pos = [[i, j + 1], [i, j + 2], [i, j + 4]] 104 | self._live3_list.append(pos) 105 | if flag_V == 3: 106 | pos = [[j + 1, i], [j + 2, i], [j + 4, i]] 107 | self._live3_list.append(pos) 108 | 109 | # B, diagonal 110 | for i in range(10): 111 | for j in range(10): 112 | u = [self._board[i + k][j + k] for k in range(6)] 113 | v = [self._board[i + k][14 - j - k] for k in range(6)] 114 | flag_L = self._dot(u, feature_B) 115 | flag_R = self._dot(v, feature_B) 116 | if flag_L == 3: 117 | pos = [[i + 1, j + 1], [i + 2, j + 2], [i + 4, j + 4]] 118 | self._live3_list.append(pos) 119 | if flag_R == 3: 120 | pos = [[i + 1, 13 - j], [i + 2, 12 - j], [i + 4, 10 - j]] 121 | self._live3_list.append(pos) 122 | 123 | # C, horizontal and vertical 124 | for i in range(15): 125 | for j in range(10): 126 | u = self._board[i][j: j + 6] 127 | v = [self._board[k][i] for k in range(j, j + 6)] 128 | flag_H = self._dot(u, feature_C) 129 | flag_V = self._dot(v, feature_C) 130 | if flag_H == 3: 131 | pos = [[i, j + 1], [i, j + 3], [i, j + 4]] 132 | self._live3_list.append(pos) 133 | if flag_V == 3: 134 | pos = [[j + 1, i], [j + 3, i], [j + 4, i]] 135 | self._live3_list.append(pos) 136 | 137 | # C, diagonal 138 | for i in range(10): 139 | for j in range(10): 140 | u = [self._board[i + k][j + k] for k in range(6)] 141 | v = [self._board[i + k][14 - j - k] for k in range(6)] 142 | flag_L = self._dot(u, feature_C) 143 | flag_R = self._dot(v, feature_C) 144 | if flag_L == 3: 145 | pos = [[i + 1, j + 1], [i + 3, j + 3], [i + 4, j + 4]] 146 | self._live3_list.append(pos) 147 | if flag_R == 3: 148 | pos = [[i + 1, 13 - j], [i + 3, 11 - j], [i + 4, 10 - j]] 149 | self._live3_list.append(pos) 150 | 151 | def _update_live4_list(self): 152 | M = -100 153 | feature_A = [M, 1, 1, 1, 1, M] # 4 or 104 154 | feature_B = [0, 1, 1, M, 1, 1, 0] # 4 only 155 | feature_C = [0, 1, M, 1, 1, 1, 0] # 4 only 156 | feature_D = [0, 1, 1, 1, M, 1, 0] # 4 only 157 | feature_E = [1, 1, 1, 1, M] # 4 only 158 | feature_F = [M, 1, 1, 1, 1] # 4 only 159 | feature_G = [1, 1, 1, M, 1] # 4 only 160 | feature_H = [1, M, 1, 1, 1] # 4 only 161 | feature_I = [1, 1, M, 1, 1] # 4 only 162 | 163 | self._live4_list.clear() 164 | 165 | # A,horizontal and vertical, feature_A = [M, 1, 1, 1, 1, M] 166 | for i in range(15): 167 | for j in range(10): 168 | u = self._board[i][j: j + 6] 169 | v = [self._board[k][i] for k in range(j, j + 6)] 170 | flag_H = self._dot(u, feature_A) 171 | flag_V = self._dot(v, feature_A) 172 | if flag_H == 4 or flag_H == 104: 173 | pos = [[i, k] for k in range(j + 1, j + 5)] 174 | self._live4_list.append(pos) 175 | if flag_V == 4 or flag_V == 104: 176 | pos = [[k, i] for k in range(j + 1, j + 5)] 177 | self._live4_list.append(pos) 178 | 179 | # A, diagonal 180 | for i in range(10): 181 | for j in range(10): 182 | u = [self._board[i + k][j + k] for k in range(6)] 183 | v = [self._board[i + k][14 - j - k] for k in range(6)] 184 | flag_L = self._dot(u, feature_A) 185 | flag_R = self._dot(v, feature_A) 186 | if flag_L == 4 or flag_L == 104: 187 | pos = [[i + k, j + k] for k in range(1, 5)] 188 | self._live4_list.append(pos) 189 | if flag_R == 4 or flag_R == 104: 190 | pos = [[i + k, 14 - j - k] for k in range(1, 5)] 191 | self._live4_list.append(pos) 192 | 193 | # B, horizontal and vertical, feature_B = [0, 1, 1, M, 1, 1, 0] 194 | for i in range(15): 195 | for j in range(9): 196 | u = self._board[i][j: j + 7] 197 | v = [self._board[k][i] for k in range(j, j + 7)] 198 | flag_H = self._dot(u, feature_B) 199 | flag_V = self._dot(v, feature_B) 200 | if flag_H == 4: 201 | pos = [[i, j + 1], [i, j + 2], [i, j + 4], [i, j + 5]] 202 | self._live4_list.append(pos) 203 | if flag_V == 4: 204 | pos = [[j + 1, i], [j + 2, i], [j + 4, i], [j + 5, i]] 205 | self._live4_list.append(pos) 206 | 207 | # B, diagonal 208 | for i in range(9): 209 | for j in range(9): 210 | u = [self._board[i + k][j + k] for k in range(7)] 211 | v = [self._board[i + k][14 - j - k] for k in range(7)] 212 | flag_L = self._dot(u, feature_B) 213 | flag_R = self._dot(v, feature_B) 214 | if flag_L == 4: 215 | pos = [[i + 1, j + 1], [i + 2, j + 2], [i + 4, j + 4], [i + 5, j + 5]] 216 | self._live4_list.append(pos) 217 | if flag_R == 4: 218 | pos = [[i + 1, 13 - j], [i + 2, 12 - j], [i + 4, 10 - j], [i + 5, 9 - j]] 219 | self._live4_list.append(pos) 220 | 221 | # C, horizontal and vertical, feature_C = [0, 1, M, 1, 1, 1, 0] 222 | for i in range(15): 223 | for j in range(9): 224 | u = self._board[i][j: j + 7] 225 | v = [self._board[k][i] for k in range(j, j + 7)] 226 | flag_H = self._dot(u, feature_C) 227 | flag_V = self._dot(v, feature_C) 228 | if flag_H == 4: 229 | pos = [[i, j + 1], [i, j + 3], [i, j + 4], [i, j + 5]] 230 | self._live4_list.append(pos) 231 | if flag_V == 4: 232 | pos = [[j + 1, i], [j + 3, i], [j + 4, i], [j + 5, i]] 233 | self._live4_list.append(pos) 234 | 235 | # C, diagonal 236 | for i in range(9): 237 | for j in range(9): 238 | u = [self._board[i + k][j + k] for k in range(7)] 239 | v = [self._board[i + k][14 - j - k] for k in range(7)] 240 | flag_L = self._dot(u, feature_C) 241 | flag_R = self._dot(v, feature_C) 242 | if flag_L == 4: 243 | pos = [[i + 1, j + 1], [i + 3, j + 3], [i + 4, j + 4], [i + 5, j + 5]] 244 | self._live4_list.append(pos) 245 | if flag_R == 4: 246 | pos = [[i + 1, 13 - j], [i + 3, 11 - j], [i + 4, 10 - j], [i + 5, 9 - j]] 247 | self._live4_list.append(pos) 248 | 249 | # D, horizontal and vertical, feature_D = [0, 1, 1, 1, M, 1, 0] 250 | for i in range(15): 251 | for j in range(9): 252 | u = self._board[i][j: j + 7] 253 | v = [self._board[k][i] for k in range(j, j + 7)] 254 | flag_H = self._dot(u, feature_D) 255 | flag_V = self._dot(v, feature_D) 256 | if flag_H == 4: 257 | pos = [[i, j + 1], [i, j + 2], [i, j + 3], [i, j + 5]] 258 | self._live4_list.append(pos) 259 | if flag_V == 4: 260 | pos = [[j + 1, i], [j + 2, i], [j + 3, i], [j + 5, i]] 261 | self._live4_list.append(pos) 262 | 263 | # D, diagonal 264 | for i in range(9): 265 | for j in range(9): 266 | u = [self._board[i + k][j + k] for k in range(7)] 267 | v = [self._board[i + k][14 - j - k] for k in range(7)] 268 | flag_L = self._dot(u, feature_D) 269 | flag_R = self._dot(v, feature_D) 270 | if flag_L == 4: 271 | pos = [[i + 1, j + 1], [i + 2, j + 2], [i + 3, j + 3], [i + 5, j + 5]] 272 | self._live4_list.append(pos) 273 | if flag_R == 4: 274 | pos = [[i + 1, 13 - j], [i + 2, 12 - j], [i + 3, 11 - j], [i + 5, 9 - j]] 275 | self._live4_list.append(pos) 276 | 277 | # E feature_E = [1, 1, 1, 1, M] 278 | # horizontal and vertical, boundry 279 | for i in range(15): 280 | u1 = self._board[i][0:5] 281 | u2 = [self._board[j][i] for j in range(5)] 282 | if self._dot(u1, feature_E) == 4: 283 | pos = [[i, k] for k in range(4)] 284 | self._live4_list.append(pos) 285 | if self._dot(u2, feature_E) == 4: 286 | pos = [[k, i] for k in range(4)] 287 | self._live4_list.append(pos) 288 | 289 | # diagonal, from row 1 to row 5. Noted that there is no need for the row 11 to row 15 since 290 | # it can be detected by feature (diagonal) except two special position. Similarly, for 291 | # feature F there is no need for row 1 to row 5 except two special position. 292 | # diagonal (up border) 293 | for i in range(11): 294 | u3 = [self._board[k][i + k] for k in range(5)] 295 | u4 = [self._board[k][i - k + 4] for k in range(5)] 296 | if self._dot(u3, feature_E) == 4: 297 | pos = [[k, i + k] for k in range(4)] 298 | self._live4_list.append(pos) 299 | if self._dot(u4, feature_E) == 4: 300 | pos = [[k, i - k + 4] for k in range(4)] 301 | self._live4_list.append(pos) 302 | 303 | # diagonal (left and right border) 304 | for i in range(1, 11): 305 | u5 = [self._board[i + k][k] for k in range(5)] 306 | u6 = [self._board[i + k][14 - k] for k in range(5)] 307 | if self._dot(u5, feature_E) == 4: 308 | pos = [[i + k, k] for k in range(4)] 309 | self._live4_list.append(pos) 310 | if self._dot(u6, feature_E) == 4: 311 | pos = [[i + k, 14 - k] for k in range(4)] 312 | self._live4_list.append(pos) 313 | 314 | # F feature_F = [M, 1, 1, 1, 1], similar to feature_E 315 | # horizontal and vertical, right and bottom border 316 | for i in range(15): 317 | u1 = self._board[i][10:15] 318 | u2 = [self._board[j][i] for j in range(10, 15)] 319 | if self._dot(u1, feature_F) == 4: 320 | pos = [[i, k] for k in range(11, 15)] 321 | self._live4_list.append(pos) 322 | if self._dot(u2, feature_F) == 4: 323 | pos = [[k, i] for k in range(11, 15)] 324 | self._live4_list.append(pos) 325 | 326 | # diagonal, bottom border 327 | for i in range(11): 328 | u3 = [self._board[k + 10][i + k] for k in range(5)] 329 | u4 = [self._board[k + 10][i - k + 4] for k in range(5)] 330 | if self._dot(u3, feature_F) == 4: 331 | pos = [[k + 10, i + k] for k in range(1, 5)] 332 | self._live4_list.append(pos) 333 | if self._dot(u4, feature_F) == 4: 334 | pos = [[k + 10, i - k + 4] for k in range(1, 5)] 335 | self._live4_list.append(pos) 336 | 337 | # diagonal, left and right border 338 | for i in range(1, 11): 339 | u5 = [self._board[i + k][10 + k] for k in range(5)] 340 | u6 = [self._board[i + k][4 - k] for k in range(5)] 341 | if self._dot(u5, feature_F) == 4: 342 | pos = [[i + k, 10 + k] for k in range(1, 5)] 343 | self._live4_list.append(pos) 344 | if self._dot(u6, feature_F) == 4: 345 | pos = [[i + k, 4 - k] for k in range(1, 5)] 346 | self._live4_list.append(pos) 347 | 348 | # G feature_G = [1, 1, 1, M, 1] 349 | # horizontal and vertical 350 | for i in range(15): 351 | u1 = self._board[i][0:5] 352 | u2 = self._board[i][10:15] 353 | u3 = [self._board[k][i] for k in range(5)] 354 | u4 = [self._board[k][i] for k in range(10, 15)] 355 | if self._dot(u1, feature_G) == 4: 356 | pos = [[i, 0], [i, 1], [i, 2], [i, 4]] 357 | self._live4_list.append(pos) 358 | if self._dot(u2, feature_G) == 4: 359 | pos = [[i, 10], [i, 11], [i, 12], [i, 14]] 360 | self._live4_list.append(pos) 361 | if self._dot(u3, feature_G) == 4: 362 | pos = [[0, i], [1, i], [2, i], [4, i]] 363 | self._live4_list.append(pos) 364 | if self._dot(u4, feature_G) == 4: 365 | pos = [[10, i], [11, i], [12, i], [14, i]] 366 | self._live4_list.append(pos) 367 | 368 | # diagonal. repetition exists so it's better to pick them out: u13-u16 369 | for i in range(10): 370 | u5 = [self._board[i + k + 1][k] for k in range(5)] 371 | u6 = [self._board[k][i + k + 1] for k in range(5)] 372 | u7 = [self._board[i + k][k + 10] for k in range(5)] 373 | u8 = [self._board[k + 10][i + k] for k in range(5)] 374 | u9 = [self._board[k][i + 4 - k] for k in range(5)] 375 | u10 = [self._board[i + k + 1][14 - k] for k in range(5)] 376 | u11 = [self._board[k + 10][i + 5 - k] for k in range(5)] 377 | u12 = [self._board[i + k][4 - k] for k in range(5)] 378 | if self._dot(u5, feature_G) == 4: 379 | pos = [[i + 1, 0],[i + 2, 1], [i + 3, 2], [i + 5, 4]] 380 | self._live4_list.append(pos) 381 | if self._dot(u6, feature_G) == 4: 382 | pos = [[0, i + 1],[1, i + 2], [2, i + 3], [4, i + 5]] 383 | self._live4_list.append(pos) 384 | if self._dot(u7, feature_G) == 4: 385 | pos = [[i, 10], [i + 1, 11], [i + 2, 12], [i + 4, 14]] 386 | self._live4_list.append(pos) 387 | if self._dot(u8, feature_G) == 4: 388 | pos = [[10, i], [11, i + 1], [12, i + 2], [14, i + 4]] 389 | self._live4_list.append(pos) 390 | if self._dot(u9, feature_G) == 4: 391 | pos = [[0, i + 4], [1, i + 3], [2, i + 2], [4, i]] 392 | self._live4_list.append(pos) 393 | if self._dot(u10, feature_G) == 4: 394 | pos = [[i + 1, 14], [i + 2, 13], [i + 3, 12], [i + 5, 10]] 395 | self._live4_list.append(pos) 396 | if self._dot(u11, feature_G) == 4: 397 | pos = [[10, i + 5], [11, i + 4], [12, i + 3], [14, i + 1]] 398 | self._live4_list.append(pos) 399 | if self._dot(u12, feature_G) == 4: 400 | pos = [[i, 4], [i + 1, 3], [i + 2, 2], [i + 4, 0]] 401 | self._live4_list.append(pos) 402 | 403 | u13 = [self._board[k][k] for k in range(5)] 404 | u14 = [self._board[k][k] for k in range(10, 15)] 405 | u15 = [self._board[k][14 - k] for k in range(5)] 406 | u16 = [self._board[k][14 - k] for k in range(10, 15)] 407 | 408 | if self._dot(u13, feature_G) == 4: 409 | pos = [[0, 0], [1, 1], [2, 2], [4, 4]] 410 | self._live4_list.append(pos) 411 | if self._dot(u14, feature_G) == 4: 412 | pos = [[10, 10], [11, 11], [12, 12], [14, 14]] 413 | self._live4_list.append(pos) 414 | if self._dot(u15, feature_G) == 4: 415 | pos = [[0, 14], [1, 13], [2, 12], [4, 10]] 416 | self._live4_list.append(pos) 417 | if self._dot(u16, feature_G) == 4: 418 | pos = [[10, 4], [11, 3], [12, 2], [14, 0]] 419 | self._live4_list.append(pos) 420 | 421 | # H feature_H = [1, M, 1, 1, 1] 422 | for i in range(15): 423 | u1 = self._board[i][0:5] 424 | u2 = self._board[i][10:15] 425 | u3 = [self._board[k][i] for k in range(5)] 426 | u4 = [self._board[k][i] for k in range(10, 15)] 427 | if self._dot(u1, feature_H) == 4: 428 | pos = [[i, 0], [i, 2], [i, 3], [i, 4]] 429 | self._live4_list.append(pos) 430 | if self._dot(u2, feature_H) == 4: 431 | pos = [[i, 10], [i, 12], [i, 13], [i, 14]] 432 | self._live4_list.append(pos) 433 | if self._dot(u3, feature_H) == 4: 434 | pos = [[0, i], [2, i], [3, i], [4, i]] 435 | self._live4_list.append(pos) 436 | if self._dot(u4, feature_H) == 4: 437 | pos = [[10, i], [12, i], [13, i], [14, i]] 438 | self._live4_list.append(pos) 439 | 440 | for i in range(10): 441 | u5 = [self._board[i + k + 1][k] for k in range(5)] 442 | u6 = [self._board[k][i + k + 1] for k in range(5)] 443 | u7 = [self._board[i + k][k + 10] for k in range(5)] 444 | u8 = [self._board[k + 10][i + k] for k in range(5)] 445 | u9 = [self._board[k][i + 4 - k] for k in range(5)] 446 | u10 = [self._board[i + k + 1][14 - k] for k in range(5)] 447 | u11 = [self._board[k + 10][i + 5 - k] for k in range(5)] 448 | u12 = [self._board[i + k][4 - k] for k in range(5)] 449 | if self._dot(u5, feature_H) == 4: 450 | pos = [[i + 1, 0],[i + 3, 2], [i + 4, 3], [i + 5, 4]] 451 | self._live4_list.append(pos) 452 | if self._dot(u6, feature_H) == 4: 453 | pos = [[0, i + 1],[2, i + 3], [3, i + 4], [4, i + 5]] 454 | self._live4_list.append(pos) 455 | if self._dot(u7, feature_H) == 4: 456 | pos = [[i, 10], [i + 2, 12], [i + 3, 13], [i + 4, 14]] 457 | self._live4_list.append(pos) 458 | if self._dot(u8, feature_H) == 4: 459 | pos = [[10, i], [12, i + 2], [13, i + 3], [14, i + 4]] 460 | self._live4_list.append(pos) 461 | if self._dot(u9, feature_H) == 4: 462 | pos = [[0, i + 4], [2, i + 2], [3, i + 1], [4, i]] 463 | self._live4_list.append(pos) 464 | if self._dot(u10, feature_H) == 4: 465 | pos = [[i + 1, 14], [i + 3, 12], [i + 4, 11], [i + 5, 10]] 466 | self._live4_list.append(pos) 467 | if self._dot(u11, feature_H) == 4: 468 | pos = [[10, i + 5], [12, i + 3], [13, i + 2], [14, i + 1]] 469 | self._live4_list.append(pos) 470 | if self._dot(u12, feature_H) == 4: 471 | pos = [[i, 4], [i + 2, 2], [i + 3, 1], [i + 4, 0]] 472 | self._live4_list.append(pos) 473 | 474 | u13 = [self._board[k][k] for k in range(5)] 475 | u14 = [self._board[k][k] for k in range(10, 15)] 476 | u15 = [self._board[k][14 - k] for k in range(5)] 477 | u16 = [self._board[k][14 - k] for k in range(10, 15)] 478 | 479 | if self._dot(u13, feature_H) == 4: 480 | pos = [[0, 0], [2, 2], [3, 3], [4, 4]] 481 | self._live4_list.append(pos) 482 | if self._dot(u14, feature_H) == 4: 483 | pos = [[10, 10], [12, 12], [13, 13], [14, 14]] 484 | self._live4_list.append(pos) 485 | if self._dot(u15, feature_H) == 4: 486 | pos = [[0, 14], [2, 12], [3, 11], [4, 10]] 487 | self._live4_list.append(pos) 488 | if self._dot(u16, feature_H) == 4: 489 | pos = [[10, 4], [12, 2], [13, 1], [14, 0]] 490 | self._live4_list.append(pos) 491 | 492 | for i in range(15): 493 | u1 = self._board[i][0:5] 494 | u2 = self._board[i][10:15] 495 | u3 = [self._board[k][i] for k in range(5)] 496 | u4 = [self._board[k][i] for k in range(10, 15)] 497 | if self._dot(u1, feature_I) == 4: 498 | pos = [[i, 0], [i, 1], [i, 3], [i, 4]] 499 | self._live4_list.append(pos) 500 | if self._dot(u2, feature_I) == 4: 501 | pos = [[i, 10], [i, 11], [i, 13], [i, 14]] 502 | self._live4_list.append(pos) 503 | if self._dot(u3, feature_I) == 4: 504 | pos = [[0, i], [1, i], [3, i], [4, i]] 505 | self._live4_list.append(pos) 506 | if self._dot(u4, feature_I) == 4: 507 | pos = [[10, i], [11, i], [13, i], [14, i]] 508 | self._live4_list.append(pos) 509 | 510 | for i in range(10): 511 | u5 = [self._board[i + k + 1][k] for k in range(5)] 512 | u6 = [self._board[k][i + k + 1] for k in range(5)] 513 | u7 = [self._board[i + k][k + 10] for k in range(5)] 514 | u8 = [self._board[k + 10][i + k] for k in range(5)] 515 | u9 = [self._board[k][i + 4 - k] for k in range(5)] 516 | u10 = [self._board[i + k + 1][14 - k] for k in range(5)] 517 | u11 = [self._board[k + 10][i + 5 - k] for k in range(5)] 518 | u12 = [self._board[i + k][4 - k] for k in range(5)] 519 | if self._dot(u5, feature_I) == 4: 520 | pos = [[i + 1, 0],[i + 2, 1], [i + 4, 3], [i + 5, 4]] 521 | self._live4_list.append(pos) 522 | if self._dot(u6, feature_I) == 4: 523 | pos = [[0, i + 1],[1, i + 2], [3, i + 4], [4, i + 5]] 524 | self._live4_list.append(pos) 525 | if self._dot(u7, feature_I) == 4: 526 | pos = [[i, 10], [i + 1, 11], [i + 3, 13], [i + 4, 14]] 527 | self._live4_list.append(pos) 528 | if self._dot(u8, feature_I) == 4: 529 | pos = [[10, i], [11, i + 1], [13, i + 3], [14, i + 4]] 530 | self._live4_list.append(pos) 531 | if self._dot(u9, feature_I) == 4: 532 | pos = [[0, i + 4], [1, i + 3], [3, i + 1], [4, i]] 533 | self._live4_list.append(pos) 534 | if self._dot(u10, feature_I) == 4: 535 | pos = [[i + 1, 14], [i + 2, 13], [i + 4, 11], [i + 5, 10]] 536 | self._live4_list.append(pos) 537 | if self._dot(u11, feature_I) == 4: 538 | pos = [[10, i + 5], [11, i + 4], [13, i + 2], [14, i + 1]] 539 | self._live4_list.append(pos) 540 | if self._dot(u12, feature_I) == 4: 541 | pos = [[i, 4], [i + 1, 3], [i + 3, 1], [i + 4, 0]] 542 | self._live4_list.append(pos) 543 | 544 | u13 = [self._board[k][k] for k in range(5)] 545 | u14 = [self._board[k][k] for k in range(10, 15)] 546 | u15 = [self._board[k][14 - k] for k in range(5)] 547 | u16 = [self._board[k][14 - k] for k in range(10, 15)] 548 | 549 | if self._dot(u13, feature_I) == 4: 550 | pos = [[0, 0], [1, 1], [3, 3], [4, 4]] 551 | self._live4_list.append(pos) 552 | if self._dot(u14, feature_I) == 4: 553 | pos = [[10, 10], [11, 11], [13, 13], [14, 14]] 554 | self._live4_list.append(pos) 555 | if self._dot(u15, feature_I) == 4: 556 | pos = [[0, 14], [1, 13], [3, 11], [4, 10]] 557 | self._live4_list.append(pos) 558 | if self._dot(u16, feature_I) == 4: 559 | pos = [[10, 4], [11, 3], [13, 1], [14, 0]] 560 | self._live4_list.append(pos) 561 | 562 | def _check_forbidden_moves(self): 563 | if not self._conf['forbidden_moves']: 564 | return False 565 | self._update_live3_list() 566 | self._update_live4_list() 567 | count_valid_live3 = len(self._live3_list) 568 | count_valid_live4 = len(self._live4_list) 569 | for live3 in self._live3_list: 570 | for live4 in self._live4_list: 571 | if any([live3 == live4[i:i+len(live3)] for i in range(len(live4)-len(live3)+1)]): # live3 in live4! should be viewed as live4 572 | count_valid_live3 -= 1 573 | else: 574 | continue 575 | if (count_valid_live3 >= 2) or (count_valid_live4 >= 2): #Found double-three or double-four! Forbidden for Black! 576 | return True 577 | else: 578 | return False 579 | 580 | def check_rules(self, board, action, color): 581 | self._read(board) 582 | i = action[0] 583 | j = action[1] 584 | 585 | if self._board[i][j] != 0: 586 | return 'occupied' 587 | 588 | self._board[i][j] = color 589 | 590 | if color == BLACK: # Black Player 591 | # Check overline and winning pattern 592 | count = self._count_consecutive(i, j, color) # Count the maximal consecutive number 593 | if count >= 5: 594 | if count == 5: 595 | # Winning pattern for Black 596 | # print("live3 = " + str(self._live3_list)) 597 | # print("live4 = " + str(self._live4_list)) 598 | # print("C5") 599 | return 'blackwins' 600 | else: 601 | # Overline forbidden move for Black, Black loses the Game 602 | # print("live3 = " + str(self._live3_list)) 603 | # print("live4 = " + str(self._live4_list)) 604 | if self._conf['forbidden_moves']: 605 | # print("Forbidden Move: C6+") 606 | return 'whitewins' 607 | else: 608 | # print('C5') 609 | return 'blackwins' 610 | # Check double three and double four 611 | signal = self._check_forbidden_moves() 612 | if signal: 613 | # If we find the forbidden moves, then White wins 614 | # print("live3 = " + str(self._live3_list)) 615 | # print("live4 = " + str(self._live4_list)) 616 | # print("Forbidden Move: D3 D4") 617 | return 'whitewins' 618 | else: # White Player , i.e. color == -1 619 | count = self._count_consecutive(i, j, color) # Count the maximal consecutive number 620 | if count >= 5: 621 | # Winning pattern for White 622 | # print("live3 = " + str(self._live3_list)) 623 | # print("live4 = " + str(self._live4_list)) 624 | # print("C5+") 625 | return 'whitewins' 626 | 627 | # If the board is full while we still don't have the winner , then draw 628 | if sum(sum(np.array(np.array(board) == 0, dtype=int))) == 0: 629 | return 'draw' 630 | 631 | return 'continue' 632 | 633 | def _dot(self, x, y): 634 | if len(x) != len(y): 635 | return 'error' 636 | s = 0 637 | for i in range(len(x)): 638 | s += x[i] * y[i] 639 | return s -------------------------------------------------------------------------------- /AlphaGomoku/ui/__init__.py: -------------------------------------------------------------------------------- 1 | from .board import * 2 | from .renderer import * 3 | -------------------------------------------------------------------------------- /AlphaGomoku/ui/board.py: -------------------------------------------------------------------------------- 1 | import time 2 | from ..rules import * 3 | import numpy as np 4 | 5 | 6 | class Board: 7 | def __init__(self, renderer, board_size=15): 8 | self._board = [[0 for i in range(board_size)] for i in range(board_size)] 9 | self._board_size = board_size 10 | self._player = BLACK 11 | self._winner = 0 12 | self._round = 0 13 | self._last_move = None 14 | 15 | if renderer is None: 16 | self._display = False 17 | else: 18 | self._display = True 19 | self._renderer = renderer 20 | 21 | def __str__(self): 22 | print('round = ' + str(self.round())) 23 | print('last move = ' + str(self.last_move())) 24 | if self.current_player() == BLACK: 25 | return 'current_player = BLACK' 26 | else: 27 | return 'current_player = WHITE' 28 | 29 | # return the board 30 | def board(self): 31 | return np.copy(self._board) 32 | 33 | # player take an action(coordinate) 34 | def move(self, player, action, info=None): 35 | x = action[0] # row 36 | y = action[1] # col 37 | 38 | # waiting until renderer is initialized 39 | while self._display and (not self._renderer.is_initialized()): 40 | time.sleep(.2) 41 | 42 | if not isinstance(x, int) or not isinstance(y, int): 43 | print("> error: x, y should be an integer:", x, y) 44 | return 1, self.board() 45 | if x < 0 or x > self._board_size - 1 or y < 0 or y > self._board_size - 1: 46 | print("> error: x, y should be in [0, 14]", x, y) 47 | return 1, self.board() 48 | 49 | num_str = str(self.stone_num() + 1) 50 | if info is not None: 51 | info = info + '_' + num_str 52 | 53 | if player == BLACK: 54 | if self._display: 55 | self._renderer.move(player, (x, y), info) 56 | self._board[x][y] = BLACK 57 | self._player = WHITE 58 | self._round += 1 59 | else: 60 | if self._display: 61 | self._renderer.move(player, (x, y), info) 62 | self._board[x][y] = WHITE 63 | self._player = BLACK 64 | 65 | self._last_move = action 66 | 67 | def clear(self): 68 | self._board = [[0 for i in range(self._board_size)] for i in range(self._board_size)] 69 | self._player = BLACK 70 | self._winner = 0 71 | self._round = 0 72 | self._last_move = None 73 | if self._display: 74 | self._renderer.paint_background() 75 | while not self._renderer.is_initialized(): 76 | time.sleep(.1) 77 | 78 | def read(self, new_board): 79 | self.clear() 80 | black_num = 0 81 | white_num = 0 82 | 83 | for row in range(self._board_size): 84 | for col in range(self._board_size): 85 | if new_board[row][col] == BLACK: 86 | self.move(1, (row, col)) 87 | black_num += 1 88 | elif new_board[row][col] == WHITE: 89 | self.move(-1, (row, col)) 90 | white_num += 1 91 | 92 | self._round = black_num 93 | if black_num == white_num: 94 | self._player = BLACK 95 | elif black_num == white_num + 1: 96 | self._player = WHITE 97 | else: 98 | print("> error: illegal stone num") 99 | print('> black_num = ' + str(black_num)) 100 | print('> white_num = ' + str(white_num)) 101 | 102 | def round(self): 103 | return self._round 104 | 105 | def current_player(self): 106 | return self._player 107 | 108 | def last_move(self): 109 | return self._last_move 110 | 111 | def stone_num(self): 112 | if self._player == BLACK: 113 | return 2*self._round 114 | else: 115 | return 2*self._round - 1 116 | 117 | def legal_moves(self): 118 | legal_moves = [] 119 | for i in range(self._board_size): 120 | for j in range(self._board_size): 121 | if self._board[i][j] == 0: 122 | legal_moves.append((i, j)) 123 | return legal_moves 124 | 125 | def show_scores(self, action_list, score_list): 126 | if self._renderer is not None: 127 | self._renderer.show_score(self.board(), action_list, score_list) 128 | -------------------------------------------------------------------------------- /AlphaGomoku/ui/image/black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/ui/image/black.png -------------------------------------------------------------------------------- /AlphaGomoku/ui/image/desk.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/ui/image/desk.jpg -------------------------------------------------------------------------------- /AlphaGomoku/ui/image/white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/ui/image/white.png -------------------------------------------------------------------------------- /AlphaGomoku/ui/renderer.py: -------------------------------------------------------------------------------- 1 | from sys import exit 2 | from ..rules import * 3 | import time 4 | import threading 5 | 6 | display_mode = True 7 | use_dialog = True 8 | try: 9 | import easygui 10 | except ImportError: 11 | print('> error: module [easygui] not found') 12 | use_dialog = False 13 | try: 14 | import pygame 15 | except ImportError: 16 | print('> error: module [pygame] not found') 17 | display_mode = False 18 | 19 | image_path = 'AlphaGomoku/ui/image/' 20 | 21 | 22 | class Renderer(threading.Thread): 23 | 24 | # Noted that some functions have both public and private versions such as 'move', 'read', 'paint_background' 25 | # private ones are for Renderer thread, which will finish the rendering while the public func play the role in 26 | # sending signals to Renderer thread. (by updating some boolean variables, since Renderer Thread is listening 27 | # these variables in an endless loop) 28 | 29 | # Since all rendering must be done in Renderer thread, we have to take an indirect way. 30 | 31 | def __init__(self, screen_size, board_size=15): 32 | super(Renderer, self).__init__() 33 | self._screen_size = screen_size 34 | self._board_size = board_size 35 | self._spacing = int(self._screen_size[1] / (board_size + 1)) 36 | self._screen = None 37 | self._background = None 38 | self._stone_black = None 39 | self._stone_white = None 40 | 41 | self._init = False 42 | 43 | self._update_move = False 44 | self._next_pos = None 45 | self._next_player = 0 46 | 47 | self._update_read = False 48 | self._new_board = None 49 | 50 | self._update_clear = False 51 | 52 | self._update_info = False 53 | self._update_score = False 54 | self._info_surface_cache = [] 55 | self._info_rect_cache = [] 56 | self._score_surface_cache = [] 57 | self._score_rect_cache = [] 58 | 59 | self._is_waiting_for_click = False 60 | self._mouse_click_pos = None 61 | 62 | self.setDaemon(True) 63 | self.start() 64 | 65 | def run(self): 66 | pygame.init() 67 | self._screen = pygame.display.set_mode(self._screen_size, 0, 32) 68 | self._background = pygame.image.load(image_path + 'desk.jpg').convert() 69 | self._stone_black = pygame.image.load(image_path + 'black.png').convert_alpha() 70 | self._stone_white = pygame.image.load(image_path + 'white.png').convert_alpha() 71 | self._stone_black = pygame.transform.smoothscale(self._stone_black, (self._spacing, self._spacing)) 72 | self._stone_white = pygame.transform.smoothscale(self._stone_white, (self._spacing, self._spacing)) 73 | self.paint_background() 74 | while True: 75 | for event in pygame.event.get(): 76 | if event.type == pygame.QUIT: 77 | print("> exit") 78 | pygame.quit() 79 | exit() 80 | if self._is_waiting_for_click and event.type == pygame.MOUSEBUTTONDOWN: 81 | mouse_position = pygame.mouse.get_pos() 82 | y = int(mouse_position[0] / self._spacing - 0.5) 83 | x = int(mouse_position[1] / self._spacing - 0.5) 84 | if x in range(self._board_size) and y in range(self._board_size): 85 | self._is_waiting_for_click = False 86 | self._mouse_click_pos = (x, y) 87 | print("> click " + str(self._mouse_click_pos)) 88 | if self._update_clear: 89 | self._paint_background() 90 | if self._update_read: 91 | self._read(self._new_board) 92 | if self._update_move: 93 | self._move(self._next_player, self._next_pos) 94 | if self._update_info: 95 | self._show_info() 96 | if self._update_score: 97 | self._show_score() 98 | 99 | def paint_background(self): 100 | self._update_clear = True 101 | self._update_move = False 102 | self._update_read = False 103 | self._init = False 104 | 105 | def _paint_background(self): 106 | self._screen.blit(self._background, (0, 0)) 107 | black_color = (0, 0, 0) 108 | 109 | for i in range(1, self._board_size + 1): 110 | start_horizontal = (self._spacing, i * self._spacing) 111 | end_horizontal = (self._screen_size[1] - self._spacing, i * self._spacing) 112 | start_vertical = (i * self._spacing, self._spacing) 113 | end_vertical = (i * self._spacing, self._screen_size[1] - self._spacing) 114 | 115 | if i == 1 or i == self._board_size + 1: 116 | pygame.draw.line(self._screen, black_color, start_horizontal, end_horizontal, 3) 117 | pygame.draw.line(self._screen, black_color, start_vertical, end_vertical, 3) 118 | else: 119 | pygame.draw.line(self._screen, black_color, start_horizontal, end_horizontal, 2) 120 | pygame.draw.line(self._screen, black_color, start_vertical, end_vertical, 2) 121 | 122 | if self._board_size % 2 == 1: 123 | mid = (self._board_size + 1) / 2 124 | start_pos = (self._spacing * int(mid) - 2, self._spacing * int(mid) - 2) 125 | size = (6, 6) 126 | pygame.draw.rect(self._screen, black_color, pygame.rect.Rect(start_pos, size)) 127 | 128 | pygame.display.update() 129 | self._update_clear = False 130 | self._init = True 131 | 132 | def move(self, player, action, info=None): 133 | while self._update_move: 134 | time.sleep(1e-4) 135 | self._next_player = player 136 | self._next_pos = action 137 | self._update_move = True 138 | if info is not None: 139 | self.show_info(info, player, action) 140 | 141 | def _move(self, player, action): 142 | position = (int((action[1] + 0.5) * self._spacing), int((action[0] + 0.5) * self._spacing)) 143 | if player == BLACK: 144 | self._screen.blit(self._stone_black, position) 145 | elif player == -1: 146 | self._screen.blit(self._stone_white, position) 147 | 148 | self._update_move = False 149 | 150 | def read(self, new_board): 151 | while self._update_read: 152 | time.sleep(1e-4) 153 | self._new_board = new_board 154 | self._update_read = True 155 | 156 | def _read(self, new_board): 157 | self._paint_background() 158 | self._update_read = False 159 | for row in range(self._board_size): 160 | for col in range(self._board_size): 161 | if new_board[row][col] == 1: 162 | self._move(1, (row, col)) 163 | elif new_board[row][col] == -1: 164 | self._move(-1, (row, col)) 165 | 166 | def ask_for_click(self): 167 | self._is_waiting_for_click = True 168 | while self._is_waiting_for_click: 169 | time.sleep(1e-4) 170 | return self._mouse_click_pos 171 | 172 | def show_score(self, board, action_list, score_list): 173 | self.read(board) 174 | time.sleep(1e-2) 175 | large_font = pygame.font.SysFont('Calibri', size=20) 176 | red = (255, 0, 0) 177 | 178 | for a_s in list(zip(action_list, score_list)): 179 | action, score = a_s[0], a_s[1] 180 | if self._board_size == 8: 181 | position = (int((action[1] + 0.63) * self._spacing), int((action[0] + 0.76) * self._spacing)) 182 | if self._board_size == 15: 183 | position = (int((action[1] + 0.80) * self._spacing), int((action[0] + 0.72) * self._spacing)) 184 | 185 | self._score_surface_cache.append(large_font.render(str(round(score, 2)), True, red)) 186 | self._score_rect_cache.append(position) 187 | 188 | self._update_score = True 189 | 190 | def _show_score(self): 191 | size = len(self._score_rect_cache) 192 | for i in range(size): 193 | self._screen.blit(self._score_surface_cache[i], self._score_rect_cache[i]) 194 | self._score_surface_cache = [] 195 | self._score_rect_cache = [] 196 | 197 | pygame.display.update() 198 | self._update_score = False 199 | 200 | def show_info(self, info, player, action): 201 | infos = info.split('_') 202 | # p = 'p = ' + infos[0] 203 | v = infos[1] 204 | num = infos[2] 205 | 206 | if self._board_size == 8: 207 | # position_1 = (int((action[1] + 0.63) * self._spacing), int((action[0] + 0.76) * self._spacing)) 208 | if float(infos[1]) >= 0: 209 | position_2 = (int((action[1] + 0.62) * self._spacing), int((action[0] + 0.78) * self._spacing)) 210 | else: 211 | position_2 = (int((action[1] + 0.61) * self._spacing), int((action[0] + 0.78) * self._spacing)) 212 | 213 | if int(num) < 10: 214 | position_3 = (int((action[1] + 0.90) * self._spacing), int((action[0] + 0.96) * self._spacing)) 215 | else: 216 | position_3 = (int((action[1] + 0.82) * self._spacing), int((action[0] + 0.96) * self._spacing)) 217 | 218 | small_font = pygame.font.SysFont('Calibri', size=16) 219 | large_font = pygame.font.SysFont('Calibri', size=32) 220 | 221 | if self._board_size == 15: 222 | # position_1 = (int((action[1] + 0.63) * self._spacing), int((action[0] + 0.76) * self._spacing)) 223 | if float(infos[1]) >= 0: 224 | position_2 = (int((action[1] + 0.72) * self._spacing), int((action[0] + 0.75) * self._spacing)) 225 | else: 226 | position_2 = (int((action[1] + 0.70) * self._spacing), int((action[0] + 0.75) * self._spacing)) 227 | 228 | if int(num) < 10: 229 | position_3 = (int((action[1] + 0.90) * self._spacing), int((action[0] + 0.96) * self._spacing)) 230 | else: 231 | position_3 = (int((action[1] + 0.82) * self._spacing), int((action[0] + 0.96) * self._spacing)) 232 | 233 | small_font = pygame.font.SysFont('Calibri', size=10) 234 | large_font = pygame.font.SysFont('Calibri', size=20) 235 | 236 | color = (255, 0, 0) 237 | if player == BLACK: 238 | color = (255, 255, 255) 239 | if player == WHITE: 240 | color = (0, 0, 0) 241 | 242 | # self._info_surface_cache.append(small_font.render(p, True, color)) 243 | # self._info_rect_cache.append(position_1) 244 | 245 | if infos[1] != '2': 246 | self._info_surface_cache.append(small_font.render(v, True, color)) 247 | self._info_rect_cache.append(position_2) 248 | 249 | self._info_surface_cache.append(large_font.render(num, True, color)) 250 | self._info_rect_cache.append(position_3) 251 | self._update_info = True 252 | 253 | def _show_info(self): 254 | size = len(self._info_rect_cache) 255 | for i in range(size): 256 | self._screen.blit(self._info_surface_cache[i], self._info_rect_cache[i]) 257 | self._info_surface_cache = [] 258 | self._info_rect_cache = [] 259 | 260 | pygame.display.update() 261 | self._update_info = False 262 | 263 | def is_initialized(self): 264 | return self._init 265 | 266 | 267 | def ask_for_draw(): 268 | if display_mode and use_dialog: 269 | return easygui.ccbox(title='Request', msg='AlphaRenju requests a draw.', choices=['draw', 'continue']) 270 | else: 271 | print('> AlphaRenju requests a draw.') 272 | return 0 273 | 274 | 275 | def show_result(mode, result): 276 | if display_mode and use_dialog and mode in [2, 2.5, 3, 9]: 277 | info = '' 278 | if result == 'blackwins': 279 | info = 'Black wins!' 280 | if result == 'whitewins': 281 | info = 'White wins!' 282 | if result == 'draw': 283 | info = 'Draw!' 284 | easygui.msgbox(title='Result', msg=info) 285 | else: 286 | print(result) 287 | -------------------------------------------------------------------------------- /AlphaGomoku/utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | from email.mime.text import MIMEText 4 | from email.mime.multipart import MIMEMultipart 5 | from email.header import Header 6 | import smtplib 7 | 8 | 9 | from_addr = "reposter@sina.com" 10 | password = "" 11 | 12 | 13 | def send_email_report(to_addr, content): 14 | try: 15 | msg = MIMEMultipart() 16 | msg['Subject'] = Header('Gomoku AI Report', 'utf-8') 17 | msg['From'] = Header(from_addr) 18 | msg['To'] = Header(to_addr) 19 | msg['Reply-to'] = Header(from_addr) 20 | 21 | msg.attach(MIMEText(content, 'plain', 'utf-8')) 22 | 23 | smtp_server = "smtp.sina.com" 24 | server = smtplib.SMTP(smtp_server, 25) 25 | 26 | server.set_debuglevel(1) 27 | server.starttls() 28 | 29 | server.login(from_addr, password) 30 | server.sendmail(from_addr, [to_addr], msg.as_string()) 31 | server.quit() 32 | except: 33 | pass 34 | 35 | 36 | def log(func): 37 | def wrapper(*args, **kwargs): 38 | start = time.clock() 39 | print('>> calling %s()' % func.__name__) 40 | result = func(*args, **kwargs) 41 | end = time.clock() 42 | print('>> %s() time = %s' % (func.__name__, str(round(end - start, 3)))) 43 | return result 44 | 45 | return wrapper 46 | 47 | 48 | def index2coordinate(index, size): 49 | row = index // size 50 | col = index % size 51 | return int(row), int(col) 52 | 53 | 54 | def coordinate2index(cor, size): 55 | return size * cor[0] + cor[1] 56 | 57 | 58 | def board2legalvec(board): 59 | vec = np.array(np.array(board) == 0, dtype=np.int) 60 | return vec.flatten() 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | *** 2 | # 15 by 15 AlphaGomoku 3 | 4 | Introduction 5 | ==== 6 | - This is a Gomoku AI based on curriculum learning and AlphaGo methods. 7 | 8 | 12 | 13 | 14 | Demonstration 15 | ==== 16 | Human vs AlphaGomoku (15 by 15 board) 17 | ------- 18 | AI adopts deterministic policy with 400 simulations per move. 19 |
20 |
21 |
22 |
25 |
26 |
27 |
46 |
47 |