├── .gitignore ├── AlphaGomoku ├── __init__.py ├── agent │ ├── __init__.py │ ├── agent.py │ ├── ai.py │ ├── human.py │ ├── mcts.py │ └── node.py ├── config.py ├── dataset │ ├── __init__.py │ ├── dataset.py │ ├── generator.py │ ├── human_play_data │ │ ├── human_15_col.npy │ │ ├── human_15_last_move.npy │ │ ├── human_15_obs.npy │ │ ├── human_15_pi.npy │ │ ├── human_15_z.npy │ │ └── save human play data here.txt │ └── self_play_data │ │ └── save self play data here.txt ├── env.py ├── network │ ├── __init__.py │ ├── history │ │ ├── log_15_20_512.txt │ │ ├── log_15_2_512.txt │ │ ├── log_8_20_512.txt │ │ └── save training history here.txt │ ├── model │ │ ├── log.txt │ │ ├── model_b_15.h5 │ │ └── model_w_15.h5 │ └── network.py ├── rules.py ├── ui │ ├── __init__.py │ ├── board.py │ ├── image │ │ ├── black.png │ │ ├── desk.jpg │ │ └── white.png │ └── renderer.py └── utils.py ├── LICENSE ├── README.md ├── requirements.txt └── run.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea/ 3 | __pycache__/ 4 | *.pyc 5 | workspace.xml 6 | venv/ 7 | gen_col.npy 8 | gen_last_move.npy 9 | gen_obs.npy 10 | gen_pi.npy 11 | gen_z.npy 12 | self_play_15_0_temp_col.npy 13 | self_play_15_0_temp_last_move.npy 14 | self_play_15_0_temp_obs.npy 15 | self_play_15_0_temp_pi.npy 16 | self_play_15_0_temp_z.npy 17 | utils.py -------------------------------------------------------------------------------- /AlphaGomoku/__init__.py: -------------------------------------------------------------------------------- 1 | from .agent import * 2 | from .dataset import * 3 | from .network import * 4 | from .ui import * 5 | from .config import * 6 | from .env import * 7 | from .rules import * 8 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/__init__.py: -------------------------------------------------------------------------------- 1 | from .human import HumanAgent 2 | from .ai import * 3 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/agent.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from ..rules import * 3 | 4 | 5 | class Agent: 6 | def __init__(self, color): 7 | if color != BLACK and color != WHITE: 8 | self._color = BLACK 9 | else: 10 | self._color = color 11 | 12 | @abstractmethod 13 | def play(self, *args, **kwargs): 14 | pass 15 | 16 | @property 17 | def color(self): 18 | return self._color 19 | 20 | @color.setter 21 | def color(self, value): 22 | if value != BLACK and value != WHITE: 23 | self._color = BLACK 24 | else: 25 | self._color = value 26 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/ai.py: -------------------------------------------------------------------------------- 1 | from .agent import Agent 2 | from ..network.network import * 3 | from .mcts import * 4 | from ..utils import * 5 | 6 | MIN = -99999999 7 | MAX = 99999999 8 | 9 | score_5 = 5 10 | score_4_live = 4.5 11 | score_4_and_3_live = 4.3 12 | score_4 = 4 13 | score_double_3_live = 3.8 14 | score_3_live = 3.5 15 | score_3 = 3 16 | score_double_2_live = 3 17 | score_2_live = 2.5 18 | score_2 = 2 19 | 20 | 21 | class AI(Agent): 22 | def __init__(self, color): 23 | Agent.__init__(self, color) 24 | 25 | def play(self, *args, **kwargs): 26 | pass 27 | 28 | 29 | class MCTSAgent(AI): 30 | def __init__(self, conf, color, use_stochastic_policy, specify_model_ver=-1):
31 | AI.__init__(self, color) 32 | black_model_path = 'AlphaGomoku/network/model/model_b_' + str(conf['board_size']) 33 | white_model_path = 'AlphaGomoku/network/model/model_w_' + str(conf['board_size']) 34 | if specify_model_ver != -1: 35 | black_model_path = black_model_path + '_ver_' + str(specify_model_ver) 36 | white_model_path = white_model_path + '_ver_' + str(specify_model_ver) 37 | black_model_path = black_model_path + '.h5' 38 | white_model_path = white_model_path + '.h5' 39 | 40 | conf.update(net_para_file=black_model_path) 41 | black_net = Network(conf) 42 | conf.update(net_para_file=white_model_path) 43 | white_net = Network(conf) 44 | 45 | self._mcts = MCTS(conf, black_net, white_net, color, use_stochastic_policy) 46 | self._black_net = black_net 47 | self._white_net = white_net 48 | self._board_size = conf['board_size'] 49 | 50 | def play(self, obs, action, stone_num): 51 | act_ind, pi, prior_prob, value = self._mcts.action(obs, action, stone_num) 52 | act_cor = index2coordinate(act_ind, self._board_size) 53 | return act_cor, pi, prior_prob, value 54 | 55 | def set_self_play(self, is_self_play): 56 | self._mcts.set_self_play(is_self_play) 57 | 58 | def set_stochastic_policy(self, use_stochastic_policy): 59 | self._mcts.set_stochastic_policy(use_stochastic_policy) 60 | 61 | def reset_mcts(self): 62 | self._mcts.reset() 63 | 64 | @log 65 | def train(self, obs, color, last_move, pi, z): 66 | obs_b, obs_w = obs[0::2], obs[1::2] 67 | color_b, color_w = color[0::2], color[1::2] 68 | last_move_b, last_move_w = last_move[0::2], last_move[1::2] 69 | pi_b, pi_w = pi[0::2], pi[1::2] 70 | z_b, z_w = z[0::2], z[1::2] 71 | 72 | loss_b = self._black_net.train(obs_b, color_b, last_move_b, pi_b, z_b) 73 | loss_w = self._white_net.train(obs_w, color_w, last_move_w, pi_w, z_w) 74 | return loss_b, loss_w 75 | 76 | def save_model(self): 77 | self._black_net.save_model() 78 | self._white_net.save_model() 79 | print('> model saved') 80 | 81 | def load_model(self): 82 | self._black_net.load_model() 83 | self._white_net.load_model() 84 | 85 | 86 | class FastAgent(AI): 87 | def __init__(self, color, depth=1): # depth must be even 88 | AI.__init__(self, color) 89 | self._action_list = [] 90 | self._score_list = [] 91 | self._depth = depth 92 | self._cut_count = 0 93 | self._last_move_list = [] 94 | self._atk_def_ratio = 0.1 95 | self._show_info = False 96 | 97 | def play(self, obs, action, stone_num, *args): 98 | self._action_list = [] 99 | self._score_list = [] 100 | if action is not None: 101 | self._last_move_list.append(action) 102 | 103 | size = obs.shape[0] 104 | if sum(sum(abs(obs))) == 0: # if the AI plays black, the first move is always placed at the center of the board 105 | pi = [0 for _ in range(size * size)] 106 | pi[int((size * size) / 2)] = 1 107 | self._last_move_list.append((7, 7)) 108 | return (7, 7), pi, None, None 109 | 110 | pos_list = self.generate(obs, all=True) 111 | if self._show_info: 112 | print('position generated: ', pos_list) 113 | alpha, beta = MIN, MAX 114 | score_dict = dict() 115 | thread_list = [] 116 | 117 | for i, j in pos_list: 118 | new_obs = obs.copy() 119 | new_obs[i][j] = self.color 120 | target = self._get_thread_target(obs=new_obs, last_move=(i, j), alpha=alpha, beta=beta, 121 | depth=self._depth - 1, score_dict=score_dict) 122 | thr = threading.Thread(target=target, name='thread ' + str((i, j))) 123 | thread_list.append(thr) 124 | thr.start() 125 | 126 | for thr in thread_list: 127 | thr.join() 128 | 129 | best_action_list = get_best_action_list(score_dict) 130 | if self._show_info:
131 | print('best action list:', best_action_list, ' score = ', score_dict[best_action_list[0]]) 132 | 133 | ind = np.random.choice([i for i in range(len(best_action_list))]) 134 | action = best_action_list[ind] 135 | 136 | pi = [0 for _ in range(size * size)] 137 | pi[coordinate2index(action, size)] = 1 138 | 139 | self._last_move_list.append(action) 140 | return action, pi, best_action_list, score_dict 141 | 142 | def _get_thread_target(self, obs, last_move, alpha, beta, depth, score_dict): 143 | def _min(): 144 | _beta = beta 145 | self._last_move_list.append(last_move) 146 | if depth == 0: 147 | score_atk, score_def = self.evaluate(obs) 148 | self._last_move_list.pop() 149 | # with a one-ply search the AI must still be taught to defend against live threes and rush fours; the approach here is to manually raise the score of the opponent's live threes and rush fours 150 | if score_def < score_3_live: 151 | if score_atk > score_def: 152 | score = score_atk - self._atk_def_ratio * score_def 153 | else: 154 | score = -score_def + self._atk_def_ratio * score_atk 155 | else: 156 | if score_def == score_3_live: 157 | if score_atk >= score_4: 158 | score = score_atk - self._atk_def_ratio * score_def 159 | else: 160 | score = -score_4 161 | else: 162 | # prevent the AI from giving up when the opponent already has a live four 163 | if score_def >= score_4_live: 164 | score = score_5 if score_atk == score_5 else -score_5 165 | else: 166 | score = score_5 if score_atk == score_5 else -score_4_live 167 | x, y = int(last_move[0]), int(last_move[1]) 168 | score_dict[(x, y)] = score 169 | if self._show_info: 170 | print((x, y), 'atk=', score_atk, 'def=', score_def, 'total=', score) 171 | return score 172 | 173 | pos_list = self.generate(obs) 174 | for i, j in pos_list: 175 | obs[i][j] = -self.color 176 | value = self._max(obs, (i, j), alpha, _beta, depth - 1) 177 | if value < _beta: 178 | _beta = value 179 | obs[i][j] = 0 180 | if alpha > _beta: 181 | break 182 | # this indicates that the parent node (belongs to the max layer) will select a node with value 183 | # no less than alpha; however, the value of the child selected in this node (belongs to the min layer) 184 | # will be no more than beta <= alpha, so there is no need to search this node 185 | 186 | self._last_move_list.pop() 187 | x, y = int(last_move[0]), int(last_move[1]) 188 | score_dict[(x, y)] = _beta 189 | self._action_list.append((x, y)) 190 | 191 | return _min 192 | 193 | # if an obs is in the max layer, then the agent is supposed to select the action with max score 194 | # alpha represents the lower bound of the value of this node 195 | def _max(self, obs, last_move, alpha, beta, depth): 196 | self._last_move_list.append(last_move) 197 | if depth == 0: 198 | score_atk, score_def = self.evaluate(obs) 199 | self._last_move_list.pop() 200 | score = score_atk if score_atk > score_def else -score_def 201 | return score 202 | 203 | pos_list = self.generate(obs) 204 | 205 | for i, j in pos_list: 206 | obs[i][j] = self.color 207 | value = self._min(obs, (i, j), alpha, beta, depth - 1) 208 | if value > alpha: 209 | alpha = value 210 | obs[i][j] = 0 211 | if alpha > beta: 212 | break 213 | 214 | self._last_move_list.pop() 215 | return alpha 216 | 217 | # if an obs is in the min layer, then the agent is supposed to select the action with min score 218 | # beta represents the upper bound of the value of this node 219 | def _min(self, obs, last_move, alpha, beta, depth): 220 | self._last_move_list.append(last_move) 221 | if depth == 0: 222 | score_atk, score_def = self.evaluate(obs) 223 | self._last_move_list.pop() 224 | score = score_atk if score_atk > score_def else -score_def 225 | return score 226 | 227 | pos_list = self.generate(obs) 228 |
229 | for i, j in pos_list: 230 | obs[i][j] = -self.color 231 | value = self._max(obs, (i, j), alpha, beta, depth - 1) 232 | # print((i, j), value) 233 | if value < beta: 234 | beta = value 235 | obs[i][j] = 0 236 | if alpha > beta: 237 | break 238 | # this indicates that the parent node (belongs to the max layer) will select a node with value 239 | # no less than alpha; however, the value of the child selected in this node (belongs to the min layer) 240 | # will be no more than beta <= alpha, so there is no need to search this node 241 | 242 | self._last_move_list.pop() 243 | return beta 244 | 245 | def evaluate(self, obs): 246 | pos_ind = np.where(obs) 247 | pos_set = [(pos_ind[0][i], pos_ind[1][i]) for i in range(len(pos_ind[0]))] 248 | 249 | score_atk, score_def = 0, 0 250 | for x, y in pos_set: 251 | c = obs[x][y] 252 | pt_score = self.evaluate_point(obs, (x, y)) 253 | if c != self.color: 254 | score_def = max(score_def, pt_score) 255 | else: 256 | score_atk = max(score_atk, pt_score) 257 | 258 | return score_atk, score_def 259 | 260 | def evaluate_point(self, obs, pos): 261 | i, j = pos[0], pos[1] 262 | color = obs[i][j] 263 | dir_set = [(1, 0), (0, 1), (1, 1), (1, -1)] 264 | max_count = 0 265 | max_consecutive_count = 0 266 | max_score = 0 267 | 268 | for dir in dir_set: 269 | score = 0 270 | count_1, count_2 = 1, 1 271 | consecutive_count_1, consecutive_count_2 = 1, 1 272 | space_1, space_2 = 0, 0 273 | block_1, block_2 = 0, 0 274 | consecutive_flag = True 275 | 276 | for k in range(1, 5): 277 | if i + k * dir[0] in range(0, 15) and j + k * dir[1] in range(0, 15): 278 | if obs[i + k * dir[0]][j + k * dir[1]] == color: 279 | if space_1 == 2: 280 | break 281 | count_1 += 1 282 | if consecutive_flag: 283 | consecutive_count_1 += 1 284 | if obs[i + k * dir[0]][j + k * dir[1]] == -color: 285 | block_1 = 1 286 | break 287 | if obs[i + k * dir[0]][j + k * dir[1]] == 0: 288 | space_1 += 1 289 | consecutive_flag = False 290 | if space_1 == 3: 291 | break 292 | else: 293 | block_1 = 1 294 | break 295 | 296 | consecutive_flag = True 297 | 298 | for k in range(1, 5): 299 | if i - k * dir[0] in range(0, 15) and j - k * dir[1] in range(0, 15): 300 | if obs[i - k * dir[0]][j - k * dir[1]] == color: 301 | if space_2 == 2: 302 | break 303 | count_2 += 1 304 | if consecutive_flag: 305 | consecutive_count_2 += 1 306 | if obs[i - k * dir[0]][j - k * dir[1]] == -color: 307 | block_2 = 1 308 | break 309 | if obs[i - k * dir[0]][j - k * dir[1]] == 0: 310 | space_2 += 1 311 | consecutive_flag = False 312 | if space_2 == 3: 313 | break 314 | else: 315 | block_2 = 1 316 | break 317 | 318 | # there are several cases: 319 | # 1. ooox: block=1, space=0, count=consecutive_count 320 | # 2. ooo__: block=0, space=2, count=consecutive_count 321 | # 3. ooo_x: block=1, space=1, count=consecutive_count 322 | # 4. oo_ox: block=1, space=1, count>consecutive_count 323 | 324 | count = max(count_1 + consecutive_count_2, count_2 + consecutive_count_1) - 1 325 | 326 | consecutive_count = consecutive_count_1 + consecutive_count_2 - 1 327 | 328 | if consecutive_count >= 5: 329 | return score_5 330 | 331 | if count == 4: 332 | if consecutive_count == 4: # ??oooo?? 333 | if space_1 >= 1 and space_2 >= 1: # ?_oooo_? 334 | score = score_4_live 335 | else: 336 | if space_1 == 0 and space_2 == 0: # xoooox 337 | pass 338 | else: # xoooo_ 339 | score = score_4 340 | else: 341 | if consecutive_count == 3: # ??ooo_o?? 342 | score = score_4 343 | else: # (consecutive_count == 2) ??oo_oo??
344 | score = score_4 345 | 346 | if count == 3: 347 | if consecutive_count == 3: # ??ooo?? 348 | if space_1 >= 1 and space_2 >= 1: # ?_ooo_? 349 | score = score_3_live 350 | else: 351 | if space_1 == 0 and space_2 == 0: # xooox 352 | pass 353 | else: # xooo_ 354 | score = score_3 355 | else: # (consecutive_count == 2) ??oo_o?? 356 | if consecutive_count_1 == 2: 357 | if space_1 >= 1 and space_2 >= 2: # ?_oo_o_? 358 | score = score_3_live 359 | else: 360 | if space_1 == 0 and space_2 == 1: # xoo_ox 361 | pass 362 | else: 363 | score = score_3 364 | else: # (consecutive_count_2 == 2) 365 | if space_2 >= 1 and space_1 >= 2: # ?_o_oo_? 366 | score = score_3_live 367 | else: 368 | if space_1 == 1 and space_2 == 0: # xo_oox 369 | pass 370 | else: 371 | score = score_3 372 | 373 | if count == 2: 374 | if consecutive_count == 2: # ??oo?? 375 | if space_1 <= 1 and space_2 <= 1: # x?oo?x 376 | pass 377 | else: 378 | if space_1 == 0 or space_2 == 0: # xoo__? 379 | if space_1 == 3 or space_2 == 3: # xoo___ 380 | score = score_2 381 | else: 382 | pass 383 | else: # ?__oo_?? 384 | score = score_2_live 385 | 386 | else: # ??o_o?? 387 | if space_1 + space_2 < 3: 388 | pass 389 | else: 390 | if count_1 == 2: 391 | if space_2 == 0: # (space_1 == 3) __o_ox 392 | score = score_2 393 | else: 394 | score = score_2_live 395 | else: # (count_2 == 2) 396 | if space_1 == 0: # (space_2 == 3) xo_o__ 397 | score = score_2 398 | else: 399 | score = score_2_live 400 | 401 | # bonus 402 | if max_score == score_2_live and score == score_2_live: 403 | score = score_double_2_live 404 | if max_score == score_3_live and score == score_3_live: 405 | score = score_double_3_live 406 | if max_score == score_4 and score == score_3_live: 407 | score = score_4_and_3_live 408 | if max_score == score_3_live and score == score_4: 409 | score = score_4_and_3_live 410 | 411 | if count > max_count: 412 | max_count = count 413 | if consecutive_count > max_consecutive_count: 414 | max_consecutive_count = consecutive_count 415 | 416 | if score > max_score: 417 | max_score = score 418 | 419 | return max_score 420 | 421 | def generate(self, obs, all=False): 422 | good_pts = [] 423 | good_scores = [] 424 | pts = [] 425 | scores = [] 426 | dir_set = [(1, 0), (1, -1), (0, -1), (-1, -1), (-1, 0), (-1, 1), (0, 1), (1, 1)] 427 | 428 | if all: 429 | indices = np.where(obs) 430 | check_list = [(indices[0][i], indices[1][i]) for i in range(len(indices[0]))] 431 | else: 432 | if len(self._last_move_list) > 7: 433 | check_list = self._last_move_list[-7:] 434 | else: 435 | check_list = self._last_move_list 436 | 437 | for x0, y0 in check_list: 438 | for dir in dir_set: 439 | if x0 + dir[0] in range(0, 15) and y0 + dir[1] in range(0, 15): 440 | pos = (x0 + dir[0], y0 + dir[1]) 441 | if obs[pos[0]][pos[1]] == 0 and pos not in pts: 442 | obs[pos[0]][pos[1]] = self.color 443 | score_atk = self.evaluate_point(obs, pos) 444 | obs[pos[0]][pos[1]] = -self.color 445 | score_def = self.evaluate_point(obs, pos) 446 | score = max(score_atk, score_def) 447 | if score >= score_3_live: 448 | good_pts.append(pos) 449 | good_scores.append(score) 450 | if score_atk == score_5: 451 | break 452 | pts.append(pos) 453 | scores.append(score) 454 | obs[pos[0]][pos[1]] = 0 455 | 456 | if len(good_pts) > 0 and max(good_scores) >= score_4: 457 | # print('good') 458 | pts = good_pts 459 | scores = good_scores 460 | lst = np.array([pts, scores]) 461 | pts = lst[:, lst[1].argsort()][0] 462 | pos_list = list(pts) 463 | 464 | pos_list.reverse() 465 | return pos_list 466 | 467 |
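# A minimal usage sketch of FastAgent (hypothetical helper added for illustration; it is
# not part of the original file and is never called). `obs` is a board_size x board_size
# numpy array holding 1 (black), -1 (white) and 0 (empty); `action` is the opponent's
# last move, or None on an empty board. On an empty board, play() short-circuits to the
# center move before any alpha-beta search runs.
def _fast_agent_usage_sketch():
    agent = FastAgent(color=1, depth=1)  # 1 == BLACK in rules.py
    empty_board = np.zeros((15, 15), dtype=int)
    action, pi, best_actions, score_dict = agent.play(empty_board, None, 0)
    assert action == (7, 7)  # black's forced opening: the board center
    assert pi[coordinate2index(action, 15)] == 1  # pi is one-hot over the 225 cells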
468 | def get_best_action_list(score_dict): 469 | best_action_list = [] 470 | max_score = MIN 471 | for key in score_dict: 472 | if max_score < score_dict[key]: 473 | best_action_list = [key] 474 | max_score = score_dict[key] 475 | elif max_score == score_dict[key]: 476 | best_action_list.append(key) 477 | return best_action_list 478 | 479 | 480 | def print_score_dict(score_dict): 481 | for key in score_dict: 482 | print(str(key) + ': ' + str(score_dict[key])) 483 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/human.py: -------------------------------------------------------------------------------- 1 | from .agent import Agent 2 | from .mcts import coordinate2index 3 | import numpy as np 4 | 5 | 6 | class HumanAgent(Agent): 7 | def __init__(self, renderer, color, board_size): 8 | self._renderer = renderer 9 | self._color = color 10 | self._board_size = board_size 11 | 12 | def set_renderer(self, renderer): 13 | self._renderer = renderer 14 | 15 | def play(self, obs, action, stone_num, *args): 16 | x, y = self._renderer.ask_for_click() 17 | ind = coordinate2index((x, y), self._board_size) 18 | pi = np.zeros(self._board_size * self._board_size) 19 | pi[ind] = 1 20 | return (x, y), pi, None, None 21 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/mcts.py: -------------------------------------------------------------------------------- 1 | from .node import Node 2 | import numpy as np 3 | from ..rules import * 4 | from ..utils import * 5 | import time 6 | import threading 7 | from ..config import tau_decay 8 | 9 | 10 | class MCTS: 11 | def __init__(self, conf, black_net, white_net, color, use_stochastic_policy): 12 | # hyperparameters 13 | self._c_puct = conf['c_puct'] # PUCT 14 | self._simulation_times = conf['simulation_times'] # number of simulations 15 | self._initial_tau = conf['initial_tau'] # temperature parameter 16 | self._tau = self._initial_tau 17 | self._epsilon = conf['epsilon'] # proportion of dirichlet noise 18 | self._use_dirichlet = conf['use_dirichlet'] 19 | self._alpha = conf['alpha'] 20 | self._board_size = conf['board_size'] 21 | self._color = color # MCTS Agent's color (1 for black; -1 for white) 22 | 23 | self._root = Node(1.0, None, BLACK, conf['virtual_loss']) # Monte Carlo tree 24 | 25 | self._black_net = black_net 26 | self._white_net = white_net 27 | 28 | self._is_self_play = conf['is_self_play'] 29 | self._use_stochastic_policy = use_stochastic_policy 30 | self._careful_stage = conf['careful_stage'] 31 | 32 | self._threading_num = conf['threading_num'] 33 | self._virtual_loss = conf['virtual_loss'] 34 | self._expanding_list = [] 35 | 36 | def set_self_play(self, is_self_play): 37 | self._is_self_play = is_self_play 38 | 39 | def set_stochastic_policy(self, use_stochastic_policy): 40 | self._use_stochastic_policy = use_stochastic_policy 41 | 42 | def reset(self): 43 | self._root = Node(1.0, None, BLACK, self._virtual_loss) 44 | 45 | def action(self, board, last_action, stage): 46 | # step 1: rebase tree 47 | # so far the root corresponds to the last board = board - last_action 48 | # thus we need to find out the node that corresponds to the argument [board] 49 | 50 | # if the current root is a leaf node, then we should simulate in advance 51 | if self._root.is_leaf(): 52 | last_board = np.copy(board) 53 | # A special case: if the board is empty, then last_action is None 54 | if last_action is not None: 55 | row, col = last_action[0], last_action[1]
56 | last_board[row][col] = 0 57 | 58 | # now the last_board corresponds to the root 59 | self._simulate(last_board, last_action) 60 | 61 | # if the current root is not a leaf, then we can move the root to the child node corresponding 62 | # to the board directly 63 | if last_action is not None: 64 | # last action might be None (when the board is empty) 65 | last_action_ind = coordinate2index(last_action, self._board_size) 66 | self._root = self._root.children()[last_action_ind] 67 | 68 | # now the root corresponds to the board 69 | # update tau 70 | if self._tau < 0.04: 71 | self._careful_stage = 0 72 | else: 73 | self._tau = self._initial_tau * (tau_decay ** int(stage / 2)) 74 | 75 | original_pi, pi = self._predict(board, last_action) 76 | 77 | # action decision 78 | if self._use_stochastic_policy and stage <= self._careful_stage: # stochastic policy 79 | position_list = [i for i in range(self._board_size * self._board_size)] 80 | action = np.random.choice(position_list, p=pi) 81 | else: # deterministic policy 82 | action = np.argmax(pi) 83 | 84 | next_node = self._root.children()[action] 85 | prior_prob = next_node.P() 86 | value = next_node.value 87 | 88 | # adjust the root node and discard the remainder of the tree 89 | if not self._is_self_play: 90 | self._root = self._root.children()[action] 91 | 92 | return action, original_pi, prior_prob, value 93 | # return pi for training use 94 | 95 | def _predict(self, board, last_move): 96 | # now board corresponds to the root, last_move is the last move of the board 97 | self._simulate(board, last_move) 98 | # generate the action distribution 99 | original_pi = np.array([node.N * 1.0 for node in self._root.children()]) 100 | try: 101 | pi = np.array([node.N ** (1 / self._tau) for node in self._root.children()]) 102 | except (OverflowError, ZeroDivisionError): # N ** (1 / tau) can overflow once tau gets very small 103 | pi = original_pi 104 | self._tau = 0.03 105 | 106 | if len(pi) != len(board) ** 2: 107 | print('>> error: MCTS._predict') 108 | print(len(pi)) 109 | return 110 | original_pi /= sum(original_pi) 111 | pi /= sum(pi) 112 | 113 | return original_pi, pi 114 | 115 | # ROOT BOARD MUST CORRESPOND TO THE ROOT NODE!!!
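# The factory below drives tree-parallel MCTS: `threading_num` worker threads share one
# search tree. Each time a thread walks through a node during selection it adds
# `virtual_loss` to that node's visit count N, which lowers the node's UCB score and
# steers the other threads toward different branches; Node.backup() later removes the
# virtual loss and records the real visit. `_expanding_list` makes a thread wait while
# another thread is expanding the same leaf, so a leaf is never expanded twice.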
116 | def _get_simulate_thread_target(self, root_board, last_move): 117 | def _simulate_thread(): 118 | legal_vec_root = board2legalvec(root_board) 119 | each_simulation_times = int(self._simulation_times / self._threading_num) 120 | 121 | for epoch in range(each_simulation_times): 122 | # initialize the current node as the root node and the current color as the color of the root 123 | current_node = self._root 124 | current_color = self._root.color 125 | 126 | legal_vec_current = np.copy(legal_vec_root) # deep copy 127 | current_board = np.copy(root_board) 128 | 129 | # initialize select_action as last_move 130 | select_action = last_move 131 | 132 | # so far, root node might be a leaf (e.g. root_board is empty) 133 | 134 | # if the root node is not a leaf, then it will enter the following loop 135 | while not current_node.is_leaf(): 136 | current_node, select_action_ind = current_node.select(self._c_puct, legal_vec_current) 137 | 138 | # add virtual loss in order to make other threads avoid this node 139 | current_node.select_num += 1 140 | current_node.N += self._virtual_loss 141 | 142 | # update legal vector 143 | legal_vec_current[select_action_ind] = 0 144 | 145 | # update current board 146 | row, col = index2coordinate(select_action_ind, self._board_size) 147 | current_board[row][col] = current_color 148 | select_action = (row, col) 149 | 150 | # update current color 151 | current_color = -current_color 152 | 153 | # if current node is not a leaf node, then it can't be in expanding list. 154 | # if current node is a leaf node, it may be expanding in another thread, so here we wait until it 155 | # is expanded (so that it is no longer a leaf node) 156 | while current_node in self._expanding_list: 157 | time.sleep(1e-4) 158 | 159 | # so far, current node must be a leaf node (including end node) 160 | if current_node.is_end: 161 | current_node.backup(-current_node.value) 162 | continue 163 | 164 | # add current node to expanding list 165 | if current_node not in self._expanding_list: 166 | self._expanding_list.append(current_node) 167 | else: 168 | continue 169 | 170 | # calculate the prior probabilities and value 171 | if current_color == BLACK: 172 | net = self._black_net 173 | else: 174 | net = self._white_net 175 | p, v = net.predict(board=current_board, 176 | color=current_color, 177 | last_move=select_action) 178 | current_node.value = v 179 | prior_prob = p[0] 180 | 181 | if self._use_dirichlet: 182 | alpha = [self._alpha] * (self._board_size * self._board_size) 183 | noise = np.random.dirichlet(alpha) 184 | prior_prob = (1 - self._epsilon) * prior_prob + self._epsilon * noise 185 | 186 | # now check whether this leaf node is an end node or not 187 | if select_action is not None: 188 | end_flag = check_rules(current_board, select_action, -current_color) 189 | if end_flag == 'blackwins' or end_flag == 'whitewins' or end_flag == 'full': 190 | current_node.is_end = True 191 | if end_flag == 'full': 192 | current_node.value = 0 193 | else: 194 | current_node.value = -1 195 | else: 196 | current_node.expand(prior_prob, self._board_size) 197 | else: 198 | # if action is None, then the root node must be a leaf 199 | current_node.expand(prior_prob, self._board_size) 200 | 201 | self._expanding_list.remove(current_node) 202 | 203 | # backup 204 | current_node.backup(-current_node.value) 205 | 206 | return _simulate_thread 207 | 208 | def _simulate(self, root_board, last_move): 209 | target = self._get_simulate_thread_target(root_board, last_move) 210 | thread_list = [] 211 | for i in
range(self._threading_num): 212 | thr = threading.Thread(target=target, name='thread_' + str(i + 1)) 213 | thr.start() 214 | thread_list.append(thr) 215 | time.sleep(1e-3) 216 | for thr in thread_list: 217 | thr.join() 218 | 219 | 220 | def check_rules(board, action_cor, color): 221 | stone_num = sum(sum(np.abs(board))) 222 | if stone_num <= 8: # Impossible to end since the maximal length of consecutive lines with the same color is four. 223 | return 'continue' 224 | else: 225 | if stone_num == board.shape[0] * board.shape[0]: 226 | return 'full' 227 | else: # Greedy Match 228 | # cor = index2coordinate(action, board.shape[0]) 229 | # Horizontal Check 230 | count = 1 231 | for i in range(1, 5): 232 | if action_cor[1] + i <= board.shape[0] - 1: 233 | if board[action_cor[0]][action_cor[1] + i] == color: 234 | count += 1 235 | else: 236 | break 237 | else: 238 | break 239 | for i in range(1, 5): 240 | if action_cor[1] - i >= 0: 241 | if board[action_cor[0]][action_cor[1] - i] == color: 242 | count += 1 243 | else: 244 | break 245 | else: 246 | break 247 | if count >= 5: 248 | if color == 1: 249 | return 'blackwins' 250 | else: 251 | return 'whitewins' 252 | # Vertical Check 253 | count = 1 254 | for i in range(1, 5): 255 | if action_cor[0] + i <= board.shape[0] - 1: 256 | if board[action_cor[0] + i][action_cor[1]] == color: 257 | count += 1 258 | else: 259 | break 260 | else: 261 | break 262 | for i in range(1, 5): 263 | if action_cor[0] - i >= 0: 264 | if board[action_cor[0] - i][action_cor[1]] == color: 265 | count += 1 266 | else: 267 | break 268 | else: 269 | break 270 | if count >= 5: 271 | if color == 1: 272 | return 'blackwins' 273 | else: 274 | return 'whitewins' 275 | # Diagonal Check 276 | count = 1 277 | for i in range(1, 5): 278 | if (action_cor[0] + i <= board.shape[0] - 1) and (action_cor[1] + i <= board.shape[0] - 1): 279 | if board[action_cor[0] + i][action_cor[1] + i] == color: 280 | count += 1 281 | else: 282 | break 283 | else: 284 | break 285 | for i in range(1, 5): 286 | if (action_cor[0] - i >= 0) and (action_cor[1] - i >= 0): 287 | if board[action_cor[0] - i][action_cor[1] - i] == color: 288 | count += 1 289 | else: 290 | break 291 | else: 292 | break 293 | if count >= 5: 294 | if color == 1: 295 | return 'blackwins' 296 | else: 297 | return 'whitewins' 298 | # Anti-Diagonal Check 299 | count = 1 300 | for i in range(1, 5): 301 | if (action_cor[0] + i <= board.shape[0] - 1) and (action_cor[1] - i >= 0): 302 | if board[action_cor[0] + i][action_cor[1] - i] == color: 303 | count += 1 304 | else: 305 | break 306 | else: 307 | break 308 | for i in range(1, 5): 309 | if (action_cor[0] - i >= 0) and (action_cor[1] + i <= board.shape[0] - 1): 310 | if board[action_cor[0] - i][action_cor[1] + i] == color: 311 | count += 1 312 | else: 313 | break 314 | else: 315 | break 316 | if count >= 5: 317 | if color == 1: 318 | return 'blackwins' 319 | else: 320 | return 'whitewins' 321 | -------------------------------------------------------------------------------- /AlphaGomoku/agent/node.py: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | import numpy as np 3 | from ..config import * 4 | 5 | 6 | class Node: 7 | count = 0 8 | backup_count = 0 9 | conflict_count = 0 10 | 11 | def __init__(self, prior_prob, parent, color, virtual_loss): 12 | 13 | # actually N, Q, W, U are properties of edge 14 | self.N = 0 # Number of visits 15 | self._Q = 0 # Quality of the edge 16 | self.W = 0 # Intermediate value for Q update 17 | self._P = 
prior_prob # Prior probability predicted by network 18 | self._U = 0 19 | 20 | self._virtual_loss = virtual_loss 21 | self.select_num = 0 22 | 23 | self._parent = parent 24 | self._children = [] # if self._children is an empty list, it is viewed as a leaf node 25 | 26 | # when it is an end leaf 27 | self.is_end = False 28 | self.value = 0 29 | 30 | self.color = color # color of next player 31 | self.num = Node.count 32 | Node.count += 1 33 | 34 | def P(self): 35 | return self._P 36 | 37 | def Q(self): 38 | return self._Q 39 | 40 | def U(self): 41 | return self._U 42 | 43 | def parent(self): 44 | return self._parent 45 | 46 | def children(self): 47 | return self._children 48 | 49 | def is_root(self): 50 | return self._parent is None 51 | 52 | def is_leaf(self): 53 | return self._children == [] 54 | 55 | def upper_confidence_bound(self, c_puct): 56 | try: 57 | self._U = c_puct * self._P * sqrt(self._parent.N) / (1 + self.N) 58 | except ValueError: 59 | print('> valueError: Node.upper_confidence_bound') 60 | print(self._U, self._P, self._parent.N, self.N) 61 | return self._U + self._Q 62 | 63 | def select(self, c_puct, legal_vec_current): 64 | ucb_list = np.array([node.upper_confidence_bound(c_puct) for node in self._children]) 65 | ind = np.argsort(ucb_list) 66 | for i in range(len(ind)): 67 | if legal_vec_current[ind[-(i + 1)]] == 1: 68 | action = ind[-(i + 1)] 69 | break 70 | next_node = self._children[action] 71 | return next_node, action 72 | 73 | def expand(self, prior_prob, board_size=15): 74 | if not self.is_leaf(): 75 | print('> error: node.expand') 76 | return 77 | for i in range(board_size * board_size): 78 | prob = prior_prob[i] 79 | self._children.append(Node(prob, self, -self.color, self._virtual_loss)) 80 | 81 | def backup(self, value): 82 | # remove virtual loss 83 | if self.select_num > 0: 84 | self.select_num -= 1 85 | self.N -= self._virtual_loss 86 | if self.N < 0: 87 | self.N += self._virtual_loss 88 | 89 | self.N += 1 90 | self.W += value 91 | self._Q = self.W / self.N 92 | if not self.is_root(): 93 | self._parent.backup(-value_decay * value) 94 | -------------------------------------------------------------------------------- /AlphaGomoku/config.py: -------------------------------------------------------------------------------- 1 | value_decay = 0.95 2 | 3 | tau_decay = 0.8 4 | 5 | 6 | class Config(dict): 7 | def __init__(self, **kwargs): 8 | # mode 1: training mode, 2: AI vs Human, 3: Human vs Human, 0: Debug 9 | self['mode'] = 1 10 | 11 | # display mode 12 | self['display'] = False 13 | 14 | # screen size of renderer 15 | self['screen_size'] = (720, 720) 16 | 17 | # self play mode 18 | self['is_self_play'] = True 19 | 20 | # true: 3-3, 4-4, 6+ are not allowed for black 21 | self['forbidden_moves'] = False 22 | 23 | # PUCT: when c_puct gets smaller, the simulation becomes deeper 24 | self['c_puct'] = 5 25 | 26 | # simulation times 27 | self['simulation_times'] = 400 28 | 29 | # initial tau 30 | self['initial_tau'] = 1 31 | 32 | # proportion of dirichlet noise 33 | self['epsilon'] = 0.25 34 | 35 | # coef of dirichlet noise 36 | self['alpha'] = 0.03 37 | 38 | # use dirichlet 39 | self['use_dirichlet'] = False 40 | 41 | # board size 42 | self['board_size'] = 15 43 | 44 | # epoch: number of games played to train 45 | self['epoch'] = 20 46 | 47 | # sample percentage 48 | self['sample_percentage'] = 1 49 | 50 | # number of games in each training epoch 51 | self['games_num'] = 30 52 | 53 | # learning rate 54 | self['learning_rate'] = 2e-3 55 | 56 | # momentum 57 | 
self['momentum'] = 9e-1 58 | 59 | # coefficient of l2 penalty 60 | self['l2'] = 1e-4 61 | 62 | # path of network parameters 63 | self['net_para_file'] = 'AlphaGomoku/network/model/model_' + str(self['board_size']) + '.h5' 64 | 65 | # path of history of fitting 66 | self['fit_history_file'] = 'AlphaGomoku/network/history/log_' + str(self['board_size']) 67 | 68 | # human play data path 69 | self['human_play_data_path'] = 'AlphaGomoku/dataset/human_play_data/human_' + str(self['board_size']) + '_' 70 | 71 | # self play data path 72 | self['self_play_data_path'] = 'AlphaGomoku/dataset/self_play_data/self_play_' + str( 73 | self['board_size']) + '_' 74 | 75 | # generated data path 76 | self['generated_data_path'] = 'AlphaGomoku/dataset/generated_data/gen_' 77 | 78 | # use previous model 79 | self['use_previous_model'] = True 80 | 81 | # number of games played for evaluation, must be an even number!!! 82 | self['evaluate_games_num'] = 20 83 | 84 | # epoch from which evaluation starts 85 | self['evaluate_start_epoch'] = 1 86 | 87 | # Mini-Batch Size 88 | self['mini_batch_size'] = 512 89 | 90 | # fit epochs, number of each sample used 91 | self['fit_epochs'] = 10 92 | 93 | # use supervised learning 94 | self['is_supervised'] = False 95 | 96 | # careful stage 97 | self['careful_stage'] = 6 98 | 99 | # number of threads 100 | self['threading_num'] = 8 101 | 102 | # virtual loss 103 | self['virtual_loss'] = 10 104 | 105 | # show evaluation score given by agent 106 | self['show_score'] = True 107 | 108 | self.update(**kwargs) 109 | 110 | def update(self, **kwargs): 111 | for key in kwargs: 112 | self[key] = kwargs[key] 113 | 114 | def set_mode(self, mode): 115 | if mode not in [1, 2, 2.5, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0]: 116 | print('> Error: mode not found!') 117 | mode = 1 118 | if mode == 1: 119 | self['display'] = False 120 | self['is_self_play'] = True 121 | self['mode'] = 1 122 | self['show_score'] = False 123 | print('> Training mode') 124 | if mode == 2: 125 | self['display'] = True 126 | self['is_self_play'] = False 127 | self['mode'] = 2 128 | self['simulation_times'] = 800 129 | self['show_score'] = False 130 | print('> AI vs Human mode') 131 | if mode == 2.5: 132 | self['display'] = True 133 | self['is_self_play'] = False 134 | self['mode'] = 2.5 135 | self['simulation_times'] = 800 136 | self['show_score'] = False 137 | print('> AI vs Human mode') 138 | if mode == 3: 139 | self['display'] = True 140 | self['is_self_play'] = False 141 | self['mode'] = 3 142 | print('> Human vs Human mode') 143 | if mode == 4: 144 | self['display'] = False 145 | self['is_self_play'] = False 146 | self['show_score'] = False 147 | self['mode'] = 4 148 | self['simulation_times'] = 400 149 | print('> AI vs AI mode') 150 | if mode == 5: 151 | self['display'] = True 152 | self['is_self_play'] = False 153 | self['mode'] = 5 154 | self['games_num'] = 100 155 | print('> Collect human play data mode') 156 | if mode == 6: 157 | self['display'] = False 158 | self['is_self_play'] = True 159 | self['mode'] = 6 160 | self['games_num'] = 20 161 | self['epoch'] = 10 162 | self['simulation_times'] = 1600 163 | self['careful_stage'] = 226 164 | self['show_score'] = False 165 | print('> Collect self play data mode') 166 | if mode == 7: 167 | self['display'] = False 168 | self['is_self_play'] = True 169 | self['mode'] = 7 170 | self['is_supervised'] = True 171 | self['show_score'] = False 172 | print('> Train on external data mode') 173 | if mode == 8: 174 | self['display'] = True 175 | self['is_self_play'] = False 176 
| self['mode'] = 8 177 | print('> Collect human vs AI play data mode') 178 | if mode == 9: 179 | self['display'] = True 180 | self['is_self_play'] = False 181 | self['mode'] = 9 182 | print('> AI(NaiveAgent) vs Human mode') 183 | if mode == 10: 184 | self['display'] = False 185 | self['is_self_play'] = False 186 | self['mode'] = 10 187 | self['show_score'] = False 188 | print('> AI vs AI(NaiveAgent) mode') 189 | if mode == 11: 190 | self['display'] = False 191 | self['is_self_play'] = False 192 | self['mode'] = 11 193 | print('> Train on generated data mode') 194 | self['simulation_times'] = 1600 195 | self['games_num'] = 50 196 | self['epoch'] = 100 197 | self['show_score'] = False 198 | if mode == 12: 199 | self['display'] = False 200 | self['is_self_play'] = False 201 | self['mode'] = 12 202 | self['games_num'] = 100 203 | self['epoch'] = 20 204 | self['show_score'] = True 205 | print('> Collect self play data mode') 206 | if mode == 13: 207 | self['display'] = False 208 | self['is_self_play'] = True 209 | self['show_score'] = False 210 | self['epoch'] = 10 211 | self['games_num'] = 60 212 | self['simulation_times'] = 1600 213 | self['careful_stage'] = 226 # disable careful stage 214 | self['mode'] = 13 215 | print('> Self play and train mode') 216 | if mode == 0: 217 | self['display'] = True 218 | self['is_self_play'] = True 219 | self['mode'] = 0 220 | self['simulation_times'] = 100 221 | self['games_num'] = 3 222 | self['epoch'] = 2 223 | self['show_score'] = True 224 | print('> Debug mode') 225 | 226 | def print_current_config(self): 227 | print('------------------') 228 | print('> CURRENT CONFIG:') 229 | for key in self: 230 | print('{}: {}'.format(key, self[key])) 231 | print('------------------') 232 | -------------------------------------------------------------------------------- /AlphaGomoku/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import * 2 | from .generator import * -------------------------------------------------------------------------------- /AlphaGomoku/dataset/dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import os 4 | from ..config import value_decay 5 | 6 | 7 | class DataSet: 8 | def __init__(self): 9 | self._game_record = [] 10 | 11 | def clear(self): 12 | self._game_record = [] 13 | 14 | def add_record(self, record): 15 | self._game_record.append(record) 16 | 17 | def get_sample(self, percentage, shuffle=True): 18 | obs = [] 19 | col = [] 20 | last_move = [] 21 | pi = [] 22 | z = [] 23 | for record in self._game_record: 24 | a, b, c, d, e = record.get_sample(percentage, shuffle) 25 | obs.extend(a) 26 | col.extend(b) 27 | last_move.extend(c) 28 | pi.extend(d) 29 | z.extend(e) 30 | return obs, col, last_move, pi, z 31 | 32 | def record_num(self): 33 | return len(self._game_record) 34 | 35 | def save(self, path): 36 | obs, col, last_move, pi, z = self.get_sample(1) 37 | 38 | obs_path = path + 'obs' 39 | np.save(obs_path, obs) 40 | 41 | col_path = path + 'col' 42 | np.save(col_path, col) 43 | 44 | last_move_path = path + 'last_move' 45 | np.save(last_move_path, last_move) 46 | 47 | pi_path = path + 'pi' 48 | np.save(pi_path, pi) 49 | 50 | z_path = path + 'z' 51 | np.save(z_path, z) 52 | 53 | print('> ' + str(len(z)) + ' positions of data saved') 54 | 55 | def load(self, path): 56 | if not os.path.exists(path + 'obs.npy'): 57 | print('> error: data file ' + path + 'obs.npy' + ' not found') 58 | return 59 |
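# The five .npy files read below are parallel arrays written by DataSet.save(): for every
# index i, obs/col/last_move/pi/z describe the same position, which is why they can be
# zipped back into a single GameRecord. With the default config, path would be e.g.
# 'AlphaGomoku/dataset/human_play_data/human_15_', so this loads 'human_15_obs.npy',
# 'human_15_col.npy', and so on.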
60 | obs_path = path + 'obs.npy' 61 | obs = np.load(obs_path) 62 | 63 | col_path = path + 'col.npy' 64 | col = np.load(col_path) 65 | 66 | last_move_path = path + 'last_move.npy' 67 | last_move = np.load(last_move_path) 68 | 69 | pi_path = path + 'pi.npy' 70 | pi = np.load(pi_path) 71 | 72 | z_path = path + 'z.npy' 73 | z = np.load(z_path) 74 | 75 | size = len(z) 76 | record = GameRecord() 77 | for i in range(size): 78 | record.add(obs[i], col[i], last_move[i], pi[i], z[i]) 79 | self.add_record(record) 80 | 81 | 82 | class GameRecord: 83 | def __init__(self): 84 | self._obs_list = [] 85 | self._color_list = [] 86 | self._last_move_list = [] 87 | self._pi_list = [] 88 | self._z_list = [] 89 | self._total_num = 0 90 | 91 | def add(self, obs, color, last_move, pi, z=None): 92 | self._obs_list.append(obs) 93 | self._color_list.append(color) 94 | self._last_move_list.append(last_move) 95 | self._pi_list.append(pi) 96 | self._z_list.append(z) 97 | self._total_num += 1 98 | 99 | def add_list(self, obs, color, last_move, pi, z): 100 | self._obs_list.extend(obs) 101 | self._color_list.extend(color) 102 | self._last_move_list.extend(last_move) 103 | self._pi_list.extend(pi) 104 | self._z_list.extend(z) 105 | self._total_num += len(z) 106 | 107 | # the method to define the value of z 108 | def set_z(self, result): 109 | if result == 0: 110 | self._z_list = [0 for _ in range(self._total_num)] 111 | return 112 | for i in range(self._total_num): 113 | if result == self._color_list[i]: 114 | self._z_list[i] = 1 * value_decay ** (self._total_num - i - 1) 115 | else: 116 | self._z_list[i] = -1 * value_decay ** (self._total_num - i - 1) 117 | 118 | def get_sample(self, percentage, shuffle=True): 119 | if shuffle: 120 | sample_num = int(self._total_num * percentage) 121 | indices = random.sample([i for i in range(self._total_num)], sample_num) 122 | obs_sample = [self._obs_list[index] for index in indices] 123 | color_sample = [self._color_list[index] for index in indices] 124 | last_move_sample = [self._last_move_list[index] for index in indices] 125 | pi_sample = [self._pi_list[index] for index in indices] 126 | z_sample = [self._z_list[index] for index in indices] 127 | return obs_sample, color_sample, last_move_sample, pi_sample, z_sample 128 | else: 129 | return self._obs_list, self._color_list, self._last_move_list, self._pi_list, self._z_list 130 | -------------------------------------------------------------------------------- /AlphaGomoku/dataset/generator.py: -------------------------------------------------------------------------------- 1 | from ..ui import * 2 | from ..config import * 3 | from ..utils import * 4 | from .dataset import * 5 | 6 | 7 | class Generator: 8 | def __init__(self, board_size, max_noise_stone_num): 9 | self._board_size = board_size 10 | self._max_noise_stone_num = max_noise_stone_num 11 | 12 | @log 13 | def generate_live_4_attack(self, sample_num=10000): 14 | record = GameRecord() 15 | i = 0 16 | while i < sample_num: 17 | color = np.random.random_integers(0, 1) * 2 - 1 18 | board = self._empty_board() 19 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=4) 20 | if len(fix_pos_list) == 0: 21 | continue 22 | 23 | for x, y in pos_list: 24 | board[x][y] = color 25 | 26 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 27 | if len(fix_pos_list) == 2: 28 | ind_1 = coordinate2index(fix_pos_list[0], self._board_size) 29 | ind_2 = coordinate2index(fix_pos_list[1], self._board_size) 30 | pi[ind_1], pi[ind_2] = 0.5, 0.5 31 | if len(fix_pos_list) 
== 1: 32 | ind = coordinate2index(fix_pos_list[0], self._board_size) 33 | pi[ind] = 1 34 | 35 | self._add_noise(board=board, next_player=color, max_stone_num=self._max_noise_stone_num, 36 | fix_pos_list=fix_pos_list) 37 | 38 | record.add(obs=board, color=color, last_move=pos_list[0], pi=pi, z=1) 39 | i += 1 40 | return record 41 | 42 | @log 43 | def generate_live_4_defend(self, sample_num=10000): 44 | record = GameRecord() 45 | i = 0 46 | while i < sample_num: 47 | color = np.random.random_integers(0, 1) * 2 - 1 48 | board = self._empty_board() 49 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=4) 50 | if len(fix_pos_list) == 0: 51 | continue 52 | 53 | for x, y in pos_list: 54 | board[x][y] = color 55 | 56 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 57 | if len(fix_pos_list) == 2: 58 | ind_1 = coordinate2index(fix_pos_list[0], self._board_size) 59 | ind_2 = coordinate2index(fix_pos_list[1], self._board_size) 60 | pi[ind_1], pi[ind_2] = 0.5, 0.5 61 | if len(fix_pos_list) == 1: 62 | ind = coordinate2index(fix_pos_list[0], self._board_size) 63 | pi[ind] = 1 64 | 65 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 66 | fix_pos_list=fix_pos_list) 67 | 68 | record.add(obs=board, color=-color, last_move=pos_list[0], pi=pi, z=-1) 69 | i += 1 70 | return record 71 | 72 | @log 73 | def generate_dead_4_oooo_defend(self, sample_num=10000): 74 | record = GameRecord() 75 | i = 0 76 | while i < sample_num: 77 | color = np.random.random_integers(0, 1) * 2 - 1 78 | board = self._empty_board() 79 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=4) 80 | if len(fix_pos_list) == 0: 81 | continue 82 | 83 | for x, y in pos_list: 84 | board[x][y] = color 85 | 86 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 87 | if len(fix_pos_list) == 2: 88 | ind = coordinate2index(fix_pos_list[0], self._board_size) 89 | pi[ind] = 1 90 | fx, fy = fix_pos_list[1][0], fix_pos_list[1][1] 91 | board[fx][fy] = -color 92 | if len(fix_pos_list) == 1: 93 | ind = coordinate2index(fix_pos_list[0], self._board_size) 94 | pi[ind] = 1 95 | 96 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 97 | fix_pos_list=fix_pos_list) 98 | 99 | record.add(obs=board, color=-color, last_move=pos_list[0], pi=pi, 100 | z=0) # last move should be next to an empty position 101 | i += 1 102 | return record 103 | 104 | @log 105 | def generate_dead_4_ooo_o_defend(self, sample_num=10000): 106 | record = GameRecord() 107 | for _ in range(sample_num): 108 | color = np.random.random_integers(0, 1) * 2 - 1 109 | board = self._empty_board() 110 | pos_list, _ = self._generate_consecutive_line(consecutive_num=5) 111 | fix_pos_list = [pos_list[3]] 112 | 113 | for x, y in pos_list: 114 | board[x][y] = color 115 | board[pos_list[3][0]][pos_list[3][1]] = 0 116 | 117 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 118 | 119 | ind = coordinate2index(pos_list[3], self._board_size) 120 | pi[ind] = 1 121 | 122 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 123 | fix_pos_list=fix_pos_list) 124 | 125 | record.add(obs=board, color=-color, last_move=pos_list[1], pi=pi, 126 | z=0) # last move should be next to an empty position 127 | return record 128 | 129 | @log 130 | def generate_dead_4_oo_oo_defend(self, sample_num=10000): 131 | record = GameRecord() 132 | for _ in range(sample_num): 133 | color = np.random.random_integers(0, 1) * 2 - 1 134 | board = 
self._empty_board() 135 | pos_list, _ = self._generate_consecutive_line(consecutive_num=5) 136 | fix_pos_list = [pos_list[2]] 137 | 138 | for x, y in pos_list: 139 | board[x][y] = color 140 | board[pos_list[2][0]][pos_list[2][1]] = 0 141 | 142 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 143 | 144 | ind = coordinate2index(pos_list[2], self._board_size) 145 | pi[ind] = 1 146 | 147 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 148 | fix_pos_list=fix_pos_list) 149 | 150 | record.add(obs=board, color=-color, last_move=pos_list[1], pi=pi, 151 | z=0) # last move should be next to an empty position 152 | return record 153 | 154 | @log 155 | def generate_live_3_ooo_attack(self, sample_num=10000): 156 | record = GameRecord() 157 | i = 0 158 | while i < sample_num: 159 | color = np.random.random_integers(0, 1) * 2 - 1 160 | board = self._empty_board() 161 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=3) 162 | if len(fix_pos_list) == 0 or len(fix_pos_list) == 1: 163 | continue 164 | 165 | for x, y in pos_list: 166 | board[x][y] = color 167 | 168 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 169 | ind_1 = coordinate2index(fix_pos_list[0], self._board_size) 170 | ind_2 = coordinate2index(fix_pos_list[1], self._board_size) 171 | pi[ind_1], pi[ind_2] = 0.5, 0.5 172 | 173 | self._add_noise(board=board, next_player=color, max_stone_num=self._max_noise_stone_num, 174 | fix_pos_list=fix_pos_list) 175 | 176 | record.add(obs=board, color=color, last_move=pos_list[1], pi=pi, z=1) 177 | i += 1 178 | return record 179 | 180 | @log 181 | def generate_live_3_oo_o_attack(self, sample_num=10000): 182 | record = GameRecord() 183 | i = 0 184 | while i < sample_num: 185 | color = np.random.random_integers(0, 1) * 2 - 1 186 | board = self._empty_board() 187 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=4) 188 | if len(fix_pos_list) == 0 or len(fix_pos_list) == 1: 189 | continue 190 | 191 | fix_pos_list.append(list(pos_list[2])) 192 | 193 | for x, y in pos_list: 194 | board[x][y] = color 195 | board[pos_list[2][0]][pos_list[2][1]] = 0 196 | 197 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 198 | ind = coordinate2index(pos_list[2], self._board_size) 199 | pi[ind] = 1 200 | 201 | self._add_noise(board=board, next_player=color, max_stone_num=self._max_noise_stone_num, 202 | fix_pos_list=fix_pos_list) 203 | 204 | record.add(obs=board, color=color, last_move=pos_list[1], pi=pi, z=1) 205 | i += 1 206 | return record 207 | 208 | @log 209 | def generate_live_3_ooo_defend(self, sample_num=10000): 210 | record = GameRecord() 211 | i = 0 212 | while i < sample_num: 213 | color = np.random.random_integers(0, 1) * 2 - 1 214 | board = self._empty_board() 215 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=3) 216 | if len(fix_pos_list) == 0 or len(fix_pos_list) == 1: 217 | continue 218 | 219 | for x, y in pos_list: 220 | board[x][y] = color 221 | 222 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 223 | ind_1 = coordinate2index(fix_pos_list[0], self._board_size) 224 | ind_2 = coordinate2index(fix_pos_list[1], self._board_size) 225 | pi[ind_1], pi[ind_2] = 0.5, 0.5 226 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 227 | fix_pos_list=fix_pos_list) 228 | 229 | record.add(obs=board, color=-color, last_move=pos_list[1], pi=pi, 230 | z=0) # last move should be next to an empty position 231 | i += 1 232 | return 
record 233 | 234 | @log 235 | def generate_live_3_oo_o_defend(self, sample_num=10000): 236 | record = GameRecord() 237 | i = 0 238 | while i < sample_num: 239 | color = np.random.random_integers(0, 1) * 2 - 1 240 | board = self._empty_board() 241 | pos_list, fix_pos_list = self._generate_consecutive_line(consecutive_num=4) 242 | if len(fix_pos_list) == 0 or len(fix_pos_list) == 1: 243 | continue 244 | 245 | fix_pos_list.append(list(pos_list[2])) 246 | 247 | for x, y in pos_list: 248 | board[x][y] = color 249 | board[pos_list[2][0]][pos_list[2][1]] = 0 250 | 251 | pi = np.array([0.0 for _ in range(self._board_size ** 2)]) 252 | ind_1 = coordinate2index(fix_pos_list[0], self._board_size) 253 | ind_2 = coordinate2index(fix_pos_list[1], self._board_size) 254 | ind_3 = coordinate2index(fix_pos_list[2], self._board_size) 255 | pi[ind_1], pi[ind_2], pi[ind_3] = 0.25, 0.25, 0.5 256 | 257 | self._add_noise(board=board, next_player=-color, max_stone_num=self._max_noise_stone_num, 258 | fix_pos_list=fix_pos_list) 259 | 260 | record.add(obs=board, color=-color, last_move=pos_list[1], pi=pi, 261 | z=0) # last move should be next to an empty position 262 | i += 1 263 | return record 264 | 265 | def _generate_consecutive_line(self, consecutive_num): 266 | start_pos = np.random.random_integers(0, self._board_size - 1, 2) 267 | end_pos = [-1, -1] 268 | while end_pos[0] < 0 or end_pos[0] > 14 or end_pos[1] < 0 or end_pos[1] > 14: 269 | dx, dy = list(np.random.random_integers(-1, 1, 2)) 270 | if dx == 0 and dy == 0: 271 | continue 272 | end_pos[0] = start_pos[0] + (consecutive_num - 1) * dx 273 | end_pos[1] = start_pos[1] + (consecutive_num - 1) * dy 274 | fix_pos_list = [] 275 | if dx == 0: 276 | x_list = [start_pos[0]] * consecutive_num 277 | else: 278 | x_list = list(range(start_pos[0], end_pos[0] + dx, dx)) 279 | if dy == 0: 280 | y_list = [start_pos[1]] * consecutive_num 281 | else: 282 | y_list = list(range(start_pos[1], end_pos[1] + dy, dy)) 283 | 284 | fp_1 = [start_pos[0] - dx, start_pos[1] - dy] 285 | if fp_1[0] in list(range(0, self._board_size)) and fp_1[1] in list(range(0, self._board_size)): 286 | fix_pos_list.append(fp_1) 287 | fp_2 = [end_pos[0] + dx, end_pos[1] + dy] 288 | if fp_2[0] in list(range(0, self._board_size)) and fp_2[1] in list(range(0, self._board_size)): 289 | fix_pos_list.append(fp_2) 290 | 291 | pos_list = list(zip(x_list, y_list)) 292 | return pos_list, fix_pos_list 293 | 294 | def _empty_board(self): 295 | empty_board = [[0 for _ in range(self._board_size)] for _ in range(self._board_size)] 296 | return np.array(empty_board) 297 | 298 | def _add_noise(self, board, next_player, max_stone_num, fix_pos_list): 299 | stone_num = np.random.random_integers(30, max_stone_num) 300 | black_stone_ind = np.where(board == BLACK) 301 | white_stone_ind = np.where(board == WHITE) 302 | black_stone_num = len(black_stone_ind[0]) 303 | white_stone_num = len(white_stone_ind[0]) 304 | black_origin, white_origin = black_stone_num, white_stone_num 305 | 306 | delta = black_stone_num - white_stone_num 307 | # if black moves next and we add x black stones and y white stones, then x+b=y+w and x+y=stone_num 308 | # x-y=-delta, 2x=stone_num-delta 309 | # if white moves next and we add x black stones and y white stones, then x+b+1=y+w and x+y=stone_num 310 | # x-y=-delta-1, 2x=stone_num-delta-1 311 | 312 | if next_player == BLACK: 313 | black_stone_num = int((stone_num - delta) / 2) 314 | white_stone_num = black_stone_num + delta 315 | if black_stone_num + black_origin > white_stone_num + white_origin: 316 | white_stone_num += 1 317 | else: 318 | black_stone_num = int((stone_num - delta - 1) / 2) 319
| white_stone_num = black_stone_num + delta 320 | if black_stone_num + black_origin == white_stone_num + white_origin: 321 | black_stone_num += 1 322 | 323 | while white_stone_num > 0: 324 | pos = list(np.random.random_integers(0, self._board_size - 1, 2)) 325 | if board[pos[0]][pos[1]] == 0 and pos not in fix_pos_list: 326 | white_stone_num -= 1 327 | board[pos[0]][pos[1]] = WHITE 328 | 329 | while black_stone_num > 0: 330 | pos = list(np.random.random_integers(0, self._board_size - 1, 2)) 331 | if board[pos[0]][pos[1]] == 0 and pos not in fix_pos_list: 332 | black_stone_num -= 1 333 | board[pos[0]][pos[1]] = BLACK 334 | -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/human_15_col.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/human_15_col.npy -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/human_15_last_move.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/human_15_last_move.npy -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/human_15_obs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/human_15_obs.npy -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/human_15_pi.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/human_15_pi.npy -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/human_15_z.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/human_15_z.npy -------------------------------------------------------------------------------- /AlphaGomoku/dataset/human_play_data/save human play data here.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/human_play_data/save human play data here.txt -------------------------------------------------------------------------------- /AlphaGomoku/dataset/self_play_data/save self play data here.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/dataset/self_play_data/save self play data here.txt -------------------------------------------------------------------------------- /AlphaGomoku/env.py: -------------------------------------------------------------------------------- 1 | from . 
import * 2 | from .dataset.dataset import * 3 | # import matplotlib.pyplot as plt 4 | import os 5 | import re 6 | from .rules import * 7 | from .utils import * 8 | import shutil 9 | 10 | 11 | class Env: 12 | def __init__(self, conf): 13 | if not display_mode: 14 | conf['display'] = False 15 | print('> error: display mode is not available (requires pygame and threading)') 16 | 17 | self._conf = conf 18 | self._is_self_play = conf['is_self_play'] 19 | self._show_score = conf['show_score'] 20 | 21 | self._value_list = [] 22 | self._loss_list = [] 23 | 24 | self._network_version = self._get_model_version() 25 | self._agent_1_ver = 0 26 | self._agent_2_ver = 0 27 | 28 | self._evaluator_agent = FastAgent(color=BLACK) 29 | self._epoch = conf['epoch'] 30 | self._sample_percentage = conf['sample_percentage'] 31 | self._games_num = conf['games_num'] 32 | self._evaluate_games_num = conf['evaluate_games_num'] 33 | self._renderer = None 34 | 35 | # Training 36 | if conf['mode'] in [0, 1, 6, 7, 13]: 37 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=True) 38 | self._agent_2 = None 39 | # AI vs Human 40 | if conf['mode'] == 2: 41 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=False) 42 | self._agent_2 = HumanAgent(self._renderer, color=WHITE, board_size=conf['board_size']) 43 | 44 | if conf['mode'] == 2.5: 45 | self._agent_1 = HumanAgent(self._renderer, color=BLACK, board_size=conf['board_size']) 46 | self._agent_2 = MCTSAgent(conf, color=WHITE, use_stochastic_policy=False) 47 | # Human vs Human 48 | if conf['mode'] == 3 or conf['mode'] == 5: 49 | self._agent_1 = HumanAgent(self._renderer, color=BLACK, board_size=conf['board_size']) 50 | self._agent_2 = HumanAgent(self._renderer, color=WHITE, board_size=conf['board_size']) 51 | 52 | if conf['mode'] == 4: 53 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=False) 54 | # self._agent_2 = MCTSAgent(conf, color=WHITE, use_stochastic_policy=False) 55 | self._agent_2 = MCTSAgent(conf, color=WHITE, use_stochastic_policy=False) 56 | # self._agent_1, self._agent_2 = self._agent_2, self._agent_1 57 | 58 | if conf['mode'] == 8: 59 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=False) 60 | self._agent_2 = HumanAgent(self._renderer, color=WHITE, board_size=conf['board_size']) 61 | 62 | if conf['mode'] == 9: 63 | self._agent_1 = FastAgent(color=BLACK) 64 | self._agent_2 = HumanAgent(self._renderer, color=WHITE, board_size=conf['board_size']) 65 | 66 | if conf['mode'] == 10: 67 | # self._agent_1 = FastAgent(color=BLACK) 68 | # self._agent_2 = MCTSAgent(conf, color=WHITE, use_stochastic_policy=False) 69 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=False) 70 | self._agent_2 = FastAgent(color=WHITE) 71 | 72 | if conf['mode'] == 12: 73 | self._agent_1 = FastAgent(color=BLACK) 74 | self._agent_2 = FastAgent(color=WHITE) 75 | 76 | if conf['mode'] == 11: 77 | self._agent_1 = MCTSAgent(conf, color=BLACK, use_stochastic_policy=True) 78 | self._agent_2 = FastAgent(color=WHITE) 79 | 80 | if conf['mode'] in [0, 1, 7]: 81 | self._agent_eval = MCTSAgent(conf, color=WHITE, use_stochastic_policy=False) 82 | self._agent_eval.set_self_play(False) 83 | 84 | if self._is_self_play: 85 | self._agent_2 = self._agent_1 86 | 87 | self._rules = Rules(conf) 88 | self._renderer = Renderer(conf['screen_size'], conf['board_size']) if conf['display'] else None 89 | self._board = Board(self._renderer, conf['board_size']) 90 | self._conf = conf 91 | 92 | if type(self._agent_1) == HumanAgent: 93 
| self._agent_1.set_renderer(renderer=self._renderer) 94 | if type(self._agent_2) == HumanAgent: 95 | self._agent_2.set_renderer(renderer=self._renderer) 96 | 97 | def start_mode(self): 98 | mode = self._conf['mode'] 99 | if mode == 1 or mode == 0: 100 | self.train() 101 | if mode in [2, 2.5, 3, 9, 10]: 102 | self.run(use_stochastic_policy=False) 103 | if mode == 4: 104 | self.compare(game_num=50) 105 | if mode == 5: 106 | self.collect_human_data() 107 | if mode in [6, 12]: 108 | self.collect_self_play_data() 109 | if mode == 7: 110 | self.train_on_external_data() 111 | if mode == 8: 112 | self.collect_human_vs_ai_data() 113 | if mode == 11: 114 | self.train_on_generated_data() 115 | if mode == 13: 116 | self.self_play_and_train() 117 | 118 | def set_mcts_agent_version(self, agent_1_ver, agent_2_ver): 119 | self._agent_1_ver = agent_1_ver 120 | self._agent_2_ver = agent_2_ver 121 | self._agent_1 = MCTSAgent(self._conf, color=BLACK, use_stochastic_policy=False, specify_model_ver=agent_1_ver) 122 | self._agent_2 = MCTSAgent(self._conf, color=WHITE, use_stochastic_policy=False, specify_model_ver=agent_2_ver) 123 | 124 | @log 125 | def run(self, use_stochastic_policy, record=None): 126 | if type(self._agent_1) == MCTSAgent: 127 | self._agent_1.set_stochastic_policy(use_stochastic_policy) 128 | if type(self._agent_2) == MCTSAgent: 129 | self._agent_2.set_stochastic_policy(use_stochastic_policy) 130 | 131 | self._value_list = [] 132 | Node.count = 0 133 | max_score = 0 134 | 135 | while True: 136 | if self._is_self_play: 137 | self._agent_1.color = self._board.current_player() 138 | 139 | # input.obs: the current board 140 | # input.action: the last move played on the current board 141 | # input.stone_num: the number of stones on the current board 142 | # output.action: the move chosen by the current agent 143 | # output.pi: the move distribution given by the current agent; it is added to the game record 144 | # output.prior_prob: the prior probability that the current agent's network assigns to this move 145 | # output.value: the winning rate estimated by the current agent 146 | action, pi, prior_prob, value = self._current_agent().play(obs=self._obs(), action=self._board.last_move(), 147 | stone_num=self._board.stone_num()) 148 | 149 | # show score: an agent works as an evaluator, scoring each candidate position 150 | if self._show_score: 151 | try: 152 | legal_moves = list(value.keys()) # here value is score_dict 153 | score_list = [value[legal_moves[i]] for i in range(len(legal_moves))] 154 | self._board.show_scores(action_list=legal_moves, score_list=score_list) 155 | prior_prob, value = None, None 156 | except AttributeError: 157 | print('> using evaluator agent') 158 | legal_moves = self._evaluator_agent.generate(obs=self._obs(), all=True) 159 | score_list = list() 160 | for i in range(len(legal_moves)): 161 | x, y = legal_moves[i] 162 | temp_board = np.copy(self._obs()) 163 | temp_board[x][y] = self._current_agent().color 164 | self._evaluator_agent.color = self._current_agent().color 165 | score_atk, score_def = self._evaluator_agent.evaluate(temp_board) 166 | print('pos:', (x, y), ' atk:', score_atk, ' def:', score_def) 167 | score = score_atk if score_atk > score_def else -score_def 168 | score_list.append(score) 169 | self._board.show_scores(action_list=legal_moves, score_list=score_list) 170 | try: 171 | max_score = max(max(score_list), -min(score_list)) 172 | except ValueError: # max() fails when score_list is empty 173 | max_score = 0 174 | else: 175 | if type(value) is dict: 176 | prior_prob, value = None, None 177 | 178 | # show 
info 179 | if prior_prob is None: 180 | info = '1_2' 181 | else: 182 | prior_prob = str(round(float(prior_prob), 3)) 183 | value = str(round(-value, 3)) 184 | # now value indicates the winning rate of the last player of the current observation 185 | info = prior_prob + '_' + value 186 | 187 | result = self._check_rules(action) 188 | 189 | if self._conf['mode'] == 12 and self._board.stone_num() >= 30 and max_score < score_3_live: 190 | result = 'draw' 191 | 192 | if result == 'continue': 193 | if record is not None: 194 | record.add(self._obs(), self._board.current_player(), self._board.last_move(), pi) 195 | 196 | # self._evaluator_agent.color = self._board.current_player() 197 | self._board.move(self._board.current_player(), action, info) 198 | # print(self._evaluator_agent.evaluate(self._obs())) 199 | 200 | if value is not None: 201 | self._value_list.append(float(value)) 202 | if len(self._value_list) >= 5 and self._board.stone_num() >= 30: 203 | if self._conf['mode'] in [2, 2.5] and sum(list(map(np.abs, self._value_list[-5:]))) < 0.06: 204 | self._value_list = [] 205 | if ask_for_draw() == 1: 206 | show_result(2, 'draw') 207 | time.sleep(20) 208 | break 209 | 210 | if result == 'occupied': 211 | print(result + ': ' + str(action)) 212 | continue 213 | if result == 'blackwins' or result == 'whitewins' or result == 'draw': 214 | if record is not None: 215 | record.add(self._obs(), self._board.current_player(), self._board.last_move(), pi) 216 | self._board.move(self._board.current_player(), action, info) 217 | time.sleep(0.1) 218 | show_result(self._conf['mode'], result) 219 | 220 | if record is not None: 221 | if result == 'blackwins': 222 | flag = BLACK 223 | if result == 'whitewins': 224 | flag = WHITE 225 | if result == 'draw': 226 | flag = 0 227 | record.set_z(flag) 228 | if self._conf['mode'] in [2, 2.5, 3, 4, 9]: 229 | time.sleep(30) 230 | break 231 | self._board.clear() 232 | print('> Node number of game tree = ' + str(Node.count)) 233 | if type(self._agent_1) == MCTSAgent: 234 | self._agent_1.reset_mcts() 235 | if type(self._agent_2) == MCTSAgent: 236 | self._agent_2.reset_mcts() 237 | if result == 'blackwins': 238 | return BLACK 239 | if result == 'whitewins': 240 | return WHITE 241 | return 0 242 | 243 | def train(self): 244 | # use human play data to initialize network 245 | if self._conf['is_supervised']: 246 | human_play_data_set = DataSet() 247 | human_play_data_set.load(self._conf['human_play_data_path']) 248 | obs, col, last_move, pi, z = human_play_data_set.get_sample(1) 249 | print('> ' + str(len(obs)) + ' positions of data loaded') 250 | for i in range(50): 251 | print('supervise stage = ' + str(i + 1)) 252 | new_obs = obs.copy() 253 | new_col = col.copy() 254 | new_last_move = last_move.copy() 255 | new_pi = pi.copy() 256 | new_z = z.copy() 257 | self._agent_1.train(new_obs, new_col, new_last_move, new_pi, new_z) 258 | 259 | self._agent_1.save_model() 260 | 261 | # training based on self-play 262 | data_set = DataSet() 263 | for epoch in range(self._epoch): 264 | print('> epoch = ' + str(epoch + 1)) 265 | 266 | # self-play 267 | for i in range(self._games_num): 268 | record = GameRecord() 269 | print('> game num = ' + str(i + 1)) 270 | self.run(use_stochastic_policy=True, record=record) 271 | data_set.add_record(record) 272 | 273 | # train 274 | obs, col, last_move, pi, z = data_set.get_sample(self._sample_percentage) 275 | loss = self._agent_1.train(obs, col, last_move, pi, z) 276 | self._loss_list.append(loss) 277 | 278 | # evaluate 279 | self.evaluate() 280 | 
self._agent_1.save_model() 281 | self._network_version += 1 282 | data_set.clear() 283 | print('> network version = ' + str(self._network_version)) 284 | print('*****************************************************') 285 | 286 | # save loss 287 | hist_path = self._conf['fit_history_file'] + '_loss.txt' 288 | with open(hist_path, 'a') as f: 289 | f.write(str(self._loss_list)) 290 | # plot loss 291 | # x = range(1, len(self._loss_list) + 1) 292 | # y = self._loss_list 293 | # plt.plot(x, y) 294 | # plt.xlabel('epoch') 295 | # plt.ylabel('loss') 296 | # plt.savefig(self._conf['fit_history_file'] + str('.png'), dpi=300) 297 | # plt.show() 298 | 299 | def evaluate(self): 300 | print('> Evaluation begins:') 301 | 302 | # switch mode 303 | self._is_self_play = False 304 | self._agent_1.set_self_play(False) 305 | self._agent_2 = self._agent_eval 306 | self._agent_2.load_model() 307 | 308 | new_model_wins_num = 0 309 | old_model_wins_num = 0 310 | draw_num = 0 311 | total_num = self._evaluate_games_num 312 | end = False 313 | 314 | # new model plays BLACK 315 | for i in range(int(total_num / 2)): 316 | result = self.run(use_stochastic_policy=True, record=None) 317 | if result == BLACK: 318 | new_model_wins_num += 1 319 | if result == WHITE: 320 | old_model_wins_num += 1 321 | if result == 0: 322 | draw_num += 1 323 | print('> eval game ' + str(i + 1) + ' , score: ' + str(new_model_wins_num) + ':' + str(old_model_wins_num)) 324 | if new_model_wins_num > (total_num - draw_num) / 2: 325 | pass 326 | # end = True 327 | # break 328 | if old_model_wins_num > (total_num - draw_num) / 2: 329 | pass 330 | # end = True 331 | # break 332 | 333 | # switch agents 334 | self._agent_1, self._agent_2 = self._agent_2, self._agent_1 335 | self._agent_1.color = BLACK 336 | self._agent_2.color = WHITE 337 | 338 | if not end: 339 | for i in range(int(total_num / 2)): 340 | result = self.run(use_stochastic_policy=True, record=None) 341 | if result == BLACK: 342 | old_model_wins_num += 1 343 | if result == WHITE: 344 | new_model_wins_num += 1 345 | if result == 0: 346 | draw_num += 1 347 | print('> eval game ' + str(i + 1 + int(total_num / 2)) + ' , score: ' + str( 348 | new_model_wins_num) + ':' + str(old_model_wins_num)) 349 | if new_model_wins_num > (total_num - draw_num) / 2: 350 | pass 351 | # break 352 | if old_model_wins_num > (total_num - draw_num) / 2: 353 | pass 354 | # break 355 | 356 | # so far self._agent_1 -> self._agent_eval 357 | 358 | self._agent_1 = self._agent_2 359 | self._agent_1.color = BLACK 360 | self._agent_1.set_self_play(True) 361 | self._is_self_play = True 362 | 363 | if new_model_wins_num == 0: 364 | rate = 0 365 | else: 366 | rate = new_model_wins_num / (new_model_wins_num + old_model_wins_num) 367 | print('> winning rate of new model = ' + str(rate)) 368 | if rate > 0.5: 369 | print('> New model adopted') 370 | return True 371 | else: 372 | print('> New model discarded') 373 | return False 374 | 375 | def collect_human_data(self): 376 | human_data_set = DataSet() 377 | human_data_set.load(self._conf['human_play_data_path']) 378 | 379 | for i in range(self._games_num): 380 | record = GameRecord() 381 | print('> game num = ' + str(i + 1)) 382 | self.run(use_stochastic_policy=False, record=record) 383 | human_data_set.add_record(record) 384 | human_data_set.save(self._conf['human_play_data_path']) 385 | 386 | def collect_human_vs_ai_data(self): 387 | data_set = DataSet() 388 | data_set.load(self._conf['human_play_data_path']) 389 | 390 | for i in range(self._games_num): 391 | record = 
GameRecord() 392 | print('> game num = ' + str(i + 1)) 393 | self.run(use_stochastic_policy=False, record=record) 394 | data_set.add_record(record) 395 | if i % 10 == 0: 396 | data_set.save(self._conf['human_play_data_path']) 397 | 398 | data_set.save(self._conf['human_play_data_path']) 399 | 400 | def collect_self_play_data(self): 401 | name = os.getenv('computername') + str(os.getpid()) 402 | for epoch in range(self._epoch): 403 | print('> epoch = ' + str(epoch + 1)) 404 | data_set = DataSet() 405 | path = self._conf['self_play_data_path'] + str(epoch + 1) + '_' + str(name) + '_' 406 | for i in range(self._games_num): 407 | record = GameRecord() 408 | print('> game num = ' + str(i + 1)) 409 | self.run(use_stochastic_policy=True, record=record) 410 | data_set.add_record(record) 411 | data_set.save(path) 412 | data_set.save(path) 413 | 414 | def pack_external_data_set(self, name): 415 | path = self._conf['self_play_data_path'] + str(0) + '_' + str(name) + '_' 416 | data_set = self.get_external_data_set() 417 | data_set.save(path) 418 | 419 | def get_external_data_set(self): 420 | root, prefix = os.path.split(self._conf['self_play_data_path']) 421 | postfix_pattern = r'self\_play\_15\_\d+\_[0-9a-zA-Z\_\-]+\_col\.npy' 422 | last_path = '' 423 | external_data_set = DataSet() 424 | count = 0 425 | obs_list, col_list, last_move_list, pi_list, z_list = [], [], [], [], [] 426 | for filename in os.listdir(root): 427 | if re.match(postfix_pattern, filename): 428 | path = root + '/' + filename 429 | path = path[0:-7] 430 | if path != last_path: 431 | print('> data no.' + str(count + 1)) 432 | count += 1 433 | print('> external data path = ' + path) 434 | last_path = path 435 | external_data_set.load(path) 436 | new_obs, new_col, new_last_move, new_pi, new_z = external_data_set.get_sample(1) 437 | obs_list.extend(new_obs) 438 | col_list.extend(new_col) 439 | last_move_list.extend(new_last_move) 440 | pi_list.extend(new_pi) 441 | z_list.extend(new_z) 442 | external_data_set.clear() 443 | record = GameRecord() 444 | record.add_list(obs_list, col_list, last_move_list, pi_list, z_list) 445 | external_data_set.add_record(record) 446 | return external_data_set 447 | 448 | def train_on_external_data(self): 449 | external_data_set = self.get_external_data_set() 450 | obs, col, last_move, pi, z = external_data_set.get_sample(1) 451 | self._agent_1.train(obs, col, last_move, pi, z) 452 | self._agent_1.save_model() 453 | latest_version = self.backup_model() 454 | print('> current version: ' + str(latest_version)) 455 | 456 | def _obs(self): 457 | return self._board.board() 458 | 459 | def _current_agent(self): 460 | if self._board.current_player() == BLACK: 461 | return self._agent_1 462 | else: 463 | return self._agent_2 464 | 465 | def _check_rules(self, action): 466 | return self._rules.check_rules(self._board.board(), action, self._board.current_player()) 467 | 468 | # step 1. train on generated game record 469 | # step 2. train on self-play data generated by fast AI 470 | # step 3. 
if MCTS Agent is stronger than fast AI, then begin to train on self-play games 471 | # if MCTS Agent degenerated, go back to step 2 472 | 473 | def get_generated_data_set(self, sample_num=20000): 474 | gen = Generator(self._conf['board_size'], max_noise_stone_num=64) 475 | gen_data_set = DataSet() 476 | 477 | record_1 = gen.generate_live_3_oo_o_attack(sample_num=sample_num) 478 | gen_data_set.add_record(record_1) 479 | record_2 = gen.generate_live_3_oo_o_defend(sample_num=sample_num) 480 | gen_data_set.add_record(record_2) 481 | record_3 = gen.generate_live_3_ooo_attack(sample_num=sample_num) 482 | gen_data_set.add_record(record_3) 483 | record_4 = gen.generate_live_3_ooo_defend(sample_num=sample_num) 484 | gen_data_set.add_record(record_4) 485 | record_5 = gen.generate_live_4_attack(sample_num=sample_num) 486 | gen_data_set.add_record(record_5) 487 | record_6 = gen.generate_live_4_defend(sample_num=sample_num) 488 | gen_data_set.add_record(record_6) 489 | record_7 = gen.generate_dead_4_oo_oo_defend(sample_num=sample_num) 490 | gen_data_set.add_record(record_7) 491 | record_8 = gen.generate_dead_4_ooo_o_defend(sample_num=sample_num) 492 | gen_data_set.add_record(record_8) 493 | 494 | gen_data_set.save(self._conf['generated_data_path']) 495 | return gen_data_set 496 | 497 | def train_on_generated_data(self): 498 | gen_data_set = DataSet() 499 | # gen_data_set.load(self._conf['generated_data_path']) 500 | gen = Generator(self._conf['board_size'], max_noise_stone_num=128) 501 | record_2 = gen.generate_live_3_oo_o_defend(sample_num=10000) 502 | record_4 = gen.generate_live_3_ooo_defend(sample_num=10000) 503 | gen_data_set.add_record(record_4) 504 | gen_data_set.add_record(record_2) 505 | obs, col, last_move, pi, z = gen_data_set.get_sample(0.1, shuffle=True) 506 | self._agent_1.train(obs, col, last_move, pi, z) 507 | self._agent_1.save_model() 508 | 509 | def compare(self, game_num=20): 510 | agent_1_win_num, agent_2_win_num = 0, 0 511 | 512 | for i in range(int(game_num / 2)): 513 | result = self.run(use_stochastic_policy=False) 514 | if result == BLACK: 515 | agent_1_win_num += 1 516 | if result == WHITE: 517 | agent_2_win_num += 1 518 | 519 | self._agent_1, self._agent_2 = self._agent_2, self._agent_1 520 | 521 | for i in range(int(game_num / 2)): 522 | result = self.run(use_stochastic_policy=False) 523 | if result == WHITE: 524 | agent_2_win_num += 1 525 | if result == BLACK: 526 | agent_1_win_num += 1 527 | 528 | result = "> ver." + str(self._agent_1_ver) + " : ver." + str(self._agent_2_ver) + ' = ' + str( 529 | agent_1_win_num) + " : " + str(agent_2_win_num) 530 | print(result) 531 | send_email_report(to_addr="kennyxz@live.cn", content=result) 532 | 533 | def _get_model_version(self): 534 | root = 'AlphaGomoku/network/model/' 535 | postfix_pattern = r'model\_b\_15\_ver\_[0-9]+\.h5' 536 | max_num = 0 537 | for filename in os.listdir(root): 538 | if re.match(postfix_pattern, filename): 539 | num = int(filename[15:-3]) 540 | if num > max_num: 541 | max_num = num 542 | return max_num 543 | 544 | def backup_model(self): 545 | root = 'AlphaGomoku/network/model/' 546 | max_num = self._get_model_version() 547 | path_ob = root + 'model_b_15.h5' 548 | path_ow = root + 'model_w_15.h5' 549 | path_b = root + 'model_b_15_ver_' + str(max_num + 1) + '.h5' 550 | path_w = root + 'model_w_15_ver_' + str(max_num + 1) + '.h5' 551 | shutil.copy(path_ob, path_b) 552 | shutil.copy(path_ow, path_w) 553 | info = '> model ver. ' + str(max_num + 1) + ' saved.' 
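# e.g. if model_b_15_ver_11.h5 is the newest backup on disk, the copies above create ver_12 and info reads '> model ver. 12 saved.'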
554 | print(info) 555 | send_email_report(to_addr="kennyxz@live.cn", content=info) 556 | return max_num + 1 557 | 558 | def self_play_and_train(self): 559 | while True: 560 | self.collect_self_play_data() 561 | self.train_on_external_data() 562 | 563 | def temp(self): 564 | mcts_win_num, fast_win_num = 0, 0 565 | for i in range(50): 566 | result = self.run(use_stochastic_policy=False, record=None) 567 | if result == BLACK: 568 | mcts_win_num += 1 569 | if result == WHITE: 570 | fast_win_num += 1 571 | print("MCTS Agent:Fast Agent = ", mcts_win_num, ":", fast_win_num) 572 | self._agent_1 = FastAgent(color=BLACK) 573 | self._agent_2 = MCTSAgent(self._conf, color=WHITE, use_stochastic_policy=False) 574 | for i in range(50): 575 | result = self.run(use_stochastic_policy=False, record=None) 576 | if result == BLACK: 577 | fast_win_num += 1 578 | if result == WHITE: 579 | mcts_win_num += 1 580 | print("MCTS Agent:Fast Agent = ", mcts_win_num, ":", fast_win_num) 581 | 582 | -------------------------------------------------------------------------------- /AlphaGomoku/network/__init__.py: -------------------------------------------------------------------------------- 1 | from .network import * 2 | -------------------------------------------------------------------------------- /AlphaGomoku/network/history/log_15_2_512.txt: -------------------------------------------------------------------------------- 1 | {'activation_9_loss': [3.551663814544678, 2.9409815425872803], 'loss': [4.1677450180053714, 3.250189147949219], 'activation_12_loss': [0.5576915459632874, 0.2508242372274399]}{'activation_21_loss': [3.0998087692260743, 1.9231944646835326], 'activation_24_loss': [0.5244228768348694, 0.21378136962652206], 'loss': [3.681065465927124, 2.1938106298446653]} -------------------------------------------------------------------------------- /AlphaGomoku/network/history/save training history here.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/network/history/save training history here.txt -------------------------------------------------------------------------------- /AlphaGomoku/network/model/log.txt: -------------------------------------------------------------------------------- 1 | ver 11: add decay of winning rate -------------------------------------------------------------------------------- /AlphaGomoku/network/model/model_b_15.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/network/model/model_b_15.h5 -------------------------------------------------------------------------------- /AlphaGomoku/network/model/model_w_15.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/network/model/model_w_15.h5 --------------------------------------------------------------------------------
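Before the network source itself, a sketch of the conf dict that Network(conf) expects. The keys are exactly those read in __init__ below; the values are assumptions pieced together from the inline comments and the log_15_2_512.txt naming (2 epochs, batch 512), so treat them as illustrative:

conf = {
    'board_size': 15,              # side length of the playing board
    'learning_rate': 2e-3,         # SGD learning rate (per the comment in network.py)
    'momentum': 1e-1,              # Nesterov momentum
    'l2': 1e-4,                    # coefficient of the L2 penalty
    'mini_batch_size': 512,        # batch size passed to fit()
    'fit_epochs': 2,               # epochs per call to train()
    'net_para_file': 'AlphaGomoku/network/model/model_b_15.h5',
    'fit_history_file': 'AlphaGomoku/network/history/log_15',
    'use_previous_model': True,    # load weights from net_para_file if present
}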
/AlphaGomoku/network/network.py: -------------------------------------------------------------------------------- 1 | from keras.engine.topology import Input 2 | from keras.engine.training import Model 3 | from keras.layers import add 4 | from keras.layers.convolutional import Conv2D 5 | from keras.layers.core import Activation, Dense, Flatten 6 | from keras.layers.normalization import BatchNormalization 7 | from keras.regularizers import l2 8 | from keras.optimizers import SGD 9 | from ..rules import * 10 | from ..utils import * 11 | import numpy as np 12 | import os 13 | 14 | 15 | class Network: 16 | def __init__(self, conf): 17 | # All hyperparameters used in the model 18 | self._board_size = conf['board_size'] # the size of the playing board 19 | self._lr = conf['learning_rate'] # learning rate of SGD (2e-3) 20 | self._momentum = conf['momentum'] # Nesterov momentum (1e-1) 21 | self._l2_coef = conf['l2'] # coefficient of L2 penalty (1e-4) 22 | self._mini_batch_size = conf['mini_batch_size'] # the batch size used when training the network 23 | self._fit_epochs = conf['fit_epochs'] # the number of fit epochs per training call 24 | 25 | # Define Network 26 | self._build_network() 27 | 28 | # the following dummy prediction works around a known Keras bug, see also: https://www.jianshu.com/p/c84ae0527a3f 29 | temp_board = np.array([[0 for _ in range(conf['board_size'])] for _ in range(conf['board_size'])]) 30 | temp_board[0][0] = BLACK 31 | print('> testing network ...') 32 | self.predict(board=temp_board, color=WHITE, last_move=(0, 0)) 33 | print('> test finished.') 34 | 35 | # The location of the file which stores the parameters of the network 36 | self._net_para_file = conf['net_para_file'] 37 | self._fit_history_file = conf['fit_history_file'] 38 | 39 | # Whether we use the previous model or not 40 | self._use_previous_model = conf['use_previous_model'] 41 | if self._use_previous_model: 42 | if os.path.exists(self._net_para_file): 43 | self._model.load_weights(self._net_para_file) 44 | else: 45 | print('> error: [use_previous_model] = True, ' + self._net_para_file + ' not found') 46 | 47 | @log 48 | def _build_network(self): 49 | # Input Layer 50 | init_x = Input((3, self._board_size, self._board_size)) # the input is a tensor of shape 3 x board_size x board_size 51 | x = init_x 52 | 53 | # First Convolutional Layer with 32 filters 54 | x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', 55 | data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x) 56 | x = BatchNormalization()(x) 57 | x = Activation('relu')(x) 58 | 59 | # Three Residual Blocks 60 | for _ in range(3): 61 | x = self._residual_block(x) 62 | 63 | # Policy Head for generating the prior probability vector over all actions 64 | policy = Conv2D(filters=2, kernel_size=(1, 1), strides=(1, 1), padding='same', 65 | data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x) 66 | policy = BatchNormalization()(policy) 67 | policy = Activation('relu')(policy) 68 | policy = Flatten()(policy) 69 | policy = Dense(self._board_size * self._board_size, kernel_regularizer=l2(self._l2_coef))(policy) 70 | self._policy = Activation('softmax')(policy) 71 | 72 | # Value Head for estimating the value of the current state 73 | value = Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), padding='same', 74 | data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) 75 | value = BatchNormalization()(value) 76 | value = Activation('relu')(value) 77 | value = Flatten()(value) 78 | value = Dense(32, kernel_regularizer=l2(self._l2_coef))(value) 79 | value = Activation('relu')(value) 80 | value = Dense(1, kernel_regularizer=l2(self._l2_coef))(value) 81 | self._value = Activation('tanh')(value) 82 | 83 | # Define Network 84 | self._model = Model(inputs=init_x, outputs=[self._policy, self._value]) 85 | 
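# The model above maps the three input planes (own stones, opponent stones, last move) to two outputs:
# a softmax policy over all board_size * board_size points and a tanh value in [-1, 1] trained against the game outcome z.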
86 | # Define the Loss Function 87 | opt = SGD(lr=self._lr, momentum=self._momentum, nesterov=True) # stochastic gradient descent with momentum 88 | losses_type = ['categorical_crossentropy', 'mean_squared_error'] # cross-entropy and MSE are weighted equally 89 | self._model.compile(optimizer=opt, loss=losses_type) 90 | 91 | def _residual_block(self, x): 92 | x_shortcut = x 93 | x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', 94 | data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) 95 | x = BatchNormalization()(x) 96 | x = Activation('relu')(x) 97 | x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', 98 | data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) 99 | x = BatchNormalization()(x) 100 | x = add([x, x_shortcut]) # Skip Connection 101 | x = Activation('relu')(x) 102 | return x 103 | 104 | def predict(self, board, color, last_move): 105 | if sum(sum(board)) == 0 and color == WHITE: 106 | print('error: network.predict') 107 | if sum(sum(board)) == 1 and color == BLACK: 108 | print('error: network.predict') 109 | tensor = board2tensor(board, color, last_move) 110 | policy, value_tensor = self._model.predict_on_batch(tensor) 111 | value = value_tensor[0][0] 112 | return policy, value 113 | 114 | def train(self, board_list, color_list, last_move_list, pi_list, z_list): 115 | size = len(color_list) 116 | for i in range(size): 117 | if sum(sum(board_list[i])) == 0 and color_list[i] == WHITE: 118 | print('error: network.train') 119 | print(board_list[i]) 120 | print(color_list[i]) 121 | if sum(sum(board_list[i])) == 1 and color_list[i] == BLACK: 122 | print('error: network.train') 123 | print(board_list[i]) 124 | print(color_list[i]) 125 | 126 | # Data augmentation through reflections and rotations of the board 127 | board_aug = [] 128 | color_aug = [] 129 | last_move_aug = [] 130 | pi_aug = [] 131 | z_aug = [] 132 | for i in range(len(board_list)): 133 | new_board, new_color, new_last_move, new_pi, new_z = \ 134 | data_augmentation(board_list[i], color_list[i], last_move_list[i], pi_list[i], z_list[i]) 135 | board_aug.extend(new_board) 136 | color_aug.extend(new_color) 137 | last_move_aug.extend(new_last_move) 138 | pi_aug.extend(new_pi) 139 | z_aug.extend(new_z) 140 | board_list.extend(board_aug) 141 | color_list.extend(color_aug) 142 | last_move_list.extend(last_move_aug) 143 | pi_list.extend(pi_aug) 144 | z_list.extend(z_aug) 145 | 146 | # Encode the data as network input tensors 147 | board_list = np.array([board2tensor(board_list[i], color_list[i], last_move_list[i], reshape_flag=False) 148 | for i in range(len(board_list))]) 149 | pi_list = np.array(pi_list) 150 | z_list = np.array(z_list) 151 | 152 | # Training 153 | hist = self._model.fit(board_list, [pi_list, z_list], epochs=self._fit_epochs, batch_size=self._mini_batch_size, 154 | verbose=1) 155 | hist_path = self._fit_history_file + '_' + str(self._fit_epochs) + '_' + str(self._mini_batch_size) + '.txt' 156 | with open(hist_path, 'a') as f: 157 | f.write(str(hist.history)) 158 | return hist.history['loss'][0] # only keep the loss of the first epoch 159 | 160 | def get_para(self): 161 | net_para = self._model.get_weights() 162 | return net_para 163 | 164 | def save_model(self): 165 | """ save model parameters to file """ 166 | self._model.save_weights(self._net_para_file) 167 | 168 | def load_model(self): 169 | if os.path.exists(self._net_para_file): 170 | self._model.load_weights(self._net_para_file) 171 | else: 172 | print('> error: ' + self._net_para_file + ' not found') 173 | 174 | 
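A minimal usage sketch of the class above (illustrative only; assumes the package imports resolve as the repo layout suggests):

import numpy as np
from AlphaGomoku.network.network import Network

conf = dict(board_size=15, learning_rate=2e-3, momentum=1e-1, l2=1e-4,
            mini_batch_size=512, fit_epochs=2,
            net_para_file='AlphaGomoku/network/model/model_b_15.h5',
            fit_history_file='AlphaGomoku/network/history/log_15',
            use_previous_model=False)          # False: skip loading weights from disk

net = Network(conf)
board = np.zeros((15, 15), dtype=int)
board[7][7] = 1                                # Black has played the centre point
policy, value = net.predict(board=board, color=-1, last_move=(7, 7))
print(policy.shape)                            # (1, 225): a prior probability for every point
print(value)                                   # scalar in [-1, 1] for the side to move (WHITE = -1)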
175 | # Transform a board (matrix) to an input tensor 176 | def board2tensor(board, color, last_move, reshape_flag=True): 177 | # Current-Stone Layer 178 | cur = np.array(np.array(board) == color, dtype=np.int) 179 | 180 | # Enemy-Stone Layer 181 | e = np.array(np.array(board) == -color, dtype=np.int) 182 | 183 | # Last Step Layer 184 | l = np.zeros((board.shape[0], board.shape[1])) 185 | if last_move is not None: 186 | l[last_move[0]][last_move[1]] = 1 187 | 188 | # Color Layer (disabled) 189 | # flag = (1 if color == BLACK else 0) 190 | # c = flag * np.ones((board.shape[0], board.shape[1])) 191 | 192 | # Stack cur, e, l into the input tensor (the color layer above is not used) 193 | tensor = np.array([cur, e, l]) 194 | if reshape_flag: 195 | tensor = tensor.reshape(1, tensor.shape[0], tensor.shape[1], tensor.shape[2]) 196 | return tensor 197 | 198 | 199 | # Augment the training data pool through plane transformations 200 | def data_augmentation(board, color, last_move, pi, z): 201 | new_board = [] 202 | new_color = [color] * 7 203 | new_last_move = [] 204 | new_pi = [] 205 | new_z = [z] * 7 206 | for trans_type in range(1, 8): 207 | board_t = board_transform(board, trans_type, flag=1) 208 | last_move_t = coordinate_transform(last_move, trans_type, board.shape[0], flag=1) 209 | pi_t = input_encode(pi, trans_type, board.shape[0]) 210 | new_board.append(board_t) 211 | new_last_move.append(last_move_t) 212 | new_pi.append(pi_t) 213 | return new_board, new_color, new_last_move, new_pi, new_z 214 | 215 | 216 | # Transform the input vector given the transformation type 217 | def input_encode(vec, num, size): 218 | mat = np.reshape(vec, (size, size)) # reshape vector into matrix 219 | mat = board_transform(mat, num, flag=1) 220 | vec = np.reshape(mat, (1, size ** 2)) 221 | return vec[0] 222 | 223 | 224 | # Transform the output vector back to its initial shape given the transformation type 225 | def output_decode(vec, num, size): 226 | mat = np.reshape(vec, (size, size)) # reshape vector into matrix 227 | inv_mat = board_transform(mat, num, flag=2) 228 | vec = np.reshape(inv_mat, (1, size ** 2)) 229 | return vec[0] 230 | 231 | 232 | def coordinate_transform(move, trans_type, size, flag): 233 | if move is None: 234 | return None 235 | board = np.zeros((size, size)) 236 | board[move[0]][move[1]] = 1 237 | board_t = board_transform(board, trans_type, flag) 238 | temp = np.where(board_t == 1) 239 | new_move = (temp[0][0], temp[1][0]) 240 | return new_move 241 | 242 | 243 | # Transform the input board by a simple plane transformation 244 | def board_transform(mat, num, flag=0): 245 | def R0(mat): 246 | return mat 247 | 248 | def R1(mat): 249 | mat = np.rot90(mat, 1) 250 | return mat 251 | 252 | def R2(mat): 253 | mat = np.rot90(mat, 2) 254 | return mat 255 | 256 | def R3(mat): 257 | mat = np.rot90(mat, 3) 258 | return mat 259 | 260 | def S(mat): 261 | mat = R0(np.fliplr(mat)) 262 | return mat 263 | 264 | def SR1(mat): 265 | mat = R1(np.fliplr(mat)) 266 | return mat 267 | 268 | def SR2(mat): 269 | mat = R2(np.fliplr(mat)) 270 | return mat 271 | 272 | def SR3(mat): 273 | mat = R3(np.fliplr(mat)) 274 | return mat 275 | 276 | # Random Transformation 277 | if flag == 0: 278 | num = int(np.random.randint(8, size=1)) 279 | total_type = ['R0', 'R1', 'R2', 'R3', 'S', 'SR1', 'SR2', 'SR3'] 280 | real_type = total_type[num] 281 | return eval(real_type)(mat), num 282 | 283 | # Encode 284 | elif flag == 1: # encode 285 | total_type = ['R0', 'R1', 'R2', 'R3', 'S', 'SR1', 'SR2', 'SR3'] 286 | real_type = total_type[num] 287 | return eval(real_type)(mat) 288 | 289 | # Decode 290 | else: 291 | inv_total_type = ['R0', 'R3', 'R2', 'R1', 'S', 'SR1', 'SR2', 'SR3'] 292 | real_type = inv_total_type[num] 293 | return eval(real_type)(mat)
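A quick sanity check for the encode/decode pairing above (a test sketch, not part of the original source): for every transformation index, output_decode should invert input_encode, since each rotation is paired with its inverse rotation and each reflected transform is its own inverse.

import numpy as np
from AlphaGomoku.network.network import input_encode, output_decode  # assumed import path

size = 15
vec = np.random.rand(size * size)
for num in range(8):
    enc = input_encode(vec, num, size)
    dec = output_decode(enc, num, size)
    assert np.allclose(dec, vec)   # round-trip reproduces the original vector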
294 | -------------------------------------------------------------------------------- /AlphaGomoku/rules.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | BLACK = 1 4 | WHITE = -1 5 | 6 | 7 | class Rules: 8 | 9 | def __init__(self, conf): 10 | self._board_size = conf['board_size'] 11 | self._board = [[0 for j in range(conf['board_size'])] for i in range(conf['board_size'])] 12 | self._conf = conf 13 | 14 | # The lists that record the locations of all live3 and live4 patterns 15 | self._live3_list = [] 16 | self._live4_list = [] 17 | 18 | self._stone_number = 0 19 | 20 | def _read(self, board): 21 | self._board = board 22 | 23 | def board(self): 24 | return self._board 25 | 26 | def _count_on_direction(self, i, j, i_direction, j_direction, color): 27 | # Note: i_direction takes one of three values, -1, 0 or 1, where -1 means up, 28 | # 1 means down and 0 means no change. j_direction likewise takes -1, 0 or 1, 29 | # where -1 means left, 1 means right and 0 means no change. Note that 30 | # i_direction and j_direction must not both be 0 at the same time. 31 | # We count the number of consecutive stones of the given color 32 | # in the given direction. 33 | count = 0 34 | for step in range(1, 5): # We only need to consider stones within 4 steps 35 | if i + step * i_direction < 0 or i + step * i_direction > self._board_size - 1 or j + step * j_direction < 0 or j + step * j_direction > self._board_size - 1: 36 | break 37 | if self._board[i + step * i_direction][j + step * j_direction] == color: 38 | count += 1 39 | else: 40 | break 41 | return count 42 | 43 | # find the maximal number of consecutive stones 44 | def _count_consecutive(self, i, j, color): 45 | # Check whether the consecutive count reaches exactly 5, which ends the game, 46 | # or exceeds 5, which forms the forbidden overline (Black may not overline) 47 | lines = [[[0, 1], [0, -1]], [[1, -1], [-1, 1]], [[1, 0], [-1, 0]], [[-1, -1], [1, 1]]] 48 | counts = [] 49 | for line in lines: 50 | count = 1 51 | for direction in line: 52 | count += self._count_on_direction(i, j, direction[0], direction[1], color) 53 | counts.append(count) 54 | return 5 if 5 in counts else max(counts) 55 | # Note: an exact 5 in any direction takes priority, so an overline elsewhere is ignored 56 | 57 | def _update_live3_list(self): 58 | M = -10 59 | N = -20 60 | L = -50 61 | feature_A = [M, 1, 1, 1, N] 62 | feature_B = [M, 1, 1, N, 1, L] 63 | feature_C = [M, 1, N, 1, 1, L] 64 | 65 | self._live3_list.clear() 66 | 67 | # A, horizontal and vertical 68 | for i in range(15): 69 | for j in range(11): 70 | u = self._board[i][j : j + 5] 71 | v = [self._board[k][i] for k in range(j, j + 5)] 72 | flag_H = self._dot(u, feature_A) 73 | flag_V = self._dot(v, feature_A) 74 | if flag_H == 3: 75 | pos = [[i, k] for k in range(j + 1, j + 4)] 76 | self._live3_list.append(pos) 77 | if flag_V == 3: 78 | pos = [[k, i] for k in range(j + 1, j + 4)] 79 | self._live3_list.append(pos) 80 | 81 | # A, diagonal 82 | for i in range(11): 83 | for j in range(11): 84 | u = [self._board[i + k][j + k] for k in range(5)] 85 | v = [self._board[i + k][14 - j - k] for k in range(5)] 86 | flag_L = self._dot(u, feature_A) 87 | flag_R = self._dot(v, feature_A) 88 | if flag_L == 3: 89 | pos = [[i + k, j + k] for k in range(1, 4)] 90 | self._live3_list.append(pos) 91 | if flag_R == 3: 92 | pos = [[i + k, 14 - j - k] for k in range(1, 4)] 93 | 
self._live3_list.append(pos) 94 | 95 | # B, horizontal and vertical 96 | for i in range(15): 97 | for j in range(10): 98 | u = self._board[i][j: j + 6] 99 | v = [self._board[k][i] for k in range(j, j + 6)] 100 | flag_H = self._dot(u, feature_B) 101 | flag_V = self._dot(v, feature_B) 102 | if flag_H == 3: 103 | pos = [[i, j + 1], [i, j + 2], [i, j + 4]] 104 | self._live3_list.append(pos) 105 | if flag_V == 3: 106 | pos = [[j + 1, i], [j + 2, i], [j + 4, i]] 107 | self._live3_list.append(pos) 108 | 109 | # B, diagonal 110 | for i in range(10): 111 | for j in range(10): 112 | u = [self._board[i + k][j + k] for k in range(6)] 113 | v = [self._board[i + k][14 - j - k] for k in range(6)] 114 | flag_L = self._dot(u, feature_B) 115 | flag_R = self._dot(v, feature_B) 116 | if flag_L == 3: 117 | pos = [[i + 1, j + 1], [i + 2, j + 2], [i + 4, j + 4]] 118 | self._live3_list.append(pos) 119 | if flag_R == 3: 120 | pos = [[i + 1, 13 - j], [i + 2, 12 - j], [i + 4, 10 - j]] 121 | self._live3_list.append(pos) 122 | 123 | # C, horizontal and vertical 124 | for i in range(15): 125 | for j in range(10): 126 | u = self._board[i][j: j + 6] 127 | v = [self._board[k][i] for k in range(j, j + 6)] 128 | flag_H = self._dot(u, feature_C) 129 | flag_V = self._dot(v, feature_C) 130 | if flag_H == 3: 131 | pos = [[i, j + 1], [i, j + 3], [i, j + 4]] 132 | self._live3_list.append(pos) 133 | if flag_V == 3: 134 | pos = [[j + 1, i], [j + 3, i], [j + 4, i]] 135 | self._live3_list.append(pos) 136 | 137 | # C, diagonal 138 | for i in range(10): 139 | for j in range(10): 140 | u = [self._board[i + k][j + k] for k in range(6)] 141 | v = [self._board[i + k][14 - j - k] for k in range(6)] 142 | flag_L = self._dot(u, feature_C) 143 | flag_R = self._dot(v, feature_C) 144 | if flag_L == 3: 145 | pos = [[i + 1, j + 1], [i + 3, j + 3], [i + 4, j + 4]] 146 | self._live3_list.append(pos) 147 | if flag_R == 3: 148 | pos = [[i + 1, 13 - j], [i + 3, 11 - j], [i + 4, 10 - j]] 149 | self._live3_list.append(pos) 150 | 151 | def _update_live4_list(self): 152 | M = -100 153 | feature_A = [M, 1, 1, 1, 1, M] # 4 or 104 154 | feature_B = [0, 1, 1, M, 1, 1, 0] # 4 only 155 | feature_C = [0, 1, M, 1, 1, 1, 0] # 4 only 156 | feature_D = [0, 1, 1, 1, M, 1, 0] # 4 only 157 | feature_E = [1, 1, 1, 1, M] # 4 only 158 | feature_F = [M, 1, 1, 1, 1] # 4 only 159 | feature_G = [1, 1, 1, M, 1] # 4 only 160 | feature_H = [1, M, 1, 1, 1] # 4 only 161 | feature_I = [1, 1, M, 1, 1] # 4 only 162 | 163 | self._live4_list.clear() 164 | 165 | # A,horizontal and vertical, feature_A = [M, 1, 1, 1, 1, M] 166 | for i in range(15): 167 | for j in range(10): 168 | u = self._board[i][j: j + 6] 169 | v = [self._board[k][i] for k in range(j, j + 6)] 170 | flag_H = self._dot(u, feature_A) 171 | flag_V = self._dot(v, feature_A) 172 | if flag_H == 4 or flag_H == 104: 173 | pos = [[i, k] for k in range(j + 1, j + 5)] 174 | self._live4_list.append(pos) 175 | if flag_V == 4 or flag_V == 104: 176 | pos = [[k, i] for k in range(j + 1, j + 5)] 177 | self._live4_list.append(pos) 178 | 179 | # A, diagonal 180 | for i in range(10): 181 | for j in range(10): 182 | u = [self._board[i + k][j + k] for k in range(6)] 183 | v = [self._board[i + k][14 - j - k] for k in range(6)] 184 | flag_L = self._dot(u, feature_A) 185 | flag_R = self._dot(v, feature_A) 186 | if flag_L == 4 or flag_L == 104: 187 | pos = [[i + k, j + k] for k in range(1, 5)] 188 | self._live4_list.append(pos) 189 | if flag_R == 4 or flag_R == 104: 190 | pos = [[i + k, 14 - j - k] for k in range(1, 5)] 191 | 
self._live4_list.append(pos) 192 | 193 | # B, horizontal and vertical, feature_B = [0, 1, 1, M, 1, 1, 0] 194 | for i in range(15): 195 | for j in range(9): 196 | u = self._board[i][j: j + 7] 197 | v = [self._board[k][i] for k in range(j, j + 7)] 198 | flag_H = self._dot(u, feature_B) 199 | flag_V = self._dot(v, feature_B) 200 | if flag_H == 4: 201 | pos = [[i, j + 1], [i, j + 2], [i, j + 4], [i, j + 5]] 202 | self._live4_list.append(pos) 203 | if flag_V == 4: 204 | pos = [[j + 1, i], [j + 2, i], [j + 4, i], [j + 5, i]] 205 | self._live4_list.append(pos) 206 | 207 | # B, diagonal 208 | for i in range(9): 209 | for j in range(9): 210 | u = [self._board[i + k][j + k] for k in range(7)] 211 | v = [self._board[i + k][14 - j - k] for k in range(7)] 212 | flag_L = self._dot(u, feature_B) 213 | flag_R = self._dot(v, feature_B) 214 | if flag_L == 4: 215 | pos = [[i + 1, j + 1], [i + 2, j + 2], [i + 4, j + 4], [i + 5, j + 5]] 216 | self._live4_list.append(pos) 217 | if flag_R == 4: 218 | pos = [[i + 1, 13 - j], [i + 2, 12 - j], [i + 4, 10 - j], [i + 5, 9 - j]] 219 | self._live4_list.append(pos) 220 | 221 | # C, horizontal and vertical, feature_C = [0, 1, M, 1, 1, 1, 0] 222 | for i in range(15): 223 | for j in range(9): 224 | u = self._board[i][j: j + 7] 225 | v = [self._board[k][i] for k in range(j, j + 7)] 226 | flag_H = self._dot(u, feature_C) 227 | flag_V = self._dot(v, feature_C) 228 | if flag_H == 4: 229 | pos = [[i, j + 1], [i, j + 3], [i, j + 4], [i, j + 5]] 230 | self._live4_list.append(pos) 231 | if flag_V == 4: 232 | pos = [[j + 1, i], [j + 3, i], [j + 4, i], [j + 5, i]] 233 | self._live4_list.append(pos) 234 | 235 | # C, diagonal 236 | for i in range(9): 237 | for j in range(9): 238 | u = [self._board[i + k][j + k] for k in range(7)] 239 | v = [self._board[i + k][14 - j - k] for k in range(7)] 240 | flag_L = self._dot(u, feature_C) 241 | flag_R = self._dot(v, feature_C) 242 | if flag_L == 4: 243 | pos = [[i + 1, j + 1], [i + 3, j + 3], [i + 4, j + 4], [i + 5, j + 5]] 244 | self._live4_list.append(pos) 245 | if flag_R == 4: 246 | pos = [[i + 1, 13 - j], [i + 3, 11 - j], [i + 4, 10 - j], [i + 5, 9 - j]] 247 | self._live4_list.append(pos) 248 | 249 | # D, horizontal and vertical, feature_D = [0, 1, 1, 1, M, 1, 0] 250 | for i in range(15): 251 | for j in range(9): 252 | u = self._board[i][j: j + 7] 253 | v = [self._board[k][i] for k in range(j, j + 7)] 254 | flag_H = self._dot(u, feature_D) 255 | flag_V = self._dot(v, feature_D) 256 | if flag_H == 4: 257 | pos = [[i, j + 1], [i, j + 2], [i, j + 3], [i, j + 5]] 258 | self._live4_list.append(pos) 259 | if flag_V == 4: 260 | pos = [[j + 1, i], [j + 2, i], [j + 3, i], [j + 5, i]] 261 | self._live4_list.append(pos) 262 | 263 | # D, diagonal 264 | for i in range(9): 265 | for j in range(9): 266 | u = [self._board[i + k][j + k] for k in range(7)] 267 | v = [self._board[i + k][14 - j - k] for k in range(7)] 268 | flag_L = self._dot(u, feature_D) 269 | flag_R = self._dot(v, feature_D) 270 | if flag_L == 4: 271 | pos = [[i + 1, j + 1], [i + 2, j + 2], [i + 3, j + 3], [i + 5, j + 5]] 272 | self._live4_list.append(pos) 273 | if flag_R == 4: 274 | pos = [[i + 1, 13 - j], [i + 2, 12 - j], [i + 3, 11 - j], [i + 5, 9 - j]] 275 | self._live4_list.append(pos) 276 | 277 | # E feature_E = [1, 1, 1, 1, M] 278 | # horizontal and vertical, boundry 279 | for i in range(15): 280 | u1 = self._board[i][0:5] 281 | u2 = [self._board[j][i] for j in range(5)] 282 | if self._dot(u1, feature_E) == 4: 283 | pos = [[i, k] for k in range(4)] 284 | self._live4_list.append(pos) 
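# the same border check runs vertically via u2 (column i), mirroring u1 above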
285 | if self._dot(u2, feature_E) == 4: 286 | pos = [[k, i] for k in range(4)] 287 | self._live4_list.append(pos) 288 | 289 | # diagonal, from row 1 to row 5. Noted that there is no need for the row 11 to row 15 since 290 | # it can be detected by feature (diagonal) except two special position. Similarly, for 291 | # feature F there is no need for row 1 to row 5 except two special position. 292 | # diagonal (up border) 293 | for i in range(11): 294 | u3 = [self._board[k][i + k] for k in range(5)] 295 | u4 = [self._board[k][i - k + 4] for k in range(5)] 296 | if self._dot(u3, feature_E) == 4: 297 | pos = [[k, i + k] for k in range(4)] 298 | self._live4_list.append(pos) 299 | if self._dot(u4, feature_E) == 4: 300 | pos = [[k, i - k + 4] for k in range(4)] 301 | self._live4_list.append(pos) 302 | 303 | # diagonal (left and right border) 304 | for i in range(1, 11): 305 | u5 = [self._board[i + k][k] for k in range(5)] 306 | u6 = [self._board[i + k][14 - k] for k in range(5)] 307 | if self._dot(u5, feature_E) == 4: 308 | pos = [[i + k, k] for k in range(4)] 309 | self._live4_list.append(pos) 310 | if self._dot(u6, feature_E) == 4: 311 | pos = [[i + k, 14 - k] for k in range(4)] 312 | self._live4_list.append(pos) 313 | 314 | # F feature_F = [M, 1, 1, 1, 1], similar to feature_E 315 | # horizontal and vertical, right and bottom border 316 | for i in range(15): 317 | u1 = self._board[i][10:15] 318 | u2 = [self._board[j][i] for j in range(10, 15)] 319 | if self._dot(u1, feature_F) == 4: 320 | pos = [[i, k] for k in range(11, 15)] 321 | self._live4_list.append(pos) 322 | if self._dot(u2, feature_F) == 4: 323 | pos = [[k, i] for k in range(11, 15)] 324 | self._live4_list.append(pos) 325 | 326 | # diagonal, bottom border 327 | for i in range(11): 328 | u3 = [self._board[k + 10][i + k] for k in range(5)] 329 | u4 = [self._board[k + 10][i - k + 4] for k in range(5)] 330 | if self._dot(u3, feature_F) == 4: 331 | pos = [[k + 10, i + k] for k in range(1, 5)] 332 | self._live4_list.append(pos) 333 | if self._dot(u4, feature_F) == 4: 334 | pos = [[k + 10, i - k + 4] for k in range(1, 5)] 335 | self._live4_list.append(pos) 336 | 337 | # diagonal, left and right border 338 | for i in range(1, 11): 339 | u5 = [self._board[i + k][10 + k] for k in range(5)] 340 | u6 = [self._board[i + k][4 - k] for k in range(5)] 341 | if self._dot(u5, feature_F) == 4: 342 | pos = [[i + k, 10 + k] for k in range(1, 5)] 343 | self._live4_list.append(pos) 344 | if self._dot(u6, feature_F) == 4: 345 | pos = [[i + k, 4 - k] for k in range(1, 5)] 346 | self._live4_list.append(pos) 347 | 348 | # G feature_G = [1, 1, 1, M, 1] 349 | # horizontal and vertical 350 | for i in range(15): 351 | u1 = self._board[i][0:5] 352 | u2 = self._board[i][10:15] 353 | u3 = [self._board[k][i] for k in range(5)] 354 | u4 = [self._board[k][i] for k in range(10, 15)] 355 | if self._dot(u1, feature_G) == 4: 356 | pos = [[i, 0], [i, 1], [i, 2], [i, 4]] 357 | self._live4_list.append(pos) 358 | if self._dot(u2, feature_G) == 4: 359 | pos = [[i, 10], [i, 11], [i, 12], [i, 14]] 360 | self._live4_list.append(pos) 361 | if self._dot(u3, feature_G) == 4: 362 | pos = [[0, i], [1, i], [2, i], [4, i]] 363 | self._live4_list.append(pos) 364 | if self._dot(u4, feature_G) == 4: 365 | pos = [[10, i], [11, i], [12, i], [14, i]] 366 | self._live4_list.append(pos) 367 | 368 | # diagonal. 
repetition exists so it's better to pick them out: u13-u16 369 | for i in range(10): 370 | u5 = [self._board[i + k + 1][k] for k in range(5)] 371 | u6 = [self._board[k][i + k + 1] for k in range(5)] 372 | u7 = [self._board[i + k][k + 10] for k in range(5)] 373 | u8 = [self._board[k + 10][i + k] for k in range(5)] 374 | u9 = [self._board[k][i + 4 - k] for k in range(5)] 375 | u10 = [self._board[i + k + 1][14 - k] for k in range(5)] 376 | u11 = [self._board[k + 10][i + 5 - k] for k in range(5)] 377 | u12 = [self._board[i + k][4 - k] for k in range(5)] 378 | if self._dot(u5, feature_G) == 4: 379 | pos = [[i + 1, 0],[i + 2, 1], [i + 3, 2], [i + 5, 4]] 380 | self._live4_list.append(pos) 381 | if self._dot(u6, feature_G) == 4: 382 | pos = [[0, i + 1],[1, i + 2], [2, i + 3], [4, i + 5]] 383 | self._live4_list.append(pos) 384 | if self._dot(u7, feature_G) == 4: 385 | pos = [[i, 10], [i + 1, 11], [i + 2, 12], [i + 4, 14]] 386 | self._live4_list.append(pos) 387 | if self._dot(u8, feature_G) == 4: 388 | pos = [[10, i], [11, i + 1], [12, i + 2], [14, i + 4]] 389 | self._live4_list.append(pos) 390 | if self._dot(u9, feature_G) == 4: 391 | pos = [[0, i + 4], [1, i + 3], [2, i + 2], [4, i]] 392 | self._live4_list.append(pos) 393 | if self._dot(u10, feature_G) == 4: 394 | pos = [[i + 1, 14], [i + 2, 13], [i + 3, 12], [i + 5, 10]] 395 | self._live4_list.append(pos) 396 | if self._dot(u11, feature_G) == 4: 397 | pos = [[10, i + 5], [11, i + 4], [12, i + 3], [14, i + 1]] 398 | self._live4_list.append(pos) 399 | if self._dot(u12, feature_G) == 4: 400 | pos = [[i, 4], [i + 1, 3], [i + 2, 2], [i + 4, 0]] 401 | self._live4_list.append(pos) 402 | 403 | u13 = [self._board[k][k] for k in range(5)] 404 | u14 = [self._board[k][k] for k in range(10, 15)] 405 | u15 = [self._board[k][14 - k] for k in range(5)] 406 | u16 = [self._board[k][14 - k] for k in range(10, 15)] 407 | 408 | if self._dot(u13, feature_G) == 4: 409 | pos = [[0, 0], [1, 1], [2, 2], [4, 4]] 410 | self._live4_list.append(pos) 411 | if self._dot(u14, feature_G) == 4: 412 | pos = [[10, 10], [11, 11], [12, 12], [14, 14]] 413 | self._live4_list.append(pos) 414 | if self._dot(u15, feature_G) == 4: 415 | pos = [[0, 14], [1, 13], [2, 12], [4, 10]] 416 | self._live4_list.append(pos) 417 | if self._dot(u16, feature_G) == 4: 418 | pos = [[10, 4], [11, 3], [12, 2], [14, 0]] 419 | self._live4_list.append(pos) 420 | 421 | # H feature_H = [1, M, 1, 1, 1] 422 | for i in range(15): 423 | u1 = self._board[i][0:5] 424 | u2 = self._board[i][10:15] 425 | u3 = [self._board[k][i] for k in range(5)] 426 | u4 = [self._board[k][i] for k in range(10, 15)] 427 | if self._dot(u1, feature_H) == 4: 428 | pos = [[i, 0], [i, 2], [i, 3], [i, 4]] 429 | self._live4_list.append(pos) 430 | if self._dot(u2, feature_H) == 4: 431 | pos = [[i, 10], [i, 12], [i, 13], [i, 14]] 432 | self._live4_list.append(pos) 433 | if self._dot(u3, feature_H) == 4: 434 | pos = [[0, i], [2, i], [3, i], [4, i]] 435 | self._live4_list.append(pos) 436 | if self._dot(u4, feature_H) == 4: 437 | pos = [[10, i], [12, i], [13, i], [14, i]] 438 | self._live4_list.append(pos) 439 | 440 | for i in range(10): 441 | u5 = [self._board[i + k + 1][k] for k in range(5)] 442 | u6 = [self._board[k][i + k + 1] for k in range(5)] 443 | u7 = [self._board[i + k][k + 10] for k in range(5)] 444 | u8 = [self._board[k + 10][i + k] for k in range(5)] 445 | u9 = [self._board[k][i + 4 - k] for k in range(5)] 446 | u10 = [self._board[i + k + 1][14 - k] for k in range(5)] 447 | u11 = [self._board[k + 10][i + 5 - k] for k in range(5)] 448 | 
u12 = [self._board[i + k][4 - k] for k in range(5)] 449 | if self._dot(u5, feature_H) == 4: 450 | pos = [[i + 1, 0],[i + 3, 2], [i + 4, 3], [i + 5, 4]] 451 | self._live4_list.append(pos) 452 | if self._dot(u6, feature_H) == 4: 453 | pos = [[0, i + 1],[2, i + 3], [3, i + 4], [4, i + 5]] 454 | self._live4_list.append(pos) 455 | if self._dot(u7, feature_H) == 4: 456 | pos = [[i, 10], [i + 2, 12], [i + 3, 13], [i + 4, 14]] 457 | self._live4_list.append(pos) 458 | if self._dot(u8, feature_H) == 4: 459 | pos = [[10, i], [12, i + 2], [13, i + 3], [14, i + 4]] 460 | self._live4_list.append(pos) 461 | if self._dot(u9, feature_H) == 4: 462 | pos = [[0, i + 4], [2, i + 2], [3, i + 1], [4, i]] 463 | self._live4_list.append(pos) 464 | if self._dot(u10, feature_H) == 4: 465 | pos = [[i + 1, 14], [i + 3, 12], [i + 4, 11], [i + 5, 10]] 466 | self._live4_list.append(pos) 467 | if self._dot(u11, feature_H) == 4: 468 | pos = [[10, i + 5], [12, i + 3], [13, i + 2], [14, i + 1]] 469 | self._live4_list.append(pos) 470 | if self._dot(u12, feature_H) == 4: 471 | pos = [[i, 4], [i + 2, 2], [i + 3, 1], [i + 4, 0]] 472 | self._live4_list.append(pos) 473 | 474 | u13 = [self._board[k][k] for k in range(5)] 475 | u14 = [self._board[k][k] for k in range(10, 15)] 476 | u15 = [self._board[k][14 - k] for k in range(5)] 477 | u16 = [self._board[k][14 - k] for k in range(10, 15)] 478 | 479 | if self._dot(u13, feature_H) == 4: 480 | pos = [[0, 0], [2, 2], [3, 3], [4, 4]] 481 | self._live4_list.append(pos) 482 | if self._dot(u14, feature_H) == 4: 483 | pos = [[10, 10], [12, 12], [13, 13], [14, 14]] 484 | self._live4_list.append(pos) 485 | if self._dot(u15, feature_H) == 4: 486 | pos = [[0, 14], [2, 12], [3, 11], [4, 10]] 487 | self._live4_list.append(pos) 488 | if self._dot(u16, feature_H) == 4: 489 | pos = [[10, 4], [12, 2], [13, 1], [14, 0]] 490 | self._live4_list.append(pos) 491 | 492 | for i in range(15): 493 | u1 = self._board[i][0:5] 494 | u2 = self._board[i][10:15] 495 | u3 = [self._board[k][i] for k in range(5)] 496 | u4 = [self._board[k][i] for k in range(10, 15)] 497 | if self._dot(u1, feature_I) == 4: 498 | pos = [[i, 0], [i, 1], [i, 3], [i, 4]] 499 | self._live4_list.append(pos) 500 | if self._dot(u2, feature_I) == 4: 501 | pos = [[i, 10], [i, 11], [i, 13], [i, 14]] 502 | self._live4_list.append(pos) 503 | if self._dot(u3, feature_I) == 4: 504 | pos = [[0, i], [1, i], [3, i], [4, i]] 505 | self._live4_list.append(pos) 506 | if self._dot(u4, feature_I) == 4: 507 | pos = [[10, i], [11, i], [13, i], [14, i]] 508 | self._live4_list.append(pos) 509 | 510 | for i in range(10): 511 | u5 = [self._board[i + k + 1][k] for k in range(5)] 512 | u6 = [self._board[k][i + k + 1] for k in range(5)] 513 | u7 = [self._board[i + k][k + 10] for k in range(5)] 514 | u8 = [self._board[k + 10][i + k] for k in range(5)] 515 | u9 = [self._board[k][i + 4 - k] for k in range(5)] 516 | u10 = [self._board[i + k + 1][14 - k] for k in range(5)] 517 | u11 = [self._board[k + 10][i + 5 - k] for k in range(5)] 518 | u12 = [self._board[i + k][4 - k] for k in range(5)] 519 | if self._dot(u5, feature_I) == 4: 520 | pos = [[i + 1, 0],[i + 2, 1], [i + 4, 3], [i + 5, 4]] 521 | self._live4_list.append(pos) 522 | if self._dot(u6, feature_I) == 4: 523 | pos = [[0, i + 1],[1, i + 2], [3, i + 4], [4, i + 5]] 524 | self._live4_list.append(pos) 525 | if self._dot(u7, feature_I) == 4: 526 | pos = [[i, 10], [i + 1, 11], [i + 3, 13], [i + 4, 14]] 527 | self._live4_list.append(pos) 528 | if self._dot(u8, feature_I) == 4: 529 | pos = [[10, i], [11, i + 1], [13, i + 
3], [14, i + 4]] 530 | self._live4_list.append(pos) 531 | if self._dot(u9, feature_I) == 4: 532 | pos = [[0, i + 4], [1, i + 3], [3, i + 1], [4, i]] 533 | self._live4_list.append(pos) 534 | if self._dot(u10, feature_I) == 4: 535 | pos = [[i + 1, 14], [i + 2, 13], [i + 4, 11], [i + 5, 10]] 536 | self._live4_list.append(pos) 537 | if self._dot(u11, feature_I) == 4: 538 | pos = [[10, i + 5], [11, i + 4], [13, i + 2], [14, i + 1]] 539 | self._live4_list.append(pos) 540 | if self._dot(u12, feature_I) == 4: 541 | pos = [[i, 4], [i + 1, 3], [i + 3, 1], [i + 4, 0]] 542 | self._live4_list.append(pos) 543 | 544 | u13 = [self._board[k][k] for k in range(5)] 545 | u14 = [self._board[k][k] for k in range(10, 15)] 546 | u15 = [self._board[k][14 - k] for k in range(5)] 547 | u16 = [self._board[k][14 - k] for k in range(10, 15)] 548 | 549 | if self._dot(u13, feature_I) == 4: 550 | pos = [[0, 0], [1, 1], [3, 3], [4, 4]] 551 | self._live4_list.append(pos) 552 | if self._dot(u14, feature_I) == 4: 553 | pos = [[10, 10], [11, 11], [13, 13], [14, 14]] 554 | self._live4_list.append(pos) 555 | if self._dot(u15, feature_I) == 4: 556 | pos = [[0, 14], [1, 13], [3, 11], [4, 10]] 557 | self._live4_list.append(pos) 558 | if self._dot(u16, feature_I) == 4: 559 | pos = [[10, 4], [11, 3], [13, 1], [14, 0]] 560 | self._live4_list.append(pos) 561 | 562 | def _check_forbidden_moves(self): 563 | if not self._conf['forbidden_moves']: 564 | return False 565 | self._update_live3_list() 566 | self._update_live4_list() 567 | count_valid_live3 = len(self._live3_list) 568 | count_valid_live4 = len(self._live4_list) 569 | for live3 in self._live3_list: 570 | for live4 in self._live4_list: 571 | if any([live3 == live4[i:i+len(live3)] for i in range(len(live4)-len(live3)+1)]): # this live3 is contained in a live4 and should be counted as the live4 572 | count_valid_live3 -= 1 573 | else: 574 | continue 575 | if (count_valid_live3 >= 2) or (count_valid_live4 >= 2): # found a double-three or double-four, which is forbidden for Black 576 | return True 577 | else: 578 | return False 579 | 580 | def check_rules(self, board, action, color): 581 | self._read(board) 582 | i = action[0] 583 | j = action[1] 584 | 585 | if self._board[i][j] != 0: 586 | return 'occupied' 587 | 588 | self._board[i][j] = color 589 | 590 | if color == BLACK: # Black Player 591 | # Check overline and winning pattern 592 | count = self._count_consecutive(i, j, color) # Count the maximal consecutive number 593 | if count >= 5: 594 | if count == 5: 595 | # Winning pattern for Black 596 | # print("live3 = " + str(self._live3_list)) 597 | # print("live4 = " + str(self._live4_list)) 598 | # print("C5") 599 | return 'blackwins' 600 | else: 601 | # Overline: a forbidden move for Black, so Black loses the game 602 | # print("live3 = " + str(self._live3_list)) 603 | # print("live4 = " + str(self._live4_list)) 604 | if self._conf['forbidden_moves']: 605 | # print("Forbidden Move: C6+") 606 | return 'whitewins' 607 | else: 608 | # print('C5') 609 | return 'blackwins' 610 | # Check double three and double four 611 | signal = self._check_forbidden_moves() 612 | if signal: 613 | # If we find a forbidden move, then White wins 614 | # print("live3 = " + str(self._live3_list)) 615 | # print("live4 = " + str(self._live4_list)) 616 | # print("Forbidden Move: D3 D4") 617 | return 'whitewins' 618 | else: # White Player, i.e. color == -1 619 | count = self._count_consecutive(i, j, color) # Count the maximal consecutive number 620 | if count >= 5: 621 | # Winning pattern for White 622 | # print("live3 = " + str(self._live3_list)) 623 | # print("live4 = " + str(self._live4_list)) 624 | # print("C5+") 625 | return 'whitewins' 626 | 627 | # If the board is full and there is still no winner, the game is a draw 628 | if sum(sum(np.array(np.array(board) == 0, dtype=int))) == 0: 629 | return 'draw' 630 | 631 | return 'continue'
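All the live3/live4 scans above reduce to the _dot helper below: pattern cells that must hold a Black stone are weighted 1, while the large negative sentinels (M, N, L in _update_live3_list; M in _update_live4_list) knock the product away from the target score whenever a marked cell is occupied. A small standalone illustration using feature_A of _update_live3_list (M = -10, N = -20); forbidden-move checks only concern Black, whose stones are 1:

window = [0, 1, 1, 1, 0]         # five cells: an open three for Black (BLACK = 1)
feature_A = [-10, 1, 1, 1, -20]
assert sum(a * b for a, b in zip(window, feature_A)) == 3    # score 3 -> live three detected
blocked = [-1, 1, 1, 1, 0]       # a White stone on the left end
assert sum(a * b for a, b in zip(blocked, feature_A)) == 13  # != 3, so not counted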
580 |     def check_rules(self, board, action, color):
581 |         self._read(board)
582 |         i = action[0]
583 |         j = action[1]
584 | 
585 |         if self._board[i][j] != 0:
586 |             return 'occupied'
587 | 
588 |         self._board[i][j] = color
589 | 
590 |         if color == BLACK:  # Black player
591 |             # Check overline and winning pattern
592 |             count = self._count_consecutive(i, j, color)  # count the maximal consecutive number
593 |             if count >= 5:
594 |                 if count == 5:
595 |                     # Winning pattern for Black
596 |                     # print("live3 = " + str(self._live3_list))
597 |                     # print("live4 = " + str(self._live4_list))
598 |                     # print("C5")
599 |                     return 'blackwins'
600 |                 else:
601 |                     # Overline: a forbidden move for Black, so Black loses the game
602 |                     # print("live3 = " + str(self._live3_list))
603 |                     # print("live4 = " + str(self._live4_list))
604 |                     if self._conf['forbidden_moves']:
605 |                         # print("Forbidden Move: C6+")
606 |                         return 'whitewins'
607 |                     else:
608 |                         # print('C5')
609 |                         return 'blackwins'
610 |             # Check double-three and double-four
611 |             signal = self._check_forbidden_moves()
612 |             if signal:
613 |                 # If we find a forbidden move, then White wins
614 |                 # print("live3 = " + str(self._live3_list))
615 |                 # print("live4 = " + str(self._live4_list))
616 |                 # print("Forbidden Move: D3 D4")
617 |                 return 'whitewins'
618 |         else:  # White player, i.e. color == -1
619 |             count = self._count_consecutive(i, j, color)  # count the maximal consecutive number
620 |             if count >= 5:
621 |                 # Winning pattern for White
622 |                 # print("live3 = " + str(self._live3_list))
623 |                 # print("live4 = " + str(self._live4_list))
624 |                 # print("C5+")
625 |                 return 'whitewins'
626 | 
627 |         # If the board is full and there is still no winner, the game is a draw
628 |         if sum(sum(np.array(np.array(board) == 0, dtype=int))) == 0:
629 |             return 'draw'
630 | 
631 |         return 'continue'
632 | 
633 |     def _dot(self, x, y):
634 |         if len(x) != len(y):
635 |             raise ValueError('vectors must have the same length')
636 |         s = 0
637 |         for i in range(len(x)):
638 |             s += x[i] * y[i]
639 |         return s
--------------------------------------------------------------------------------
/AlphaGomoku/ui/__init__.py:
--------------------------------------------------------------------------------
1 | from .board import *
2 | from .renderer import *
--------------------------------------------------------------------------------
/AlphaGomoku/ui/board.py:
--------------------------------------------------------------------------------
1 | import time
2 | from ..rules import *
3 | import numpy as np
4 | 
5 | 
6 | class Board:
7 |     def __init__(self, renderer, board_size=15):
8 |         self._board = [[0 for _ in range(board_size)] for _ in range(board_size)]
9 |         self._board_size = board_size
10 |         self._player = BLACK
11 |         self._winner = 0
12 |         self._round = 0
13 |         self._last_move = None
14 | 
15 |         self._renderer = renderer  # keep the reference even when it is None, see show_scores()
16 |         if renderer is None:
17 |             self._display = False
18 |         else:
19 |             self._display = True
20 | 
21 |     def __str__(self):
22 |         info = 'round = ' + str(self.round()) + '\n'
23 |         info += 'last move = ' + str(self.last_move()) + '\n'
24 |         if self.current_player() == BLACK:
25 |             return info + 'current_player = BLACK'
26 |         else:
27 |             return info + 'current_player = WHITE'
28 | 
29 |     # return a copy of the board
30 |     def board(self):
31 |         return np.copy(self._board)
32 | 
33 |     # the player takes an action (a board coordinate)
34 |     def move(self, player, action, info=None):
35 |         x = action[0]  # row
36 |         y = action[1]  # col
37 | 
38 |         # wait until the renderer is initialized
39 |         while self._display and (not self._renderer.is_initialized()):
40 |             time.sleep(.2)
41 | 
42 |         if not isinstance(x, int) or not isinstance(y, int):
43 |             print("> error: x and y should be integers:", x, y)
44 |             return 1, self.board()
45 |         if x < 0 or x > self._board_size - 1 or y < 0 or y > self._board_size - 1:
46 |             print("> error: x and y should be in [0, " + str(self._board_size - 1) + "]:", x, y)
47 |             return 1, self.board()
48 | 
49 |         num_str = str(self.stone_num() + 1)
50 |         if info is not None:
51 |             info = info + '_' + num_str
52 | 
53 |         if player == BLACK:
54 |             if self._display:
55 |                 self._renderer.move(player, (x, y), info)
56 |             self._board[x][y] = BLACK
57 |             self._player = WHITE
58 |             self._round += 1
59 |         else:
60 |             if self._display:
61 |                 self._renderer.move(player, (x, y), info)
62 |             self._board[x][y] = WHITE
63 |             self._player = BLACK
64 | 
65 |         self._last_move = action
66 | 
67 |     def clear(self):
68 |         self._board = [[0 for _ in range(self._board_size)] for _ in range(self._board_size)]
69 |         self._player = BLACK
70 |         self._winner = 0
71 |         self._round = 0
72 |         self._last_move = None
73 |         if self._display:
74 |             self._renderer.paint_background()
75 |             while not self._renderer.is_initialized():
76 |                 time.sleep(.1)
77 | 
78 |     def read(self, new_board):
79 |         self.clear()
80 |         black_num = 0
81 |         white_num = 0
82 | 
83 |         for row in range(self._board_size):
84 |             for col in range(self._board_size):
85 |                 if new_board[row][col] == BLACK:
86 |                     self.move(BLACK, (row, col))
87 |                     black_num += 1
88 |                 elif new_board[row][col] == WHITE:
89 |                     self.move(WHITE, (row, col))
90 |                     white_num += 1
91 | 
92 |         self._round = black_num
93 |         if black_num == white_num:
94 |             self._player = BLACK
95 |         elif black_num == white_num + 1:
96 |             self._player = WHITE
97 |         else:
98 |             print("> error: illegal stone num")
99 |             print('> black_num = ' + str(black_num))
100 |             print('> white_num = ' + str(white_num))
101 | 
102 |     def round(self):
103 |         return self._round
104 | 
105 |     def current_player(self):
106 |         return self._player
107 | 
108 |     def last_move(self):
109 |         return self._last_move
110 | 
111 |     def stone_num(self):
112 |         if self._player == BLACK:
113 |             return 2 * self._round
114 |         else:
115 |             return 2 * self._round - 1
116 | 
117 |     def legal_moves(self):
118 |         legal_moves = []
119 |         for i in range(self._board_size):
120 |             for j in range(self._board_size):
121 |                 if self._board[i][j] == 0:
122 |                     legal_moves.append((i, j))
123 |         return legal_moves
124 | 
125 |     def show_scores(self, action_list, score_list):
126 |         if self._renderer is not None:
127 |             self._renderer.show_score(self.board(), action_list, score_list)
128 | 
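A minimal headless-usage sketch of this Board class (hypothetical driver code, not part of the repository; it assumes BLACK and WHITE are the integer stone colours exported by rules.py):

    from AlphaGomoku.ui.board import Board
    from AlphaGomoku.rules import BLACK, WHITE

    board = Board(renderer=None)       # no renderer attached: nothing is drawn
    board.move(BLACK, (7, 7))          # Black opens at the centre, (row, col)
    board.move(WHITE, (7, 8))
    print(board.stone_num())           # 2
    print(len(board.legal_moves()))    # 15 * 15 - 2 = 223
    print(board)                       # round, last move and current player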
--------------------------------------------------------------------------------
/AlphaGomoku/ui/image/black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/ui/image/black.png
--------------------------------------------------------------------------------
/AlphaGomoku/ui/image/desk.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/ui/image/desk.jpg
--------------------------------------------------------------------------------
/AlphaGomoku/ui/image/white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PolyKen/15_by_15_AlphaGomoku/de6db305c369abf07a538d56e59764718abf9d0c/AlphaGomoku/ui/image/white.png
--------------------------------------------------------------------------------
/AlphaGomoku/ui/renderer.py:
--------------------------------------------------------------------------------
1 | from sys import exit
2 | from ..rules import *
3 | import time
4 | import threading
5 | 
6 | display_mode = True
7 | use_dialog = True
8 | try:
9 |     import easygui
10 | except ImportError:
11 |     print('> warning: module [easygui] not found, falling back to console output')
12 |     use_dialog = False
13 | try:
14 |     import pygame
15 | except ImportError:
16 |     print('> warning: module [pygame] not found, display disabled')
17 |     display_mode = False
18 | 
19 | image_path = 'AlphaGomoku/ui/image/'
20 | 
21 | 
22 | class Renderer(threading.Thread):
23 | 
24 |     # Note that some methods come in a public and a private version, e.g. 'move', 'read' and 'paint_background'.
25 |     # The private ones do the actual rendering on the Renderer thread, while the public ones only signal the
26 |     # Renderer thread by setting boolean flags; the Renderer thread polls these flags in an endless loop and
27 |     # performs the corresponding update when one of them is set.
28 | 
29 |     # Since all rendering must happen on the Renderer thread, we have to take this indirect route.
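The comment above describes a flag-polling handshake: one thread owns all drawing, and other threads request work by setting a flag and waiting for it to clear. A stripped-down, self-contained sketch of the same pattern (names are hypothetical and the drawing call is replaced by a print):

    import threading
    import time

    class Painter(threading.Thread):
        def __init__(self):
            super().__init__(daemon=True)
            self._update_move = False     # request flag, polled by run()
            self._next_pos = None
            self.start()

        def run(self):                    # the only thread that "draws"
            while True:
                if self._update_move:
                    print('draw stone at', self._next_pos)
                    self._update_move = False   # mark the request as consumed
                time.sleep(1e-4)

        def move(self, pos):              # public API, callable from any thread
            while self._update_move:      # wait until the previous request is done
                time.sleep(1e-4)
            self._next_pos = pos
            self._update_move = True

    painter = Painter()
    painter.move((7, 7))
    time.sleep(0.01)                      # give the painter time to draw before exiting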
30 | 
31 |     def __init__(self, screen_size, board_size=15):
32 |         super(Renderer, self).__init__()
33 |         self._screen_size = screen_size
34 |         self._board_size = board_size
35 |         self._spacing = int(self._screen_size[1] / (board_size + 1))
36 |         self._screen = None
37 |         self._background = None
38 |         self._stone_black = None
39 |         self._stone_white = None
40 | 
41 |         self._init = False
42 | 
43 |         self._update_move = False
44 |         self._next_pos = None
45 |         self._next_player = 0
46 | 
47 |         self._update_read = False
48 |         self._new_board = None
49 | 
50 |         self._update_clear = False
51 | 
52 |         self._update_info = False
53 |         self._update_score = False
54 |         self._info_surface_cache = []
55 |         self._info_rect_cache = []
56 |         self._score_surface_cache = []
57 |         self._score_rect_cache = []
58 | 
59 |         self._is_waiting_for_click = False
60 |         self._mouse_click_pos = None
61 | 
62 |         self.daemon = True  # setDaemon() is deprecated since Python 3.10
63 |         self.start()
64 | 
65 |     def run(self):
66 |         pygame.init()
67 |         self._screen = pygame.display.set_mode(self._screen_size, 0, 32)
68 |         self._background = pygame.image.load(image_path + 'desk.jpg').convert()
69 |         self._stone_black = pygame.image.load(image_path + 'black.png').convert_alpha()
70 |         self._stone_white = pygame.image.load(image_path + 'white.png').convert_alpha()
71 |         self._stone_black = pygame.transform.smoothscale(self._stone_black, (self._spacing, self._spacing))
72 |         self._stone_white = pygame.transform.smoothscale(self._stone_white, (self._spacing, self._spacing))
73 |         self.paint_background()
74 |         while True:
75 |             for event in pygame.event.get():
76 |                 if event.type == pygame.QUIT:
77 |                     print("> exit")
78 |                     pygame.quit()
79 |                     exit()
80 |                 if self._is_waiting_for_click and event.type == pygame.MOUSEBUTTONDOWN:
81 |                     mouse_position = pygame.mouse.get_pos()
82 |                     y = int(mouse_position[0] / self._spacing - 0.5)
83 |                     x = int(mouse_position[1] / self._spacing - 0.5)
84 |                     if x in range(self._board_size) and y in range(self._board_size):
85 |                         self._is_waiting_for_click = False
86 |                         self._mouse_click_pos = (x, y)
87 |                         print("> click " + str(self._mouse_click_pos))
88 |             if self._update_clear:
89 |                 self._paint_background()
90 |             if self._update_read:
91 |                 self._read(self._new_board)
92 |             if self._update_move:
93 |                 self._move(self._next_player, self._next_pos)
94 |             if self._update_info:
95 |                 self._show_info()
96 |             if self._update_score:
97 |                 self._show_score()
98 | 
99 |     def paint_background(self):
100 |         self._update_clear = True
101 |         self._update_move = False
102 |         self._update_read = False
103 |         self._init = False
104 | 
105 |     def _paint_background(self):
106 |         self._screen.blit(self._background, (0, 0))
107 |         black_color = (0, 0, 0)
108 | 
109 |         for i in range(1, self._board_size + 1):
110 |             start_horizontal = (self._spacing, i * self._spacing)
111 |             end_horizontal = (self._screen_size[1] - self._spacing, i * self._spacing)
112 |             start_vertical = (i * self._spacing, self._spacing)
113 |             end_vertical = (i * self._spacing, self._screen_size[1] - self._spacing)
114 | 
115 |             if i == 1 or i == self._board_size:  # thick border on the first and last grid lines (the original `i == self._board_size + 1` was never reached)
116 |                 pygame.draw.line(self._screen, black_color, start_horizontal, end_horizontal, 3)
117 |                 pygame.draw.line(self._screen, black_color, start_vertical, end_vertical, 3)
118 |             else:
119 |                 pygame.draw.line(self._screen, black_color, start_horizontal, end_horizontal, 2)
120 |                 pygame.draw.line(self._screen, black_color, start_vertical, end_vertical, 2)
121 | 
122 |         if self._board_size % 2 == 1:
123 |             mid = (self._board_size + 1) / 2
124 |             start_pos = (self._spacing * int(mid) - 2, self._spacing * int(mid) - 2)
125 |             size 
= (6, 6) 126 | pygame.draw.rect(self._screen, black_color, pygame.rect.Rect(start_pos, size)) 127 | 128 | pygame.display.update() 129 | self._update_clear = False 130 | self._init = True 131 | 132 | def move(self, player, action, info=None): 133 | while self._update_move: 134 | time.sleep(1e-4) 135 | self._next_player = player 136 | self._next_pos = action 137 | self._update_move = True 138 | if info is not None: 139 | self.show_info(info, player, action) 140 | 141 | def _move(self, player, action): 142 | position = (int((action[1] + 0.5) * self._spacing), int((action[0] + 0.5) * self._spacing)) 143 | if player == BLACK: 144 | self._screen.blit(self._stone_black, position) 145 | elif player == -1: 146 | self._screen.blit(self._stone_white, position) 147 | 148 | self._update_move = False 149 | 150 | def read(self, new_board): 151 | while self._update_read: 152 | time.sleep(1e-4) 153 | self._new_board = new_board 154 | self._update_read = True 155 | 156 | def _read(self, new_board): 157 | self._paint_background() 158 | self._update_read = False 159 | for row in range(self._board_size): 160 | for col in range(self._board_size): 161 | if new_board[row][col] == 1: 162 | self._move(1, (row, col)) 163 | elif new_board[row][col] == -1: 164 | self._move(-1, (row, col)) 165 | 166 | def ask_for_click(self): 167 | self._is_waiting_for_click = True 168 | while self._is_waiting_for_click: 169 | time.sleep(1e-4) 170 | return self._mouse_click_pos 171 | 172 | def show_score(self, board, action_list, score_list): 173 | self.read(board) 174 | time.sleep(1e-2) 175 | large_font = pygame.font.SysFont('Calibri', size=20) 176 | red = (255, 0, 0) 177 | 178 | for a_s in list(zip(action_list, score_list)): 179 | action, score = a_s[0], a_s[1] 180 | if self._board_size == 8: 181 | position = (int((action[1] + 0.63) * self._spacing), int((action[0] + 0.76) * self._spacing)) 182 | if self._board_size == 15: 183 | position = (int((action[1] + 0.80) * self._spacing), int((action[0] + 0.72) * self._spacing)) 184 | 185 | self._score_surface_cache.append(large_font.render(str(round(score, 2)), True, red)) 186 | self._score_rect_cache.append(position) 187 | 188 | self._update_score = True 189 | 190 | def _show_score(self): 191 | size = len(self._score_rect_cache) 192 | for i in range(size): 193 | self._screen.blit(self._score_surface_cache[i], self._score_rect_cache[i]) 194 | self._score_surface_cache = [] 195 | self._score_rect_cache = [] 196 | 197 | pygame.display.update() 198 | self._update_score = False 199 | 200 | def show_info(self, info, player, action): 201 | infos = info.split('_') 202 | # p = 'p = ' + infos[0] 203 | v = infos[1] 204 | num = infos[2] 205 | 206 | if self._board_size == 8: 207 | # position_1 = (int((action[1] + 0.63) * self._spacing), int((action[0] + 0.76) * self._spacing)) 208 | if float(infos[1]) >= 0: 209 | position_2 = (int((action[1] + 0.62) * self._spacing), int((action[0] + 0.78) * self._spacing)) 210 | else: 211 | position_2 = (int((action[1] + 0.61) * self._spacing), int((action[0] + 0.78) * self._spacing)) 212 | 213 | if int(num) < 10: 214 | position_3 = (int((action[1] + 0.90) * self._spacing), int((action[0] + 0.96) * self._spacing)) 215 | else: 216 | position_3 = (int((action[1] + 0.82) * self._spacing), int((action[0] + 0.96) * self._spacing)) 217 | 218 | small_font = pygame.font.SysFont('Calibri', size=16) 219 | large_font = pygame.font.SysFont('Calibri', size=32) 220 | 221 | if self._board_size == 15: 222 | # position_1 = (int((action[1] + 0.63) * self._spacing), int((action[0] + 
0.76) * self._spacing)) 223 | if float(infos[1]) >= 0: 224 | position_2 = (int((action[1] + 0.72) * self._spacing), int((action[0] + 0.75) * self._spacing)) 225 | else: 226 | position_2 = (int((action[1] + 0.70) * self._spacing), int((action[0] + 0.75) * self._spacing)) 227 | 228 | if int(num) < 10: 229 | position_3 = (int((action[1] + 0.90) * self._spacing), int((action[0] + 0.96) * self._spacing)) 230 | else: 231 | position_3 = (int((action[1] + 0.82) * self._spacing), int((action[0] + 0.96) * self._spacing)) 232 | 233 | small_font = pygame.font.SysFont('Calibri', size=10) 234 | large_font = pygame.font.SysFont('Calibri', size=20) 235 | 236 | color = (255, 0, 0) 237 | if player == BLACK: 238 | color = (255, 255, 255) 239 | if player == WHITE: 240 | color = (0, 0, 0) 241 | 242 | # self._info_surface_cache.append(small_font.render(p, True, color)) 243 | # self._info_rect_cache.append(position_1) 244 | 245 | if infos[1] != '2': 246 | self._info_surface_cache.append(small_font.render(v, True, color)) 247 | self._info_rect_cache.append(position_2) 248 | 249 | self._info_surface_cache.append(large_font.render(num, True, color)) 250 | self._info_rect_cache.append(position_3) 251 | self._update_info = True 252 | 253 | def _show_info(self): 254 | size = len(self._info_rect_cache) 255 | for i in range(size): 256 | self._screen.blit(self._info_surface_cache[i], self._info_rect_cache[i]) 257 | self._info_surface_cache = [] 258 | self._info_rect_cache = [] 259 | 260 | pygame.display.update() 261 | self._update_info = False 262 | 263 | def is_initialized(self): 264 | return self._init 265 | 266 | 267 | def ask_for_draw(): 268 | if display_mode and use_dialog: 269 | return easygui.ccbox(title='Request', msg='AlphaRenju requests a draw.', choices=['draw', 'continue']) 270 | else: 271 | print('> AlphaRenju requests a draw.') 272 | return 0 273 | 274 | 275 | def show_result(mode, result): 276 | if display_mode and use_dialog and mode in [2, 2.5, 3, 9]: 277 | info = '' 278 | if result == 'blackwins': 279 | info = 'Black wins!' 280 | if result == 'whitewins': 281 | info = 'White wins!' 282 | if result == 'draw': 283 | info = 'Draw!' 
284 |         easygui.msgbox(title='Result', msg=info)
285 |     else:
286 |         print(result)
287 | 
--------------------------------------------------------------------------------
/AlphaGomoku/utils.py:
--------------------------------------------------------------------------------
1 | import time
2 | import numpy as np
3 | from email.mime.text import MIMEText
4 | from email.mime.multipart import MIMEMultipart
5 | from email.header import Header
6 | import smtplib
7 | 
8 | 
9 | from_addr = "reposter@sina.com"
10 | password = ""
11 | 
12 | 
13 | def send_email_report(to_addr, content):
14 |     try:
15 |         msg = MIMEMultipart()
16 |         msg['Subject'] = Header('Gomoku AI Report', 'utf-8')
17 |         msg['From'] = Header(from_addr)
18 |         msg['To'] = Header(to_addr)
19 |         msg['Reply-to'] = Header(from_addr)
20 | 
21 |         msg.attach(MIMEText(content, 'plain', 'utf-8'))
22 | 
23 |         smtp_server = "smtp.sina.com"
24 |         server = smtplib.SMTP(smtp_server, 25)
25 | 
26 |         server.set_debuglevel(1)
27 |         server.starttls()
28 | 
29 |         server.login(from_addr, password)
30 |         server.sendmail(from_addr, [to_addr], msg.as_string())
31 |         server.quit()
32 |     except Exception:  # reporting is best-effort; never let a mail failure crash training
33 |         pass
34 | 
35 | 
36 | def log(func):
37 |     def wrapper(*args, **kwargs):
38 |         start = time.perf_counter()  # time.clock() was removed in Python 3.8
39 |         print('>> calling %s()' % func.__name__)
40 |         result = func(*args, **kwargs)
41 |         end = time.perf_counter()
42 |         print('>> %s() time = %s' % (func.__name__, str(round(end - start, 3))))
43 |         return result
44 | 
45 |     return wrapper
46 | 
47 | 
48 | def index2coordinate(index, size):
49 |     row = index // size
50 |     col = index % size
51 |     return int(row), int(col)
52 | 
53 | 
54 | def coordinate2index(cor, size):
55 |     return size * cor[0] + cor[1]
56 | 
57 | 
58 | def board2legalvec(board):
59 |     vec = np.array(np.array(board) == 0, dtype=int)  # np.int was removed in NumPy 1.24
60 |     return vec.flatten()
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 | 
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 | 
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | *** 2 | # 15 by 15 AlphaGomoku 3 | 4 | Introduction 5 | ==== 6 | - This is a Gomoku AI based on curriculum learning and AlphaGo methods. 7 | 8 | 12 | 13 | 14 | Demonstration 15 | ==== 16 | Human vs AlphaGomoku (15 by 15 board) 17 | ------- 18 | AI adopts deterministic policy with 400 simulations per move. 19 |
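Here "deterministic policy" means the agent plays the most-visited move found by MCTS, whereas stochastic play samples from the visit distribution for exploration. A schematic sketch of the two selection rules (a hypothetical helper with made-up visit counts, not code from this repository):

```python
import numpy as np

def select_move(visit_counts, deterministic=True, temperature=1.0):
    # visit_counts: one MCTS visit count per board point
    pi = np.asarray(visit_counts, dtype=float) ** (1.0 / temperature)
    pi /= pi.sum()
    if deterministic:
        return int(np.argmax(pi))                    # match play
    return int(np.random.choice(len(pi), p=pi))      # self-play exploration

counts = [0, 5, 120, 30, 1]      # hypothetical counts after 400 simulations
print(select_move(counts))       # -> 2, the most-visited point
```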
20 | *(demonstration GIFs omitted)*