├── AlphaZeroNetwork.py
├── CCRLDataset.py
├── MCTS.py
├── README.md
├── encoder.py
├── playchess.py
├── reactFrontent
│   └── index.js
├── reformat.py
├── server.py
├── static
│   ├── index.html
│   ├── index.js
│   └── pngs
│       ├── B.png
│       ├── K.png
│       ├── N.png
│       ├── P.png
│       ├── Q.png
│       ├── R.png
│       ├── b.png
│       ├── k.png
│       ├── n.png
│       ├── p.png
│       ├── q.png
│       └── r.png
├── train.py
└── weights
    ├── AlphaZeroNet_10x128.pt
    ├── AlphaZeroNet_20x256.pt
    └── Screen Shot 2022-11-22 at 6.38.54 AM.png
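How the pieces fit together: encoder.py turns a chess.Board into the 16-plane
tensor the network consumes and maps moves into a 72x8x8 policy volume,
AlphaZeroNetwork.py scores positions, MCTS.py runs the tree search, and
playchess.py / server.py drive it all from a terminal or the React front end.
A quick orientation sketch using two helpers defined in encoder.py below (the
printed values are worked out by hand from that code):

```python
import chess
import encoder

board = chess.Board()

#Board -> input planes and legal move mask
planes, mask = encoder.encodePositionForInference( board )
print( planes.shape, mask.shape )   #(16, 8, 8) (72, 8, 8)

#Move -> cell of the 72x8x8 policy volume: e2e4 goes north two squares,
#which is plane 16 + 2 = 18, starting from rank 1, file 4
print( encoder.moveToIdx( chess.Move.from_uci( 'e2e4' ) ) )   #(18, 1, 4)
```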
--------------------------------------------------------------------------------
/AlphaZeroNetwork.py:
--------------------------------------------------------------------------------
1 | 
2 | import torch
3 | import torch.nn as nn
4 | 
5 | class ConvBlock( nn.Module ):
6 |     """
7 |     The block consists of a conv layer, batch normalization layer
8 |     and relu activation.
9 |     """
10 | 
11 |     def __init__( self, input_channels, num_filters ):
12 |         """
13 |         Args:
14 |             input_channels (int) the number of input channels
15 |             num_filters (int) the number of filters in the conv layer
16 |         """
17 |         super().__init__()
18 |         self.conv1 = nn.Conv2d( input_channels, num_filters, 3, padding=1 )
19 |         self.bn1 = nn.BatchNorm2d( num_filters )
20 |         self.relu1 = nn.ReLU()
21 | 
22 |     def forward( self, x ):
23 |         """
24 |         Args:
25 |             x (torch.Tensor) the tensor to apply the layers to.
26 |         """
27 |         x = self.conv1( x )
28 |         x = self.bn1( x )
29 |         x = self.relu1( x )
30 | 
31 |         return x
32 | 
33 | class ResidualBlock( nn.Module ):
34 |     """
35 |     A residual block.
36 |     """
37 | 
38 |     def __init__( self, num_filters ):
39 |         """
40 |         Args:
41 |             num_filters (int) the number of filters in the conv layers. Assumes this is the
42 |                 same as the number of input channels
43 |         """
44 |         super().__init__()
45 |         self.conv1 = nn.Conv2d( num_filters, num_filters, 3,
46 |                                 padding=1 )
47 |         self.bn1 = nn.BatchNorm2d( num_filters )
48 |         self.relu1 = nn.ReLU()
49 |         self.conv2 = nn.Conv2d( num_filters, num_filters, 3,
50 |                                 padding=1 )
51 |         self.bn2 = nn.BatchNorm2d( num_filters )
52 |         self.relu2 = nn.ReLU()
53 | 
54 |     def forward( self, x ):
55 |         """
56 |         Args:
57 |             x (torch.Tensor) the tensor to apply the layers to.
58 |         """
59 |         residual = x
60 | 
61 |         x = self.conv1( x )
62 |         x = self.bn1( x )
63 |         x = self.relu1( x )
64 | 
65 |         x = self.conv2( x )
66 |         x = self.bn2( x )
67 |         x += residual
68 |         x = self.relu2( x )
69 | 
70 |         return x
71 | 
72 | class ValueHead( nn.Module ):
73 |     """
74 |     nn.Module for the value head
75 |     """
76 | 
77 |     def __init__( self, input_channels ):
78 |         """
79 |         Args:
80 |             input_channels (int) the number of input channels
81 |         """
82 |         super().__init__()
83 |         self.conv1 = nn.Conv2d( input_channels, 1, 1 )
84 |         self.bn1 = nn.BatchNorm2d( 1 )
85 |         self.relu1 = nn.ReLU()
86 |         self.fc1 = nn.Linear( 64, 256 )
87 |         self.relu2 = nn.ReLU()
88 |         self.fc2 = nn.Linear( 256, 1 )
89 |         self.tanh1 = nn.Tanh()
90 | 
91 |     def forward( self, x ):
92 |         """
93 |         Args:
94 |             x (torch.Tensor) the tensor to apply the layers to.
95 |         """
96 | 
97 |         x = self.conv1( x )
98 |         x = self.bn1( x )
99 |         x = self.relu1( x )
100 |         x = x.view( x.shape[0], 64 )
101 |         x = self.fc1( x )
102 |         x = self.relu2( x )
103 |         x = self.fc2( x )
104 |         x = self.tanh1( x )
105 | 
106 |         return x
107 | 
108 | class PolicyHead( nn.Module ):
109 |     """
110 |     nn.Module for the policy head
111 |     """
112 | 
113 |     def __init__( self, input_channels ):
114 |         """
115 |         Args:
116 |             input_channels (int) the number of input channels
117 |         """
118 |         super().__init__()
119 |         self.conv1 = nn.Conv2d( input_channels, 2, 1 )
120 |         self.bn1 = nn.BatchNorm2d( 2 )
121 |         self.relu1 = nn.ReLU()
122 |         self.fc1 = nn.Linear( 128, 4608 )
123 | 
124 |     def forward( self, x ):
125 |         """
126 |         Args:
127 |             x (torch.Tensor) the tensor to apply the layers to.
128 |         """
129 | 
130 |         x = self.conv1( x )
131 |         x = self.bn1( x )
132 |         x = self.relu1( x )
133 |         x = x.view( x.shape[0], 128 )
134 |         x = self.fc1( x )
135 | 
136 |         return x
137 | 
138 | class AlphaZeroNet( nn.Module ):
139 |     """
140 |     Neural network with AlphaZero architecture.
141 |     """
142 | 
143 |     def __init__( self, num_blocks, num_filters ):
144 |         """
145 |         Args:
146 |             num_blocks (int) the number of residual blocks
147 |             num_filters (int) the number of filters in each conv layer
148 |         """
149 |         super().__init__()
150 |         #The number of input planes is fixed at 16
151 |         self.convBlock1 = ConvBlock( 16, num_filters )
152 | 
153 |         residualBlocks = [ ResidualBlock( num_filters ) for i in range( num_blocks ) ]
154 | 
155 |         self.residualBlocks = nn.ModuleList( residualBlocks )
156 | 
157 |         self.valueHead = ValueHead( num_filters )
158 | 
159 |         self.policyHead = PolicyHead( num_filters )
160 | 
161 |         self.softmax1 = nn.Softmax( dim=1 )
162 | 
163 |         self.mseLoss = nn.MSELoss()
164 | 
165 |         self.crossEntropyLoss = nn.CrossEntropyLoss()
166 | 
167 |     def forward( self, x, valueTarget=None, policyTarget=None, policyMask=None ):
168 |         """
169 |         Args:
170 |             x (torch.Tensor) the input tensor.
171 |             valueTarget (torch.Tensor) the value target.
172 |             policyTarget (torch.Tensor) the policy target.
173 |             policyMask (torch.Tensor) the legal move mask
174 | 
175 |         Returns:
176 |             (valueLoss, policyLoss) in training mode, or
177 |             (value, policy_softmax) in eval mode, where the softmax
178 |             is taken over the legal moves only
179 |         """
180 | 
181 |         x = self.convBlock1( x )
182 | 
183 |         for block in self.residualBlocks:
184 |             x = block( x )
185 | 
186 |         value = self.valueHead( x )
187 | 
188 |         policy = self.policyHead( x )
189 | 
190 |         if self.training:
191 | 
192 |             valueLoss = self.mseLoss( value, valueTarget )
193 | 
194 |             policyTarget = policyTarget.view( policyTarget.shape[0] )
195 | 
196 |             policyLoss = self.crossEntropyLoss( policy, policyTarget )
197 | 
198 |             return valueLoss, policyLoss
199 | 
200 |         else:
201 | 
202 |             policyMask = policyMask.view( policyMask.shape[0], -1 )
203 | 
204 |             policy_exp = torch.exp( policy )
205 | 
206 |             policy_exp *= policyMask.type( torch.float32 )
207 | 
208 |             policy_exp_sum = torch.sum( policy_exp, dim=1, keepdim=True )
209 | 
210 |             policy_softmax = policy_exp / policy_exp_sum
211 | 
212 |             return value, policy_softmax
213 | 
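A minimal inference sketch for the module above (untrained weights and a dummy
input, so only the shapes are meaningful; the all-ones mask is purely for
illustration -- real masks come from encoder.getLegalMoveMask):

```python
import torch
from AlphaZeroNetwork import AlphaZeroNet

net = AlphaZeroNet( 10, 128 )   #same architecture as weights/AlphaZeroNet_10x128.pt
net.eval()

position = torch.zeros( 1, 16, 8, 8 )   #a batch of one encoded position
mask = torch.ones( 1, 72, 8, 8 )        #legal move mask, all ones for illustration

with torch.no_grad():
    value, policy = net( position, policyMask=mask )

print( value.shape )    #torch.Size([1, 1]), tanh output in [-1, 1]
print( policy.shape )   #torch.Size([1, 4608]), masked softmax over the 72*8*8 move volume
```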
--------------------------------------------------------------------------------
/CCRLDataset.py:
--------------------------------------------------------------------------------
1 | 
2 | import chess.pgn
3 | import numpy as np
4 | import os
5 | import torch
6 | from torch.utils.data import Dataset
7 | import encoder
8 | 
9 | def tolist( mainline_moves ):
10 |     """
11 |     Change an iterable object of moves to a list of moves.
12 | 
13 |     Args:
14 |         mainline_moves (Mainline object) iterable list of moves
15 | 
16 |     Returns:
17 |         moves (list of chess.Move) list version of the input moves
18 |     """
19 |     moves = []
20 |     for move in mainline_moves:
21 |         moves.append( move )
22 |     return moves
23 | 
24 | class CCRLDataset( Dataset ):
25 |     """
26 |     Subclass of torch.utils.data.Dataset for the ccrl dataset.
27 |     """
28 | 
29 |     def __init__( self, ccrl_dir ):
30 |         """
31 |         Args:
32 |             ccrl_dir (string) Path to directory containing
33 |                 pgn files with names 0.pgn, 1.pgn, 2.pgn, etc.
34 |         """
35 |         self.ccrl_dir = ccrl_dir
36 |         self.pgn_file_names = os.listdir( ccrl_dir )
37 | 
38 |     def __len__( self ):
39 |         """
40 |         Get length of dataset
41 |         """
42 |         return len( self.pgn_file_names )
43 | 
44 |     def __getitem__( self, idx ):
45 |         """
46 |         Load the game in idx.pgn
47 |         Get a random position, the move made from it, and the winner
48 |         Encode these as numpy arrays
49 | 
50 |         Args:
51 |             idx (int) the index into the dataset.
52 | 
53 |         Returns:
54 |             position (torch.Tensor (16, 8, 8) float32) the encoded position
55 |             policy (torch.Tensor (1) long) the target move's index
56 |             value (torch.Tensor (1) float) the encoded winner of the game
57 |             mask (torch.Tensor (72, 8, 8) int) the legal move mask
58 |         """
59 |         pgn_file_name = self.pgn_file_names[ idx ]
60 |         pgn_file_name = os.path.join( self.ccrl_dir, pgn_file_name )
61 |         with open( pgn_file_name ) as pgn_fh:
62 |             game = chess.pgn.read_game( pgn_fh )
63 | 
64 |         moves = tolist( game.mainline_moves() )
65 | 
66 |         randIdx = int( np.random.random() * ( len( moves ) - 1 ) )
67 | 
68 |         board = game.board()
69 | 
70 |         for i, move in enumerate( moves ):
71 |             board.push( move )
72 |             if randIdx == i:
73 |                 next_move = moves[ i + 1 ]
74 |                 break
75 | 
76 |         winner = encoder.parseResult( game.headers[ 'Result' ] )
77 | 
78 |         position, policy, value, mask = encoder.encodeTrainingPoint( board, next_move, winner )
79 | 
80 |         return { 'position': torch.from_numpy( position ),
81 |                  'policy': torch.Tensor( [policy] ).type( dtype=torch.long ),
82 |                  'value': torch.Tensor( [value] ),
83 |                  'mask': torch.from_numpy( mask ) }
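The dataset plugs straight into a torch DataLoader. A sketch, assuming the
reformatted games live in a hypothetical reformatted/ directory (one game per
numbered .pgn file, which is what __init__ above expects):

```python
from torch.utils.data import DataLoader
from CCRLDataset import CCRLDataset

#'reformatted/' is a placeholder for the directory produced by reformat.py
ds = CCRLDataset( 'reformatted/' )
loader = DataLoader( ds, batch_size=256, shuffle=True, num_workers=4 )

batch = next( iter( loader ) )
print( batch[ 'position' ].shape )   #torch.Size([256, 16, 8, 8])
print( batch[ 'policy' ].shape )     #torch.Size([256, 1]), target move indices
print( batch[ 'value' ].shape )      #torch.Size([256, 1]), results in {-1., 0., 1.}
print( batch[ 'mask' ].shape )       #torch.Size([256, 72, 8, 8])
```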
--------------------------------------------------------------------------------
/MCTS.py:
--------------------------------------------------------------------------------
1 | 
2 | import encoder
3 | import math
4 | from threading import Thread
5 | #from atomic import AtomicLong (only needed if the atomic virtual losses below are re-enabled)
6 | import time
7 | 
8 | def calcUCT( edge, N_p ):
9 |     """
10 |     Calculate the UCT formula.
11 | 
12 |     Args:
13 |         edge (Edge) the edge which the UCT formula is for
14 |         N_p (float) the parent's visit count
15 | 
16 |     Returns:
17 |         (float) the calculated value
18 |     """
19 | 
20 |     Q = edge.getQ()
21 | 
22 |     N_c = edge.getN()
23 | 
24 |     P = edge.getP()
25 | 
26 |     #This is a quick fix
27 |     #when getting nans from nn
28 |     #if math.isnan( P ):
29 |     #    P = 0.1
30 | 
31 |     C = 1.5
32 | 
33 |     UCT = Q + P * C * math.sqrt( N_p ) / ( 1 + N_c )
34 | 
35 |     assert not math.isnan( UCT ), 'Q {} N_c {} P {}'.format( Q, N_c, P )
36 | 
37 |     return UCT
38 | 
39 | class Node:
40 |     """
41 |     A node in the search tree.
42 |     Nodes store their visit count (N), the sum of the
43 |     win probabilities in the subtree from the point
44 |     of view of this node (sum_Q), and a list of
45 |     edges
46 |     """
47 | 
48 |     def __init__( self, board, new_Q, move_probabilities ):
49 |         """
50 |         Args:
51 |             board (chess.Board) the chess board
52 |             new_Q (float) the probability of winning according to neural network
53 |             move_probabilities (numpy.array (200) float) probability distribution across move list
54 |         """
55 |         self.N = 1.
56 | 
57 |         self.sum_Q = new_Q
58 | 
59 |         self.edges = []
60 | 
61 |         for idx, move in enumerate( board.legal_moves ):
62 |             edge = Edge( move, move_probabilities[ idx ] )
63 |             self.edges.append( edge )
64 | 
65 |     def getN( self ):
66 |         """
67 |         Returns:
68 |             (float) the number of rollouts performed
69 |         """
70 | 
71 |         return self.N
72 | 
73 |     def getQ( self ):
74 |         """
75 |         Returns:
76 |             (float) the mean win probability of this node
77 |         """
78 | 
79 |         return self.sum_Q / self.N
80 | 
81 |     def UCTSelect( self ):
82 |         """
83 |         Get the edge that maximizes the UCT formula, or None
84 |         if this node is terminal.
85 |         Returns:
86 |             max_edge (Edge) the edge maximizing the UCT formula.
87 |         """
88 | 
89 |         max_uct = -1000.
90 |         max_edge = None
91 | 
92 |         for edge in self.edges:
93 | 
94 |             uct = calcUCT( edge, self.N )
95 | 
96 |             if max_uct < uct:
97 |                 max_uct = uct
98 |                 max_edge = edge
99 | 
100 |         assert not ( max_edge is None and not self.isTerminal() )
101 | 
102 |         return max_edge
103 | 
104 |     def maxNSelect( self ):
105 |         """
106 |         Returns:
107 |             max_edge (Edge) the edge with maximum N.
108 |         """
109 | 
110 |         max_N = -1
111 |         max_edge = None
112 | 
113 |         for edge in self.edges:
114 | 
115 |             N = edge.getN()
116 | 
117 |             if max_N < N:
118 |                 max_N = N
119 |                 max_edge = edge
120 | 
121 |         return max_edge
122 | 
123 |     def getStatisticsString( self ):
124 |         """
125 |         Get a string containing the current search statistics.
126 |         Returns:
127 |             string (string) a string describing all the moves from this node
128 |         """
129 | 
130 |         string = '|{: ^10}|{: ^10}|{: ^10}|{: ^10}|{: ^10}|\n'.format(
131 |             'move', 'P', 'N', 'Q', 'UCT' )
132 | 
133 |         edges = self.edges.copy()
134 | 
135 |         edges.sort( key=lambda edge: edge.getN() )
136 | 
137 |         edges.reverse()
138 | 
139 |         for edge in edges:
140 | 
141 |             move = edge.getMove()
142 | 
143 |             P = edge.getP()
144 | 
145 |             N = edge.getN()
146 | 
147 |             Q = edge.getQ()
148 | 
149 |             UCT = calcUCT( edge, self.N )
150 | 
151 |             string += '|{: ^10}|{:10.4f}|{:10.4f}|{:10.4f}|{:10.4f}|\n'.format(
152 |                 str( move ), P, N, Q, UCT )
153 | 
154 |         return string
155 | 
156 |     def isTerminal( self ):
157 |         """
158 |         Checks if this node is terminal.
159 |         """
160 |         return len( self.edges ) == 0
161 | 
162 | class Edge:
163 |     """
164 |     An edge in the search tree.
165 |     Each edge stores a move, a move probability,
166 |     virtual losses and a child.
167 |     """
168 | 
169 |     def __init__( self, move, move_probability ):
170 |         """
171 |         Args:
172 |             move (chess.Move) the move this edge represents
173 |             move_probability (float) this move's probability from the neural network
174 |         """
175 | 
176 |         self.move = move
177 | 
178 |         self.P = move_probability
179 | 
180 |         self.child = None
181 | 
182 |         #self.virtualLosses = AtomicLong( 0 )
183 |         self.virtualLosses = 0.
184 | 
185 |     def has_child( self ):
186 |         """
187 |         Returns:
188 |             (bool) whether this edge has a child
189 |         """
190 | 
191 |         return self.child is not None
192 | 
193 |     def getN( self ):
194 |         """
195 |         Returns:
196 |             (float) the child's N
197 |         """
198 | 
199 |         if self.has_child():
200 |             return self.child.N + self.virtualLosses
201 |         else:
202 |             return 0. + self.virtualLosses
203 | 
204 |     def getQ( self ):
205 |         """
206 |         Returns:
207 |             (float) the child's Q, from the parent's point of view
208 |         """
209 |         if self.has_child():
210 |             return 1. - ( ( self.child.sum_Q + self.virtualLosses ) / ( self.child.N + self.virtualLosses ) )
211 |         else:
212 |             return 0.
213 | 
214 |     def getP( self ):
215 |         """
216 |         Returns:
217 |             (float) this move's probability (P)
218 |         """
219 | 
220 |         return self.P
221 | 
222 |     def expand( self, board, new_Q, move_probabilities ):
223 |         """
224 |         Create the child node with the given board position. Return
225 |         True if we are expanding an unexpanded node, and otherwise False.
226 |         Args:
227 |             board (chess.Board) the chess position
228 |             new_Q (float) the probability of winning according to the neural network
229 |             move_probabilities (numpy.array (200) float) the move probabilities according to the neural network
230 | 
231 |         Returns:
232 |             (bool) whether we are expanding an unexpanded node
233 |         """
234 | 
235 |         if self.child is None:
236 | 
237 |             self.child = Node( board, new_Q, move_probabilities )
238 | 
239 |             return True
240 | 
241 |         else:
242 | 
243 |             return False
244 | 
245 |     def getChild( self ):
246 |         """
247 |         Returns:
248 |             (Node) this edge's child node
249 |         """
250 | 
251 |         return self.child
252 | 
253 |     def getMove( self ):
254 |         """
255 |         Returns:
256 |             (chess.Move) this edge's move
257 |         """
258 | 
259 |         return self.move
260 | 
261 |     def addVirtualLoss( self ):
262 |         """
263 |         When doing multiple rollouts in parallel,
264 |         we can discourage threads from taking
265 |         the same path by adding fake losses
266 |         to visited nodes.
267 |         """
268 | 
269 |         self.virtualLosses += 1
270 | 
271 |     def clearVirtualLoss( self ):
272 | 
273 |         #self.virtualLosses = AtomicLong( 0 )
274 |         self.virtualLosses = 0.
275 | 
276 | class Root( Node ):
277 | 
278 |     def __init__( self, board, neuralNetwork ):
279 |         """
280 |         Create the root of the search tree.
281 | 
282 |         Args:
283 |             board (chess.Board) the chess position
284 |             neuralNetwork (torch.nn.Module) the neural network
285 | 
286 |         """
287 |         value, move_probabilities = encoder.callNeuralNetwork( board, neuralNetwork )
288 | 
289 |         Q = value / 2. + 0.5
290 | 
291 |         super().__init__( board, Q, move_probabilities )
292 | 
293 |         self.same_paths = 0
294 | 
295 |     def selectTask( self, board, node_path, edge_path ):
296 |         """
297 |         Do the selection stage of MCTS.
298 | 
299 |         Args/Returns:
300 |             board (chess.Board) the root position on input;
301 |                 on return, either the position of the selected unexpanded node,
302 |                 or the last node visited, if that is terminal
303 |             node_path (list of Node) ordered list of nodes traversed
304 |             edge_path (list of Edge) ordered list of edges traversed
305 |         """
306 | 
307 |         cNode = self
308 | 
309 |         while True:
310 | 
311 |             node_path.append( cNode )
312 | 
313 |             cEdge = cNode.UCTSelect()
314 | 
315 |             edge_path.append( cEdge )
316 | 
317 |             if cEdge is None:
318 | 
319 |                 #cNode is terminal. Return with board set to the same position as cNode
320 |                 #and edge_path[ -1 ] = None
321 | 
322 |                 assert cNode.isTerminal()
323 | 
324 |                 break
325 | 
326 |             cEdge.addVirtualLoss()
327 | 
328 |             board.push( cEdge.getMove() )
329 | 
330 |             if not cEdge.has_child():
331 | 
332 |                 #cEdge has not been expanded. Return with board set to the same
333 |                 #position as the unexpanded Node
334 | 
335 |                 break
336 | 
337 |             cNode = cEdge.getChild()
338 | 
339 |     def rollout( self, board, neuralNetwork ):
340 |         """
341 |         Each rollout traverses the tree until
342 |         it reaches an un-expanded node or a terminal node.
343 |         Unexpanded nodes are expanded and their
344 |         win probability propagated.
345 |         Terminal nodes have their win probability
346 |         propagated as well.
347 | 
348 |         Args:
349 |             board (chess.Board) the chess position
350 |             neuralNetwork (torch.nn.Module) the neural network
351 |         """
352 | 
353 |         node_path = []
354 |         edge_path = []
355 | 
356 |         self.selectTask( board, node_path, edge_path )
357 | 
358 |         edge = edge_path[ -1 ]
359 | 
360 |         if edge is not None:
361 |             value, move_probabilities = encoder.callNeuralNetwork( board, neuralNetwork )
362 | 
363 |             new_Q = value / 2. + 0.5
364 | 
365 |             edge.expand( board, new_Q, move_probabilities )
366 | 
367 |             new_Q = 1. - new_Q
368 | 
369 |         else:
370 |             winner = encoder.parseResult( board.result() )
371 | 
372 |             if not board.turn:
373 |                 winner *= -1
374 | 
375 |             new_Q = float( winner ) / 2. + 0.5
376 | 
377 |         last_node_idx = len( node_path ) - 1
378 | 
379 |         for i in range( last_node_idx, -1, -1 ):
380 | 
381 |             node = node_path[ i ]
382 | 
383 |             node.N += 1
384 | 
385 |             if ( last_node_idx - i ) % 2 == 0:
386 | 
387 |                 node.sum_Q += new_Q
388 | 
389 |             else:
390 | 
391 |                 node.sum_Q += 1. - new_Q
392 | 
393 |         for edge in edge_path:
394 | 
395 |             if edge is not None:
396 |                 edge.clearVirtualLoss()
397 | 
398 | 
399 |     def parallelRollouts( self, board, neuralNetwork, num_parallel_rollouts ):
400 |         """
401 |         Same as rollout, except done in parallel.
402 | 
403 |         Args:
404 |             board (chess.Board) the chess position
405 |             neuralNetwork (torch.nn.Module) the neural network
406 |             num_parallel_rollouts (int) the number of rollouts done in parallel
407 |         """
408 | 
409 |         boards = []
410 |         node_paths = []
411 |         edge_paths = []
412 |         threads = []
413 | 
414 |         for i in range( num_parallel_rollouts ):
415 |             boards.append( board.copy() )
416 |             node_paths.append( [] )
417 |             edge_paths.append( [] )
418 |             threads.append( Thread( target=self.selectTask,
419 |                 args=( boards[ i ], node_paths[ i ], edge_paths[ i ] ) ) )
420 |             threads[ i ].start()
421 |             time.sleep( 0.0001 )
422 | 
423 |         for i in range( num_parallel_rollouts ):
424 |             threads[ i ].join()
425 | 
426 |         values, move_probabilities = encoder.callNeuralNetworkBatched( boards, neuralNetwork )
427 | 
428 |         for i in range( num_parallel_rollouts ):
429 |             edge = edge_paths[ i ][ -1 ]
430 |             board = boards[ i ]
431 |             value = values[ i ]
432 |             if edge is not None:
433 | 
434 |                 new_Q = value / 2. + 0.5
435 | 
436 |                 isunexpanded = edge.expand( board, new_Q,
437 |                     move_probabilities[ i ] )
438 | 
439 |                 if not isunexpanded:
440 |                     self.same_paths += 1
441 | 
442 |                 new_Q = 1. - new_Q
443 | 
444 |             else:
445 |                 winner = encoder.parseResult( board.result() )
446 | 
447 |                 if not board.turn:
448 |                     winner *= -1
449 | 
450 |                 new_Q = float( winner ) / 2. + 0.5
451 | 
452 |             last_node_idx = len( node_paths[ i ] ) - 1
453 | 
454 |             for r in range( last_node_idx, -1, -1 ):
455 | 
456 |                 node = node_paths[ i ][ r ]
457 | 
458 |                 node.N += 1.
459 | 
460 |                 if ( last_node_idx - r ) % 2 == 0:
461 | 
462 |                     node.sum_Q += new_Q
463 | 
464 |                 else:
465 | 
466 |                     node.sum_Q += 1. - new_Q
467 | 
468 |             for edge in edge_paths[ i ]:
469 | 
470 |                 if edge is not None:
471 |                     edge.clearVirtualLoss()
472 | 
473 | 
474 | 
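Driving the search takes only a few lines; this sketch mirrors what
playchess.py below does on the engine's turn (CPU inference, using the
shipped 20x256 weights file):

```python
import chess
import torch
import MCTS
import AlphaZeroNetwork

net = AlphaZeroNetwork.AlphaZeroNet( 20, 256 )
net.load_state_dict( torch.load( 'weights/AlphaZeroNet_20x256.pt',
                                 map_location=torch.device( 'cpu' ) ) )
net.eval()

board = chess.Board()

with torch.no_grad():
    root = MCTS.Root( board, net )
    for _ in range( 100 ):   #100 passes of 10 parallel rollouts each
        root.parallelRollouts( board.copy(), net, 10 )

print( root.getStatisticsString() )
print( 'best move', root.maxNSelect().getMove() )
```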
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | # A chess engine based on the AlphaZero algorithm
3 | 
4 | This is a PyTorch implementation of Google DeepMind's AlphaZero algorithm for chess.
5 | 
6 | ## Live
7 | 
8 | [Play against it here](http://ec2-54-175-18-115.compute-1.amazonaws.com/index.html)
9 | 
10 | ## Dependencies
11 | 
12 | Python 3 with PyTorch, python-chess, and NumPy.
13 | 
14 | ## Running the chess engine
15 | 
16 | The entry point to the chess engine is the python file playchess.py. Good parameters for strong, long-thinking moves would be:
17 | ```
18 | python3 playchess.py --model weights/AlphaZeroNet_20x256.pt --verbose --rollouts 1000 --threads 10 --mode h
19 | ```
20 | The current position is displayed with an ascii chess board. Enter your moves in UCI notation (e.g. e2e4). Note that running the engine requires a weights file.
21 | 
22 | ## Training script
23 | 
24 | Download the [CCRL Dataset](https://lczero.org/blog/2018/09/a-standard-dataset/), reformat it using `reformat.py`, and run `train.py`.
25 | 
26 | ## About the algorithm
27 | 
28 | The algorithm is based on [this paper](https://arxiv.org/pdf/1712.01815.pdf). One very important difference between the algorithm used here and the one described in that paper is that this implementation uses supervised learning instead of reinforcement learning. Reinforcement learning is very computationally intensive: as that paper reports, it took thousands of TPUs to generate the self-play games. This program, on the other hand, trains on the [CCRL Dataset](https://lczero.org/blog/2018/09/a-standard-dataset/), which contains 2.5 million top-notch chess games. Because each game has around 80 unique positions in it, this yields about 200 million data points to train on.
29 | 
30 | ## Strength
31 | 
32 | I have only tested it a few times against the computers on [lichess](https://lichess.org/) and [chess.com](https://www.chess.com). It was able to draw, but had to think for about a minute while its opponents only got a few seconds. Compared to most chess engines, which use the alpha-beta algorithm and a hand-written evaluation function, it evaluates far fewer positions per second: most top programs consider about 45 million chess positions every second, whereas this program considers about 300, even with GPU and multicore speedups.
33 | 
34 | ## Video
35 | * [Drawing Chess.com AI](https://youtu.be/zHTBfBq5PXY)
36 | * [Training](https://youtu.be/IMUqCLswa3s)
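In training mode the network's forward pass returns the two losses directly
(see AlphaZeroNetwork.py above), so one optimization step over a CCRLDataset
batch is essentially the sketch below -- the optimizer choice and
hyperparameters here are illustrative; consult train.py for the real loop:

```python
import torch.optim as optim
from AlphaZeroNetwork import AlphaZeroNet

net = AlphaZeroNet( 20, 256 )
net.train()
optimizer = optim.Adam( net.parameters() )   #illustrative; train.py defines the real settings

def train_step( batch ):
    """One gradient step on a batch from CCRLDataset."""
    optimizer.zero_grad()
    valueLoss, policyLoss = net( batch[ 'position' ],
                                 valueTarget=batch[ 'value' ],
                                 policyTarget=batch[ 'policy' ] )
    loss = valueLoss + policyLoss   #equal weighting of the two heads
    loss.backward()
    optimizer.step()
    return loss.item()
```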
--------------------------------------------------------------------------------
/encoder.py:
--------------------------------------------------------------------------------
1 | 
2 | import chess
3 | import numpy as np
4 | import torch
5 | 
6 | cuda = False
7 | 
8 | def parseResult( result ):
9 |     """
10 |     Map the result string to an int in {-1, 0, 1}
11 |     for black won, draw, and white won respectively.
12 | 
13 |     Args:
14 |         result (string) string representation of the winner of a game
15 | 
16 |     Returns:
17 |         (int) integer representing the winner
18 |     """
19 |     if result == "1-0":
20 |         return 1
21 |     elif result == "1/2-1/2":
22 |         return 0
23 |     elif result == "0-1":
24 |         return -1
25 |     else:
26 |         raise Exception( "Unexpected result string {}. Exiting".format( result ) )
27 | 
28 | def encodePosition( board ):
29 |     """
30 |     Encodes a chess position as a vector. The first 12 planes represent
31 |     the different pieces. The next 4 represent castling rights.
32 | 
33 |     Args:
34 |         board (chess.Board) the position to be encoded.
35 | 
36 |     Returns:
37 |         planes (numpy.array (16,8,8) float32) the array encoding this position
38 |     """
39 |     planes = np.zeros( (16, 8, 8), dtype=np.float32 )
40 | 
41 |     #white pawns
42 |     wPawns = board.pieces( chess.PAWN, chess.WHITE )
43 |     wPawns = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in wPawns ]
44 |     for r, f in wPawns:
45 |         planes[ 0, r, f ] = 1.
46 | 
47 |     #black pawns
48 |     bPawns = board.pieces( chess.PAWN, chess.BLACK )
49 |     bPawns = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in bPawns ]
50 |     for r, f in bPawns:
51 |         planes[ 1, r, f ] = 1.
52 | 
53 |     #white rooks
54 |     wRooks = board.pieces( chess.ROOK, chess.WHITE )
55 |     wRooks = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in wRooks ]
56 |     for r, f in wRooks:
57 |         planes[ 2, r, f ] = 1.
58 | 
59 |     #black rooks
60 |     bRooks = board.pieces( chess.ROOK, chess.BLACK )
61 |     bRooks = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in bRooks ]
62 |     for r, f in bRooks:
63 |         planes[ 3, r, f ] = 1.
64 | 
65 |     #white bishops
66 |     wBishops = board.pieces( chess.BISHOP, chess.WHITE )
67 |     wBishops = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in wBishops ]
68 |     for r, f in wBishops:
69 |         planes[ 4, r, f ] = 1.
70 | 
71 |     #black bishops
72 |     bBishops = board.pieces( chess.BISHOP, chess.BLACK )
73 |     bBishops = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in bBishops ]
74 |     for r, f in bBishops:
75 |         planes[ 5, r, f ] = 1.
76 | 
77 |     #white knights
78 |     wKnights = board.pieces( chess.KNIGHT, chess.WHITE )
79 |     wKnights = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in wKnights ]
80 |     for r, f in wKnights:
81 |         planes[ 6, r, f ] = 1.
82 | 
83 |     #black knights
84 |     bKnights = board.pieces( chess.KNIGHT, chess.BLACK )
85 |     bKnights = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in bKnights ]
86 |     for r, f in bKnights:
87 |         planes[ 7, r, f ] = 1.
88 | 
89 |     #white queens
90 |     wQueens = board.pieces( chess.QUEEN, chess.WHITE )
91 |     wQueens = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in wQueens ]
92 |     for r, f in wQueens:
93 |         planes[ 8, r, f ] = 1.
94 | 
95 |     #black queens
96 |     bQueens = board.pieces( chess.QUEEN, chess.BLACK )
97 |     bQueens = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in bQueens ]
98 |     for r, f in bQueens:
99 |         planes[ 9, r, f ] = 1.
100 | 
101 |     #white kings
102 |     wKings = board.pieces( chess.KING, chess.WHITE )
103 |     wKings = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in wKings ]
104 |     for r, f in wKings:
105 |         planes[ 10, r, f ] = 1.
106 | 
107 |     #black kings
108 |     bKings = board.pieces( chess.KING, chess.BLACK )
109 |     bKings = [ ( chess.square_rank( sq ), chess.square_file( sq ) ) for sq in bKings ]
110 |     for r, f in bKings:
111 |         planes[ 11, r, f ] = 1.
112 | 
113 |     #white can kingside castle
114 |     if board.has_kingside_castling_rights( chess.WHITE ):
115 |         planes[ 12, :, : ] = 1.
116 | 
117 |     #black can kingside castle
118 |     if board.has_kingside_castling_rights( chess.BLACK ):
119 |         planes[ 13, :, : ] = 1.
120 | 
121 |     #white can queenside castle
122 |     if board.has_queenside_castling_rights( chess.WHITE ):
123 |         planes[ 14, :, : ] = 1.
124 | 
125 |     #black can queenside castle
126 |     if board.has_queenside_castling_rights( chess.BLACK ):
127 |         planes[ 15, :, : ] = 1.
128 | 
129 |     return planes
130 | 
131 | def moveToIdx( move ):
132 |     """
133 |     Maps a legal move to an index in (72, 8, 8).
134 |     Each of the 72 planes represents a different direction
135 |     and distance: rook and bishop directions with distance (64 planes)
136 |     and 8 knight directions.
137 |     The location in the plane specifies the start square.
138 | 
139 |     Args:
140 |         move (chess.Move) the move to be encoded.
141 | 
142 |     Returns:
143 |         directionAndDistancePlane (int) the plane the move maps to
144 |         from_rank (int) the move's starting rank
145 |         from_file (int) the move's starting file
146 |     """
147 | 
148 |     from_rank = chess.square_rank( move.from_square )
149 |     from_file = chess.square_file( move.from_square )
150 | 
151 |     to_rank = chess.square_rank( move.to_square )
152 |     to_file = chess.square_file( move.to_square )
153 | 
154 |     if from_rank == to_rank and from_file < to_file:
155 |         directionPlane = 0
156 |         distance = to_file - from_file
157 |         directionAndDistancePlane = directionPlane + distance
158 |     elif from_rank == to_rank and from_file > to_file:
159 |         directionPlane = 8
160 |         distance = from_file - to_file
161 |         directionAndDistancePlane = directionPlane + distance
162 |     elif from_file == to_file and from_rank < to_rank:
163 |         directionPlane = 16
164 |         distance = to_rank - from_rank
165 |         directionAndDistancePlane = directionPlane + distance
166 |     elif from_file == to_file and from_rank > to_rank:
167 |         directionPlane = 24
168 |         distance = from_rank - to_rank
169 |         directionAndDistancePlane = directionPlane + distance
170 |     elif to_file - from_file == to_rank - from_rank and to_file - from_file > 0:
171 |         directionPlane = 32
172 |         distance = to_rank - from_rank
173 |         directionAndDistancePlane = directionPlane + distance
174 |     elif to_file - from_file == to_rank - from_rank and to_file - from_file < 0:
175 |         directionPlane = 40
176 |         distance = from_rank - to_rank
177 |         directionAndDistancePlane = directionPlane + distance
178 |     elif to_file - from_file == -( to_rank - from_rank ) and to_file - from_file > 0:
179 |         directionPlane = 48
180 |         distance = to_file - from_file
181 |         directionAndDistancePlane = directionPlane + distance
182 |     elif to_file - from_file == -( to_rank - from_rank ) and to_file - from_file < 0:
183 |         directionPlane = 56
184 |         distance = from_file - to_file
185 |         directionAndDistancePlane = directionPlane + distance
186 |     elif to_file - from_file == 1 and to_rank - from_rank == 2:
187 |         directionAndDistancePlane = 64
188 |     elif to_file - from_file == 2 and to_rank - from_rank == 1:
189 |         directionAndDistancePlane = 65
190 |     elif to_file - from_file == 2 and to_rank - from_rank == -1:
191 |         directionAndDistancePlane = 66
192 |     elif to_file - from_file == 1 and to_rank - from_rank == -2:
193 |         directionAndDistancePlane = 67
194 |     elif to_file - from_file == -1 and to_rank - from_rank == 2:
195 |         directionAndDistancePlane = 68
196 |     elif to_file - from_file == -2 and to_rank - from_rank == 1:
197 |         directionAndDistancePlane = 69
198 |     elif to_file - from_file == -2 and to_rank - from_rank == -1:
199 |         directionAndDistancePlane = 70
200 |     elif to_file - from_file == -1 and to_rank - from_rank == -2:
201 |         directionAndDistancePlane = 71
202 | 
203 |     return directionAndDistancePlane, from_rank, from_file
204 | 
205 | def getLegalMoveMask( board ):
206 |     """
207 |     Returns a mask encoding the legal moves.
208 | 
209 |     Args:
210 |         board (chess.Board) the chess position.
211 | 
212 |     Returns:
213 |         mask (numpy.array (72, 8, 8) int32) the legal move mask
214 |     """
215 |     mask = np.zeros( (72, 8, 8), dtype=np.int32 )
216 | 
217 |     for move in board.legal_moves:
218 |         planeIdx, rankIdx, fileIdx = moveToIdx( move )
219 |         mask[ planeIdx, rankIdx, fileIdx ] = 1
220 | 
221 |     return mask
222 | 
223 | def mirrorMove( move ):
224 |     """
225 |     Mirrors a move vertically.
226 | 
227 |     Args:
228 |         move (chess.Move) the move to be flipped
229 | 
230 |     Returns:
231 |         (chess.Move) the mirrored move
232 |     """
233 | 
234 |     from_square = move.from_square
235 |     to_square = move.to_square
236 | 
237 |     new_from_square = chess.square_mirror( from_square )
238 | 
239 |     new_to_square = chess.square_mirror( to_square )
240 | 
241 |     return chess.Move( new_from_square, new_to_square, promotion=move.promotion )
242 | 
243 | def encodeTrainingPoint( board, move, winner ):
244 |     """
245 |     Encodes a position, move, and winner as vectors.
246 | 
247 |     Args:
248 |         board (chess.Board) the chess position.
249 |         move (chess.Move) the target move from this position
250 |         winner (int) the winner of the game. -1 means black won,
251 |             0 means draw, 1 means white won.
252 | 
253 |     Returns:
254 |         positionPlanes (numpy.array shape=(16,8,8) dtype=float32) the encoded position
255 |         moveIdx (int) index of the encoded target move
256 |         winner (float) the winner of the game
257 |         mask (numpy.array (72, 8, 8) int32) the legal move mask
258 |     """
259 | 
260 |     #Flip everything if black's turn
261 |     if not board.turn:
262 |         board = board.mirror()
263 |         winner *= -1
264 |         move = mirrorMove( move )
265 | 
266 |     positionPlanes = encodePosition( board )
267 | 
268 |     planeIdx, rankIdx, fileIdx = moveToIdx( move )
269 | 
270 |     moveIdx = planeIdx * 64 + rankIdx * 8 + fileIdx
271 | 
272 |     mask = getLegalMoveMask( board )
273 | 
274 |     return positionPlanes, moveIdx, float( winner ), mask
275 | 
276 | def encodePositionForInference( board ):
277 |     """
278 |     Encodes a position as a vector.
279 | 
280 |     Args:
281 |         board (chess.Board) the chess position.
282 | 
283 |     Returns:
284 |         positionPlanes (numpy.array shape=(16,8,8) dtype=float32) the encoded position
285 |         mask (numpy.array (72, 8, 8) int32) the legal move mask
286 |     """
287 | 
288 |     #Flip if black's turn
289 |     if not board.turn:
290 |         board = board.mirror()
291 | 
292 |     positionPlanes = encodePosition( board )
293 | 
294 |     mask = getLegalMoveMask( board )
295 | 
296 |     return positionPlanes, mask
297 | 
298 | def decodePolicyOutput( board, policy ):
299 |     """
300 |     Decode the policy output from the neural network.
301 | 
302 |     Args:
303 |         board (chess.Board) the board
304 |         policy (numpy.array) the policy output
305 |     Returns: (numpy.array) a probability for each legal move, in board.legal_moves order
306 |     """
307 | 
308 |     move_probabilities = np.zeros( 200, dtype=np.float32 )
309 | 
310 |     num_moves = 0
311 | 
312 |     for idx, move in enumerate( board.legal_moves ):
313 |         if not board.turn:
314 |             move = mirrorMove( move )
315 |         planeIdx, rankIdx, fileIdx = moveToIdx( move )
316 |         moveIdx = planeIdx * 64 + rankIdx * 8 + fileIdx
317 |         move_probabilities[ idx ] = policy[ moveIdx ]
318 |         num_moves += 1
319 | 
320 |     return move_probabilities[ :num_moves ]
321 | 
322 | def callNeuralNetwork( board, neuralNetwork ):
323 |     """
324 |     Call the neural network on the given position,
325 |     get the outputs.
326 | 
327 |     Args:
328 |         board (chess.Board) the chess board
329 |         neuralNetwork (torch.nn.Module) the neural network
330 | 
331 |     Returns:
332 |         value (float) the value of this position
333 |         move_probabilities (numpy.array (num_moves) float) the move probabilities
334 |     """
335 | 
336 |     position, mask = encodePositionForInference( board )
337 | 
338 |     position = torch.from_numpy( position )[ None, ... ]
339 | 
340 |     mask = torch.from_numpy( mask )[ None, ... ]
341 | 
342 |     if cuda:
343 |         position = position.cuda()
344 |         mask = mask.cuda()
345 | 
346 |     value, policy = neuralNetwork( position, policyMask=mask )
347 | 
348 |     value = value.cpu().numpy()[ 0, 0 ]
349 | 
350 |     policy = policy.cpu().numpy()[ 0 ]
351 | 
352 |     move_probabilities = decodePolicyOutput( board, policy )
353 | 
354 |     return value, move_probabilities
355 | 
356 | def callNeuralNetworkBatched( boards, neuralNetwork ):
357 |     """
358 |     Run neural network on each board given. Return outputs.
359 | 
360 |     Args:
361 |         boards (list of chess.Board) the input positions
362 |         neuralNetwork (torch.nn.Module) the neural network
363 | 
364 |     Returns:
365 |         value (numpy.array (num_inputs) float) the value output for each input position
366 |         move_probabilities (numpy.array (num_inputs, 200) float) the move probabilities for each position
367 |     """
368 | 
369 |     num_inputs = len( boards )
370 | 
371 |     inputs = torch.zeros( (num_inputs, 16, 8, 8), dtype=torch.float32 )
372 | 
373 |     masks = torch.zeros( (num_inputs, 72, 8, 8), dtype=torch.float32 )
374 | 
375 |     for i in range( num_inputs ):
376 | 
377 |         position, mask = encodePositionForInference( boards[ i ] )
378 | 
379 |         inputs[ i ] = torch.from_numpy( position )
380 | 
381 |         masks[ i ] = torch.from_numpy( mask )
382 | 
383 |     if cuda:
384 |         inputs = inputs.cuda()
385 |         masks = masks.cuda()
386 | 
387 |     value, policy = neuralNetwork( inputs, policyMask=masks )
388 | 
389 |     move_probabilities = np.zeros( ( num_inputs, 200 ), dtype=np.float32 )
390 | 
391 |     value = value.cpu().numpy().reshape( (num_inputs) )
392 | 
393 |     policy = policy.cpu().numpy()
394 | 
395 |     for i in range( num_inputs ):
396 | 
397 |         move_probabilities_tmp = decodePolicyOutput( boards[ i ], policy[ i ] )
398 | 
399 |         move_probabilities[ i, : move_probabilities_tmp.shape[0] ] = move_probabilities_tmp
400 | 
401 |     return value, move_probabilities
402 | 
403 | 
--------------------------------------------------------------------------------
/playchess.py:
--------------------------------------------------------------------------------
1 | 
2 | import argparse
3 | import chess
4 | import MCTS
5 | import torch
6 | import AlphaZeroNetwork
7 | import time
8 | 
9 | def tolist( move_generator ):
10 |     """
11 |     Change an iterable object of moves to a list of moves.
12 | 
13 |     Args:
14 |         move_generator (iterable of chess.Move) iterable list of moves
15 | 
16 |     Returns:
17 |         moves (list of chess.Move) list version of the input moves
18 |     """
19 |     moves = []
20 |     for move in move_generator:
21 |         moves.append( move )
22 |     return moves
23 | 
24 | def main( modelFile, mode, color, num_rollouts, num_threads, fen, verbose ):
25 | 
26 |     #prepare neural network
27 |     alphaZeroNet = AlphaZeroNetwork.AlphaZeroNet( 20, 256 ) #blocks and filters must match the weights file
28 | 
29 |     #toggle for cpu/gpu
30 |     cuda = False
31 |     if cuda:
32 |         weights = torch.load( modelFile )
33 |     else:
34 |         weights = torch.load( modelFile, map_location=torch.device('cpu') )
35 | 
36 |     alphaZeroNet.load_state_dict( weights )
37 | 
38 |     if cuda:
39 |         alphaZeroNet = alphaZeroNet.cuda()
40 | 
41 |     for param in alphaZeroNet.parameters():
42 |         param.requires_grad = False
43 | 
44 |     alphaZeroNet.eval()
45 | 
46 |     #create chess board object
47 |     if fen:
48 |         board = chess.Board( fen )
49 |     else:
50 |         board = chess.Board()
51 | 
52 |     #play chess moves
53 |     while True:
54 | 
55 |         if board.is_game_over():
56 |             #If the game is over, output the winner and wait for user input to continue
57 |             print( 'Game over. Winner: {}'.format( board.result() ) )
58 |             board.reset_board()
59 |             c = input( 'Press enter to continue ' )
60 | 
61 |         #Print the current state of the board
62 |         if board.turn:
63 |             print( 'White\'s turn' )
64 |         else:
65 |             print( 'Black\'s turn' )
66 |         print( board )
67 | 
68 |         if mode == 'h' and board.turn == color:
69 |             #If we are in human mode and it is the human's turn, play the move specified from stdin
70 |             move_list = tolist( board.legal_moves )
71 | 
72 |             idx = -1
73 | 
74 |             while not ( 0 <= idx < len( move_list ) ):
75 | 
76 |                 string = input( 'Choose a move ' )
77 | 
78 |                 for i, move in enumerate( move_list ):
79 |                     if str( move ) == string:
80 |                         idx = i
81 |                         break
82 | 
83 |             board.push( move_list[ idx ] )
84 | 
85 |         else:
86 |             #In all other cases the AI selects the next move
87 | 
88 |             starttime = time.perf_counter()
89 | 
90 |             with torch.no_grad():
91 | 
92 |                 root = MCTS.Root( board, alphaZeroNet )
93 | 
94 |                 for i in range( num_rollouts ):
95 |                     root.parallelRollouts( board.copy(), alphaZeroNet, num_threads )
96 | 
97 |             endtime = time.perf_counter()
98 | 
99 |             elapsed = endtime - starttime
100 | 
101 |             Q = root.getQ()
102 | 
103 |             N = root.getN()
104 | 
105 |             nps = N / elapsed
106 | 
107 |             same_paths = root.same_paths
108 | 
109 |             if verbose:
110 |                 #In verbose mode, print some statistics
111 |                 print( root.getStatisticsString() )
112 |                 print( 'total rollouts {} Q {:0.3f} duplicate paths {} elapsed {:0.2f} nps {:0.2f}'.format( int( N ), Q, same_paths, elapsed, nps ) )
113 | 
114 |             edge = root.maxNSelect()
115 | 
116 |             bestmove = edge.getMove()
117 | 
118 |             print( 'best move {}'.format( str( bestmove ) ) )
119 | 
120 |             board.push( bestmove )
121 | 
122 |         if mode == 'p':
123 |             #In profile mode, exit after the first move
124 |             break
125 | 
126 | def parseColor( colorString ):
127 |     """
128 |     Maps 'w' to True and 'b' to False.
129 | 
130 |     Args:
131 |         colorString (string) a string representing white or black
132 | 
133 |     Returns:
134 |         (bool) True for white, False for black
135 |     """
136 | 
137 |     if colorString == 'w' or colorString == 'W':
138 |         return True
139 |     elif colorString == 'b' or colorString == 'B':
140 |         return False
141 |     else:
142 |         print( 'Unrecognized argument for color' )
143 |         exit()
144 | 
145 | if __name__=='__main__':
146 |     parser = argparse.ArgumentParser( description='Play chess against the computer or watch self play games.' )
147 |     parser.add_argument( '--model', help='Path to model (.pt) file.' )
148 |     parser.add_argument( '--mode', help='Operation mode: \'s\' self play, \'p\' profile, \'h\' human' )
149 |     parser.add_argument( '--color', help='Your color w or b' )
150 |     parser.add_argument( '--rollouts', type=int, help='The number of rollouts on the computer\'s turn' )
151 |     parser.add_argument( '--threads', type=int, help='Number of threads used per rollout' )
152 |     parser.add_argument( '--verbose', help='Print search statistics', action='store_true' )
153 |     parser.add_argument( '--fen', help='Starting fen' )
154 |     parser.set_defaults( verbose=False, mode='p', color='w', rollouts=10, threads=1 )
155 |     args = parser.parse_args()
156 | 
157 |     main( args.model, args.mode, parseColor( args.color ), args.rollouts, args.threads, args.fen, args.verbose )
158 | 
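The React front end below talks to server.py (not reproduced in this listing)
through a single /AI endpoint: it POSTs a fen form field and reads back the
engine's reply as a UCI move string. The same exchange from Python, with a
placeholder host and port:

```python
import requests

start_fen = 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1'

#host and port are placeholders; point this at wherever server.py is running
r = requests.post( 'http://localhost:5000/AI', data={ 'fen': start_fen } )

print( r.text )   #a move in UCI notation, e.g. 'e2e4'
```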
--------------------------------------------------------------------------------
/reactFrontent/index.js:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | class ChessBoard extends React.Component {
4 | 
5 |     constructor(props){
6 |         super(props)
7 | 
8 |         this.state = { pieces: [
9 |             ["R", "N", "B", "Q", "K", "B", "N", "R"],
10 |             ["P", "P", "P", "P", "P", "P", "P", "P"],
11 |             ["1", "1", "1", "1", "1", "1", "1", "1"],
12 |             ["1", "1", "1", "1", "1", "1", "1", "1"],
13 |             ["1", "1", "1", "1", "1", "1", "1", "1"],
14 |             ["1", "1", "1", "1", "1", "1", "1", "1"],
15 |             ["p", "p", "p", "p", "p", "p", "p", "p"],
16 |             ["r", "n", "b", "q", "k", "b", "n", "r"]
17 |         ], selected: null, turn: "w" }
18 |     }
19 | 
20 |     castleCheck( pieces, fromRank, fromFile, toRank, toFile ){
21 |         return (pieces[ fromRank ][fromFile] == 'k' || pieces[fromRank][fromFile] == 'K') && ( Math.abs( toFile - fromFile ) == 2 )
22 |     }
23 | 
24 |     getMoveFromAPI(pieces, turn){
25 |         let fen = pieces[7].join("") + "/" +
26 |             pieces[6].join("") + "/" +
27 |             pieces[5].join("") + "/" +
28 |             pieces[4].join("") + "/" +
29 |             pieces[3].join("") + "/" +
30 |             pieces[2].join("") + "/" +
31 |             pieces[1].join("") + "/" +
32 |             pieces[0].join("") + " " + turn + " KQkq - 0 1"
33 |         fen = fen.replace(/\d{2,}/g, function(m) { // find runs of two or more digits (adjacent empty squares)
34 |             return m.split('').reduce(function(sum, v) { // sum the individual digits...
35 |                 return sum + Number(v) // ...so e.g. "111" becomes 3, as FEN requires
36 |             }, 0) // start the sum at 0
37 |         })
38 |         var data = new FormData()
39 |         data.append('fen', fen)
40 |         fetch('/AI', {
41 |             method: 'POST',
42 |             body: data
43 |         }).then( (r) => { return r.text() } ).then( (r) => {
44 | 
45 |             //castles
46 |             if( r == "e1g1" && pieces[0][4] == 'K' ){
47 |                 pieces[0][4] = '1';
48 |                 pieces[0][5] = 'R';
49 |                 pieces[0][6] = 'K';
50 |                 pieces[0][7] = '1';
51 |             }else if( r == "e1c1" && pieces[0][4] == 'K' ){
52 |                 pieces[0][0] = '1';
53 |                 pieces[0][1] = '1';
54 |                 pieces[0][2] = 'K';
55 |                 pieces[0][3] = 'R';
56 |                 pieces[0][4] = '1';
57 |             }else if( r == "e8g8" && pieces[7][4] == 'k' ){
58 |                 pieces[7][4] = '1';
59 |                 pieces[7][5] = 'r';
60 |                 pieces[7][6] = 'k';
61 |                 pieces[7][7] = '1';
62 |             }else if( r == "e8c8" && pieces[7][4] == 'k' ){
63 |                 pieces[7][0] = '1';
64 |                 pieces[7][1] = '1';
65 |                 pieces[7][2] = 'k';
66 |                 pieces[7][3] = 'r';
67 |                 pieces[7][4] = '1';
68 |             }else{ //normal moves
69 |                 let file1 = r.charCodeAt(0) - 'a'.charCodeAt(0)
70 |                 let rank1 = r.charCodeAt(1) - '1'.charCodeAt(0)
71 |                 let file2 = r.charCodeAt(2) - 'a'.charCodeAt(0)
72 |                 let rank2 = r.charCodeAt(3) - '1'.charCodeAt(0)
73 |                 pieces[ rank2 ][ file2 ] = pieces[ rank1 ][ file1 ]
74 |                 pieces[ rank1 ][ file1 ] = "1"
75 |             }
76 | 
77 |             let newTurn = turn == "w" ? "b" : "w"
78 |             this.setState( {selected: null, pieces: pieces, turn: newTurn } )
79 |         })
80 |     }
81 | 
82 |     render(){
83 |         return