├── .gitignore
├── README.md
├── __init__.py
├── architectures
    ├── __init__.py
    ├── tree_tf_op.py
    ├── tree_tf_op_kfac.py
    └── tree_tf_op_multi.py
├── bp_tree.py
├── build.sh
├── build_centos.sh
├── cuda_includes.h
├── cuda_op_kernel.cc
├── cuda_op_kernel.cu.cc
├── global_vars.py
├── gnu_go_test.py
├── includes.h
├── kernels
    ├── create_batch.cu
    ├── init_op.cu
    ├── init_state.cu
    ├── max_prob_to_coord_valid_mvs.cu
    ├── move_random_ai.cu
    ├── move_unit.cu
    ├── prob_to_coord.cu
    ├── prob_to_coord_valid_mvs.cu
    ├── return_state.cu
    ├── return_winner.cu
    ├── session_backup.cu.cc
    ├── vars.cu.cc
    └── verify_integrity.cu
├── models
    ├── go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.data-00000-of-00001
    ├── go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.index
    └── go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.meta
├── net_vs_gnugo.py
├── notebooks
    ├── go_black.png
    ├── go_blank.png
    ├── go_pieces.png
    ├── go_white.png
    └── training_visualizations.ipynb
├── play_network_gui.py
├── py_util
    ├── __init__.py
    ├── _py_util.c
    ├── add_valid_mvs.c
    ├── backup_visit.c
    ├── build.sh
    ├── build_centos.sh
    ├── choose_moves.c
    ├── includes.h
    ├── init_tree.c
    ├── prune_tree.c
    ├── py_util.py
    ├── py_util_dyn.py
    ├── register_mv.c
    ├── return_probs_map.c
    ├── return_tree.c
    ├── rotate_reflect_imgs.c
    └── session_backup.c
├── run.sh
├── vars.cc
├── vars_class_return.cc
└── vars_class_set.cc


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.ipynb
 2 | notebooks/.ipynb_checkpoints/*
 3 | models/*
 4 | *.prof
 5 | p.py
 6 | *.o
 7 | *.swn
 8 | *.lprof
 9 | *.so
10 | *.npy
11 | *.pyc
12 | *.swp
13 | *.swo
14 | *.exp
15 | *.lib
16 | *.pyd
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # alpha_go_zero_implementation
2 | An implementation of the Alpha Go Zero algorithm, runnable on a single GPU, released into the public domain.
3 | I provide a semi-detailed overview along with instructions for compiling and running at http://arcanefortune.com/alpha_go_initial.php
4 | An updated discussion of the code can be found at: http://arcanefortune.com/alpha_go_update.php
5 | 
6 | See "build.sh" and "build_centos.sh" for examples of how to compile the code. The build steps can vary slightly with each platform. "build_centos8.sh" works
7 | on CentOS 8 as of Jan-23-2020. See the links above for more details on the setups I've compiled on.
8 | 
9 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/__init__.py


--------------------------------------------------------------------------------
/architectures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/architectures/__init__.py


--------------------------------------------------------------------------------
/architectures/tree_tf_op.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import sys
  4 | sys.path.append("..")
  5 | import global_vars as gv
  6 | import os
  7 | 
# Device string used for every `tf.device` scope in this module.
DEVICE = '/gpu:1'

# Value of the HOME environment variable (None when it is unset).
hdir = os.getenv('HOME')

# Expected shape of the image batch produced by the custom `create_batch` op;
# `init_model` asserts the op output against it.
imgs_shape = [gv.BATCH_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels]
# Product of the entries of `gv.map_sz`; used as the width of the policy output.
map_prod = np.prod(gv.map_sz)

# Importing this module has side effects: it opens an interactive session and
# loads the compiled custom-op library.
with tf.device(DEVICE):
	sess = tf.InteractiveSession()
	tf_op = tf.load_op_library('cuda_op_kernel.so')
	
	##################### set / load vars
	# Flat (1-D, any length) placeholders used to feed values into the
	# `set_<var>` ops created in `init_model`.
	set_var_int32 = tf.placeholder(tf.int32, shape=[None])
	set_var_int8 = tf.placeholder(tf.int8, shape=[None])

# Names of the state variables exposed by the op library; for each name
# `init_model` calls `tf_op.<name>()` and `tf_op.set_<name>(...)`.
gm_var_nms = ['board', 'valid_mv_map_internal']

# For each entry of `gm_var_nms`, the *name* (a string, resolved at run time)
# of the placeholder above that feeds it.
gm_var_placeholders = ['set_var_int8']*2

# name -> fetch op and name -> set op; both are filled in by `init_model`.
gm_vars = {}; set_gm_vars = {}
 28 | 
 29 | def return_vars():
 30 | 	v = {}
 31 | 	for var in gm_var_nms:
 32 | 		exec('v["%s"] = sess.run(gm_vars["%s"])' % (var, var))
 33 | 	return v
 34 | 
 35 | def set_vars(v):
 36 | 	for var, placeholder in zip(gm_var_nms, gm_var_placeholders):
 37 | 		exec('sess.run(set_gm_vars["%s"], feed_dict={%s: v["%s"].ravel()})' % (var, placeholder, var))
 38 | 
 39 | ########################
 40 | 
 41 | def tf_pearsonr(val, val_target_nmean):
 42 | 	val_nmean = val - tf.reduce_mean(val)
 43 | 	val_target_nmean = val_target - tf.reduce_mean(val_target)
 44 | 	
 45 | 	val_std = tf.sqrt(tf.reduce_sum(val_nmean**2))
 46 | 	val_target_std = tf.sqrt(tf.reduce_sum(val_target_nmean**2))
 47 | 
 48 | 	return -tf.reduce_sum(val_nmean * val_target_nmean) / (val_std * val_target_std)
 49 | 
 50 | 
 51 | def init_model(N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM, \
 52 | 		LSQ_LAMBDA, LSQ_REG_LAMBDA, POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, VALR_LAMBDA, L2_LAMBDA, WEIGHT_STD=1e-2):
 53 | 	
 54 | 	global convs, weights, outputs, output_nms, pol, pol_pre, pol_mean_sq_err, train_step
 55 | 	global val, val_mean_sq_err, pol_loss, entrop, saver, update_ops 
 56 | 	global val_pearsonr, pol_mean_sq_reg_err, loss, Q_map, P_map, visit_count_map
 57 | 	global move_random_ai, init_state, nn_move_unit, nn_prob_move_unit
 58 |         global tree_to_coords, nn_max_to_coords, nn_prob_to_coords
 59 | 	global tree_prob_move_unit, backup_visit, backup_visit_terminal, tree_det_move_unit
 60 | 	global nn_prob_move_unit, nn_max_move_unit, tree_prob_visit_coord, tree_det_visit_coord
 61 | 	global sess, imgs, valid_mv_map, pol_target, val_target, moving_player
 62 | 	global gm_vars, set_gm_vars, oFC1, session_restore, session_backup
 63 | 	global winner, dir_pre, dir_a
 64 | 	global games_running, score, n_captures, pol_cross_entrop_err
 65 | 	global nn_prob_to_coords_valid_mvs, nn_max_prob_to_coords_valid_mvs
 66 | 	global nn_prob_move_unit_valid_mvs, nn_max_prob_move_unit_valid_mvs
 67 | 	assert len(N_FILTERS) == len(FILTER_SZS) == len(STRIDES)
 68 | 
 69 | 	with tf.device(DEVICE):
 70 | 		#### init state
 71 | 		init_state = tf_op.init_state()
 72 | 
 73 | 		dir_pre = tf.placeholder(tf.float32, shape=())
 74 | 		dir_a = tf.placeholder(tf.float32, shape=())
 75 | 
 76 | 		moving_player = tf.placeholder(tf.int32, shape=())
 77 | 		winner, score, n_captures = tf_op.return_winner(moving_player)
 78 | 
 79 | 		games_running = tf.ones(gv.BATCH_SZ, dtype=tf.int8)
 80 | 
 81 | 		session_restore = tf_op.session_restore()
 82 | 		session_backup = tf_op.session_backup()
 83 | 
 84 | 		##### vars
 85 | 		for var, placeholder in zip(gm_var_nms, gm_var_placeholders):
 86 | 			exec('gm_vars["%s"] = tf_op.%s()' % (var, var))
 87 | 			exec('set_gm_vars["%s"] = tf_op.set_%s(%s)' % (var, var, placeholder))
 88 | 
 89 | 		#### imgs
 90 | 		imgs, valid_mv_map = tf_op.create_batch(moving_player)
 91 | 		#print imgs.shape, imgs_shape
 92 | 		assert imgs.shape == tf.placeholder(tf.float32, shape=imgs_shape).shape, 'tf op shape not matching global_vars'
 93 | 		move_random_ai = tf_op.move_random_ai(moving_player)
 94 | 
 95 | 		global move_frm_inputs, to_coords_input
 96 | 		to_coords_input = tf.placeholder(tf.int32, shape=gv.BATCH_SZ)
 97 | 		move_frm_inputs = tf_op.move_unit(to_coords_input, moving_player)
 98 | 
 99 | 		####
100 | 		pol_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ, map_prod])
101 | 		val_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ])
102 | 		
103 | 		convs = []; weights = []; outputs = []; output_nms = []
104 | 		
105 | 		convs += [tf.nn.relu(tf.contrib.layers.batch_norm(tf.layers.conv2d(inputs=imgs, filters=N_FILTERS[0], kernel_size=[FILTER_SZS[0]]*2, 
106 | 			strides=[STRIDES[0]]*2, padding="same", activation=None, name='conv0',
107 | 			kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA),
108 | 			bias_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA))))]
109 | 
110 | 		weights += [tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'conv0')[0]]
111 | 		outputs += [convs[-1]]
112 | 		output_nms += ['conv0']
113 | 
114 | 		for i in range(1, len(N_FILTERS)):
115 | 			output_nms += ['conv' + str(i)]
116 | 			
117 | 			conv_out = tf.contrib.layers.batch_norm(\
118 | 				tf.layers.conv2d(inputs=convs[i-1], filters=N_FILTERS[i], kernel_size=[FILTER_SZS[i]]*2,
119 | 					strides=[STRIDES[i]]*2, padding="same", activation=None, name=output_nms[-1],
120 | 					kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA),
121 | 					bias_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA)))
122 | 			
123 | 			# residual bypass
124 | 			if (i % 2) == 0:
125 | 				conv_out += convs[i-2]
126 | 
127 | 			convs += [tf.nn.relu(conv_out)]
128 | 
129 | 			weights += [tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, output_nms[-1])[0]]
130 | 			outputs += [convs[-1]]
131 | 
132 | 		out_sz = np.int(np.prod(convs[-1].shape[1:]))
133 | 		convr = tf.reshape(convs[-1], [gv.BATCH_SZ, out_sz])
134 | 
135 | 		################### pol
136 | 		# FC layer
137 | 		wFC1p = tf.Variable(tf.random_normal([out_sz, N_FC1], stddev=WEIGHT_STD), name='wFC1')
138 | 		bFC1p = tf.Variable(tf.random_normal([N_FC1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD), name='bFC1')
139 | 		
140 | 		oFC1p = tf.nn.relu(tf.matmul(convr, wFC1p) + bFC1p)
141 | 
142 | 		weights += [wFC1p]
143 | 		outputs += [oFC1p]
144 | 		output_nms += ['oFC1p']
145 | 
146 | 		# FC layer
147 | 		wFC2p = tf.Variable(tf.random_normal([N_FC1, map_prod], stddev=WEIGHT_STD), name='wFC2')
148 | 		bFC2p = tf.Variable(tf.random_normal([map_prod], mean=WEIGHT_STD*2, stddev=WEIGHT_STD), name='bFC2')
149 | 		
150 | 		pol_pre = tf.nn.relu(tf.matmul(oFC1p, wFC2p) + bFC2p)
151 | 
152 | 		weights += [wFC2p]
153 | 		outputs += [pol_pre]
154 | 		output_nms += ['pol_pre']
155 | 		
156 | 		pol = tf.nn.softmax(pol_pre)
157 | 		outputs += [pol]
158 | 		output_nms += ['pol']
159 | 		
160 | 		nn_max_to_coords = tf.argmax(pol_pre, 1, output_type=tf.int32)
161 | 		nn_prob_to_coords = tf_op.prob_to_coord(pol, dir_pre, dir_a) 
162 | 		nn_prob_to_coords_valid_mvs = tf_op.prob_to_coord_valid_mvs(pol)
163 | 		nn_max_prob_to_coords_valid_mvs = tf_op.max_prob_to_coord_valid_mvs(pol)
164 | 
165 | 		# move unit
166 | 		nn_max_move_unit = tf_op.move_unit(nn_max_to_coords, moving_player)
167 | 		nn_prob_move_unit = tf_op.move_unit(nn_prob_to_coords, moving_player)
168 | 		nn_prob_move_unit_valid_mvs = tf_op.move_unit(nn_prob_to_coords_valid_mvs, moving_player)
169 | 		nn_max_prob_move_unit_valid_mvs = tf_op.move_unit(nn_max_prob_to_coords_valid_mvs, moving_player)
170 | 
171 | 		# sq
172 | 		sq_err = tf.reduce_sum((pol - pol_target)**2, axis=1)
173 | 		pol_mean_sq_err = tf.reduce_mean(sq_err)
174 | 
175 | 		# sq reg
176 | 		sq_err_reg = tf.reduce_sum(pol_pre**2, axis=1)
177 | 		pol_mean_sq_reg_err = tf.reduce_mean(sq_err_reg)
178 | 
179 | 		# cross entrop
180 | 		pol_ln = tf.log(pol)
181 | 		pol_cross_entrop_err = -tf.reduce_mean(pol_target*pol_ln)
182 | 
183 | 		################# val
184 | 		# FC layer
185 | 		wFC1v = tf.Variable(tf.random_normal([out_sz, N_FC1], stddev=WEIGHT_STD), name='val_wFC1')
186 | 		bFC1v = tf.Variable(tf.random_normal([N_FC1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD), name='val_bFC1')
187 | 		
188 | 		#oFC1v = tf.nn.relu(tf.matmul(convr, wFC1v) + bFC1v)
189 | 		oFC1v = tf.matmul(convr, wFC1v) + bFC1v
190 | 
191 | 		weights += [wFC1v]
192 | 		outputs += [oFC1v]
193 | 		output_nms += ['oFC1v']
194 | 		
195 | 		# FC layer
196 | 		wFC2v = tf.Variable(tf.random_normal([N_FC1, 1], stddev=WEIGHT_STD), name='val')
197 | 		bFC2v = tf.Variable(tf.random_normal([1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD), name='val')
198 | 		
199 | 		val = tf.tanh(tf.squeeze(tf.matmul(oFC1v, wFC2v) + bFC2v))
200 | 
201 | 		weights += [wFC2v]
202 | 		outputs += [val]
203 | 		output_nms += ['val']
204 | 
205 | 		# sq error
206 | 		val_mean_sq_err = tf.reduce_mean((val - val_target)**2)
207 | 
208 | 		# pearson
209 | 		val_pearsonr = tf_pearsonr(val, val_target)
210 | 
211 | 		########## FC l2 reg
212 | 		FC_L2_reg = 0
213 | 		for weights in [wFC1v, wFC2v, bFC1v, bFC2v, wFC1p, wFC2p, bFC1p, bFC2p]:
214 | 			FC_L2_reg += tf.reduce_sum(weights**2)
215 | 		FC_L2_reg *= (L2_LAMBDA/2.)
216 | 
217 | 		################### movement from tree statistics
218 | 		visit_count_map = tf.placeholder(tf.float32, shape=(gv.BATCH_SZ, gv.map_szt))
219 | 		
220 | 		tree_prob_visit_coord = tf_op.prob_to_coord(visit_count_map, dir_pre, dir_a)
221 | 		tree_det_visit_coord = tf.argmax(visit_count_map, 1, output_type=tf.int32)
222 | 
223 | 		tree_det_move_unit = tf_op.move_unit(tree_det_visit_coord, moving_player)
224 | 		tree_prob_move_unit = tf_op.move_unit(tree_prob_visit_coord, moving_player)
225 | 
226 | 		################### initialize
227 | 
228 | 		update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
229 | 		
230 | 		loss = LSQ_LAMBDA * pol_mean_sq_err + \
231 | 		       LSQ_REG_LAMBDA * pol_mean_sq_reg_err + \
232 | 		       POL_CROSS_ENTROP_LAMBDA * pol_cross_entrop_err + \
233 | 		       VAL_LAMBDA * val_mean_sq_err + \
234 | 		       VALR_LAMBDA * val_pearsonr + \
235 | 		       tf.losses.get_regularization_loss() + FC_L2_reg
236 | 
237 | 		with tf.control_dependencies(update_ops):
238 | 			train_step = tf.train.MomentumOptimizer(EPS, MOMENTUM).minimize(loss)
239 | 
240 | 		sess.run(tf.global_variables_initializer())
241 | 
242 | 	# saving
243 | 	saver = tf.train.Saver()
244 | 


--------------------------------------------------------------------------------
/architectures/tree_tf_op_kfac.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import sys
  4 | sys.path.append("..")
  5 | import global_vars as gv
  6 | import os
  7 | import kfac
# Importing this module has side effects: it opens an interactive session and
# (below) loads the compiled custom-op library.
sess = tf.InteractiveSession()

# Value of the HOME environment variable (None when it is unset).
hdir = os.getenv('HOME')
tf_op = tf.load_op_library('cuda_op_kernel.so')

# Expected shape of the image batch produced by the custom `create_batch` op;
# `init_model` asserts the op output against it.
imgs_shape = [gv.BATCH_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels]
# Product of the entries of `gv.map_sz`; used as the width of the policy output.
map_prod = np.prod(gv.map_sz)

##################### set / load vars
# Flat (1-D, any length) placeholders used to feed values into the
# `set_<var>` ops created in `init_model`.
set_var_int32 = tf.placeholder(tf.int32, shape=[None])
set_var_int8 = tf.placeholder(tf.int8, shape=[None])

# Names of the state variables exposed by the op library; for each name
# `init_model` calls `tf_op.<name>()` and `tf_op.set_<name>(...)`.
gm_var_nms = ['board', 'valid_mv_map_internal']

# For each entry of `gm_var_nms`, the *name* (a string, resolved at run time)
# of the placeholder above that feeds it.
gm_var_placeholders = ['set_var_int8']*2

# name -> fetch op and name -> set op; both are filled in by `init_model`.
gm_vars = {}; set_gm_vars = {}
 25 | 
 26 | def return_vars():
 27 | 	v = {}
 28 | 	for var in gm_var_nms:
 29 | 		exec('v["%s"] = sess.run(gm_vars["%s"])' % (var, var))
 30 | 	return v
 31 | 
 32 | def set_vars(v):
 33 | 	for var, placeholder in zip(gm_var_nms, gm_var_placeholders):
 34 | 		exec('sess.run(set_gm_vars["%s"], feed_dict={%s: v["%s"].ravel()})' % (var, placeholder, var))
 35 | 
 36 | ########################
 37 | 
 38 | def tf_pearsonr(val, val_target_nmean):
 39 | 	val_nmean = val - tf.reduce_mean(val)
 40 | 	val_target_nmean = val_target - tf.reduce_mean(val_target)
 41 | 	
 42 | 	val_std = tf.sqrt(tf.reduce_sum(val_nmean**2))
 43 | 	val_target_std = tf.sqrt(tf.reduce_sum(val_target_nmean**2))
 44 | 
 45 | 	return -tf.reduce_sum(val_nmean * val_target_nmean) / (val_std * val_target_std)
 46 | 
 47 | 
 48 | def init_model(N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM, \
 49 | 		POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, WEIGHT_STD=1e-2):
 50 | 	
 51 | 	global convs, weights, outputs, output_nms, pol, pol_pre, pol_mean_sq_err, train_step
 52 | 	global val, val_mean_sq_err, pol_loss, entrop, saver, update_ops 
 53 | 	global val_pearsonr, pol_mean_sq_reg_err, loss, Q_map, P_map, visit_count_map
 54 | 	global move_random_ai, init_state, nn_move_unit, nn_prob_move_unit
 55 |         global tree_to_coords, nn_max_to_coords, nn_prob_to_coords
 56 | 	global tree_prob_move_unit, backup_visit, backup_visit_terminal, tree_det_move_unit
 57 | 	global nn_prob_move_unit, nn_max_move_unit, tree_prob_visit_coord, tree_det_visit_coord
 58 | 	global sess, imgs, valid_mv_map, pol_target, val_target, moving_player
 59 | 	global gm_vars, set_gm_vars, oFC1, session_restore, session_backup
 60 | 	global winner, dir_pre, dir_a
 61 | 	global games_running, score, n_captures, pol_cross_entrop_err
 62 | 	global nn_prob_to_coords_valid_mvs, nn_max_prob_to_coords_valid_mvs
 63 | 	global nn_prob_move_unit_valid_mvs, nn_max_prob_move_unit_valid_mvs
 64 | 	assert len(N_FILTERS) == len(FILTER_SZS) == len(STRIDES)
 65 | 
 66 | 	#### init state
 67 | 	layer_collection = kfac.LayerCollection()
 68 |         init_state = tf_op.init_state()
 69 | 
 70 | 	dir_pre = tf.placeholder(tf.float32, shape=())
 71 | 	dir_a = tf.placeholder(tf.float32, shape=())
 72 | 
 73 | 	moving_player = tf.placeholder(tf.int32, shape=())
 74 | 	winner, score, n_captures = tf_op.return_winner(moving_player)
 75 | 
 76 | 	games_running = tf.ones(gv.BATCH_SZ, dtype=tf.int8)
 77 | 
 78 | 	session_restore = tf_op.session_restore()
 79 | 	session_backup = tf_op.session_backup()
 80 | 
 81 | 	##### vars
 82 | 	for var, placeholder in zip(gm_var_nms, gm_var_placeholders):
 83 | 		exec('gm_vars["%s"] = tf_op.%s()' % (var, var))
 84 | 		exec('set_gm_vars["%s"] = tf_op.set_%s(%s)' % (var, var, placeholder))
 85 | 
 86 | 	#### imgs
 87 | 	imgs, valid_mv_map = tf_op.create_batch(moving_player)
 88 | 	#print imgs.shape, imgs_shape
 89 | 	assert imgs.shape == tf.placeholder(tf.float32, shape=imgs_shape).shape, 'tf op shape not matching global_vars'
 90 | 	move_random_ai = tf_op.move_random_ai(moving_player)
 91 | 
 92 | 	global move_frm_inputs, to_coords_input
 93 | 	to_coords_input = tf.placeholder(tf.int32, shape=gv.BATCH_SZ)
 94 | 	move_frm_inputs = tf_op.move_unit(to_coords_input, moving_player)
 95 | 
 96 | 	####
 97 |         pol_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ, map_prod])
 98 |         val_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ])
 99 |         
100 |         convs = []; weights = []; outputs = []; output_nms = []
101 | 	
102 | 	layer = tf.layers.Conv2D(filters=N_FILTERS[0], kernel_size=[FILTER_SZS[0]]*2, 
103 | 		kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD),
104 | 		strides=[STRIDES[0]]*2, padding="same", activation=None, name='conv0')
105 | 	preactivations = layer(imgs)
106 | 	activations = tf.nn.relu(preactivations)
107 | 
108 | 	layer_collection.register_conv2d((layer.kernel, layer.bias), (1,1,1,1), "SAME", imgs, preactivations)
109 | 
110 | 	convs += [activations]
111 | 
112 | 	for i in range(1, len(N_FILTERS)):
113 | 		layer = tf.layers.Conv2D(filters=N_FILTERS[i], kernel_size=[FILTER_SZS[i]]*2,
114 | 				kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD),
115 | 				strides=[STRIDES[i]]*2, padding="same", activation=None, name='conv%i' % i)
116 | 		
117 | 		preactivations = layer(convs[i-1])
118 | 
119 | 		layer_collection.register_conv2d((layer.kernel, layer.bias), (1,1,1,1), "SAME", convs[i-1], preactivations)
120 | 
121 | 		# residual bypass
122 | 		if (i % 2) == 0:
123 | 			preactivations += convs[i-2]
124 | 
125 | 		activations = tf.nn.relu(preactivations)
126 | 		convs += [activations]
127 | 
128 | 	out_sz = np.int(np.prod(convs[-1].shape[1:]))
129 | 	convr = tf.reshape(convs[-1], [gv.BATCH_SZ, out_sz])
130 | 
131 | 	################### pol
132 | 	# FC layer
133 | 	layer = tf.layers.Dense(N_FC1, kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD), name='FC1')
134 | 	preactivations = layer(convr)
135 | 	oFC1p = tf.nn.relu(preactivations)
136 | 
137 | 	layer_collection.register_fully_connected((layer.kernel, layer.bias), convr, preactivations)
138 | 
139 | 	# FC layer
140 | 	layer = tf.layers.Dense(map_prod, kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD), name='FC2')
141 | 	preactivations = layer(oFC1p)
142 | 	pol_pre = tf.nn.relu(preactivations)
143 | 
144 | 	layer_collection.register_fully_connected((layer.kernel, layer.bias), oFC1p, preactivations)
145 | 
146 | 	layer_collection.register_categorical_predictive_distribution(pol_pre)
147 | 	
148 | 	pol = tf.nn.softmax(pol_pre)
149 | 		
150 | 	nn_max_to_coords = tf.argmax(pol_pre, 1, output_type=tf.int32)
151 | 	nn_prob_to_coords = tf_op.prob_to_coord(pol, dir_pre, dir_a) 
152 | 	nn_prob_to_coords_valid_mvs = tf_op.prob_to_coord_valid_mvs(pol)
153 | 	nn_max_prob_to_coords_valid_mvs = tf_op.max_prob_to_coord_valid_mvs(pol)
154 | 
155 | 	# move unit
156 | 	nn_max_move_unit = tf_op.move_unit(nn_max_to_coords, moving_player)
157 | 	nn_prob_move_unit = tf_op.move_unit(nn_prob_to_coords, moving_player)
158 | 	nn_prob_move_unit_valid_mvs = tf_op.move_unit(nn_prob_to_coords_valid_mvs, moving_player)
159 | 	nn_max_prob_move_unit_valid_mvs = tf_op.move_unit(nn_max_prob_to_coords_valid_mvs, moving_player)
160 | 
161 | 	# sq
162 | 	sq_err = tf.reduce_sum((pol - pol_target)**2, axis=1)
163 | 	pol_mean_sq_err = tf.reduce_mean(sq_err)
164 | 
165 | 	# sq reg
166 | 	sq_err_reg = tf.reduce_sum(pol_pre**2, axis=1)
167 | 	pol_mean_sq_reg_err = tf.reduce_mean(sq_err_reg)
168 | 
169 | 	# cross entrop
170 | 	pol_ln = tf.log(pol)
171 | 	pol_cross_entrop_err = -tf.reduce_mean(pol_target*pol_ln)
172 | 
173 | 	global oFC1v, preactivations
174 | 	################# val
175 | 	# FC layer
176 | 	layer = tf.layers.Dense(N_FC1, kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD), name='v_FC1')
177 | 	preactivations = layer(convr)
178 | 	oFC1v = preactivations
179 | 
180 | 	layer_collection.register_fully_connected((layer.kernel, layer.bias), convr, preactivations)
181 | 
182 | 	# FC layer
183 | 	layer = tf.layers.Dense(1, kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD), name='v_FC2')
184 | 	preactivations = layer(oFC1v)
185 | 	val = tf.squeeze(tf.tanh(preactivations))
186 | 
187 | 	layer_collection.register_fully_connected((layer.kernel, layer.bias), oFC1v, preactivations)
188 | 
189 | 	layer_collection.register_normal_predictive_distribution(val, var=1.0)
190 | 
191 | 	# sq error
192 | 	val_mean_sq_err = tf.reduce_mean((val - val_target)**2)
193 | 
194 | 	# pearson
195 | 	val_pearsonr = tf_pearsonr(val, val_target)
196 | 
197 | 	
198 | 	################### movement from tree statistics
199 | 	visit_count_map = tf.placeholder(tf.float32, shape=(gv.BATCH_SZ, gv.map_szt))
200 | 	
201 | 	tree_prob_visit_coord = tf_op.prob_to_coord(visit_count_map, dir_pre, dir_a)
202 | 	tree_det_visit_coord = tf.argmax(visit_count_map, 1, output_type=tf.int32)
203 | 
204 | 	tree_det_move_unit = tf_op.move_unit(tree_det_visit_coord, moving_player)
205 | 	tree_prob_move_unit = tf_op.move_unit(tree_prob_visit_coord, moving_player)
206 | 
207 | 	################### initialize
208 | 
209 | 	update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
210 | 	
211 | 	loss = POL_CROSS_ENTROP_LAMBDA * pol_cross_entrop_err + \
212 | 	       VAL_LAMBDA * val_mean_sq_err
213 | 
214 | 	params = tf.trainable_variables()
215 | 	grads = tf.gradients(loss, params)
216 |         grad_params = list(zip(grads, params))
217 | 
218 | 	learning_rate = .25
219 | 	damping_lambda = .01
220 | 	moving_avg_decay=.99
221 | 	kfac_norm_constraint = .0001
222 | 	kfac_momentum = .9
223 | 
224 | 	optimizer = kfac.optimizer.KfacOptimizer(layer_collection=layer_collection, damping=damping_lambda,
225 | 		learning_rate=EPS, cov_ema_decay=moving_avg_decay,
226 | 		momentum=kfac_momentum, norm_constraint=kfac_norm_constraint)
227 | 
228 | 	train_step = optimizer.apply_gradients(grad_params)
229 | 
230 | 
231 | 	#with tf.control_dependencies(update_ops):
232 | 	#	#train_step = tf.train.MomentumOptimizer(EPS, MOMENTUM).minimize(loss)
233 | 	#	train_step = tf.train.GradientDescentOptimizer(EPS).minimize(loss)
234 | 
235 | 	sess.run(tf.global_variables_initializer())
236 | 
237 | 	# saving
238 | 	saver = tf.train.Saver()
239 | 


--------------------------------------------------------------------------------
/architectures/tree_tf_op_multi.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import sys
  4 | sys.path.append("..")
  5 | import global_vars as gv
  6 | import os
  7 | 
# Value of the HOME environment variable (None when it is unset).
hdir = os.getenv('HOME')

# Expected shape of the image batch produced by the custom `create_batch` op;
# `init_model` asserts the op output against it.
imgs_shape = [gv.BATCH_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels]
# Product of the entries of `gv.map_sz`; used as the width of the policy output.
map_prod = np.prod(gv.map_sz)

# Names of the state variables exposed by the op library; for each name
# `init_model` calls `tf_op.<name>()` and `tf_op.set_<name>(...)`.
gm_var_nms = ['board', 'valid_mv_map_internal']
# For each entry of `gm_var_nms`, the *name* (a string, resolved at run time)
# of the placeholder that feeds it; the placeholder itself is created in
# `init_model`.
gm_var_placeholders = ['set_var_int8']*2

# name -> fetch op and name -> set op; both are filled in by `init_model`.
gm_vars = {}; set_gm_vars = {}
 17 | 
 18 | def return_vars():
 19 | 	v = {}
 20 | 	with tf.device(DEVICE):
 21 | 		for var in gm_var_nms:
 22 | 			exec('v["%s"] = sess.run(gm_vars["%s"])' % (var, var))
 23 | 	return v
 24 | 
 25 | def set_vars(v):
 26 | 	with tf.device(DEVICE):
 27 | 		for var, placeholder in zip(gm_var_nms, gm_var_placeholders):
 28 | 			exec('sess.run(set_gm_vars["%s"], feed_dict={%s: v["%s"].ravel()})' % (var, placeholder, var))
 29 | 
 30 | def tf_pearsonr(val, val_target_nmean):
 31 | 	val_nmean = val - tf.reduce_mean(val)
 32 | 	val_target_nmean = val_target - tf.reduce_mean(val_target)
 33 | 	
 34 | 	val_std = tf.sqrt(tf.reduce_sum(val_nmean**2))
 35 | 	val_target_std = tf.sqrt(tf.reduce_sum(val_target_nmean**2))
 36 | 
 37 | 	return -tf.reduce_sum(val_nmean * val_target_nmean) / (val_std * val_target_std)
 38 | 
 39 | 
 40 | # the `training` input dictates whether batch norm statistics are updated
 41 | def init_model(DEVICE, N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM, \
 42 | 		LSQ_LAMBDA, LSQ_REG_LAMBDA, POL_CROSS_ENTROP_LAMBDA, 
 43 | 		VAL_LAMBDA, VALR_LAMBDA, L2_LAMBDA, WEIGHT_STD=1e-2, training=True):
 44 | 	
 45 | 	global sess, tf_op, set_var_int32, set_var_int8	
 46 | 	global convs, weights, output_nms, pol, pol_pre, pol_mean_sq_err, train_step
 47 | 	global val, val_mean_sq_err, pol_loss, entrop, saver, update_ops 
 48 | 	global val_pearsonr, pol_mean_sq_reg_err, loss, Q_map, P_map, visit_count_map
 49 | 	global move_random_ai, init_state, nn_move_unit, nn_prob_move_unit
 50 |         global tree_to_coords, nn_max_to_coords, nn_prob_to_coords
 51 | 	global tree_prob_move_unit, backup_visit, backup_visit_terminal, tree_det_move_unit
 52 | 	global nn_prob_move_unit, nn_max_move_unit, tree_prob_visit_coord, tree_det_visit_coord
 53 | 	global sess, imgs, imgs32, valid_mv_map, pol_target, val_target, moving_player
 54 | 	global gm_vars, set_gm_vars, oFC1, session_restore, session_backup
 55 | 	global winner, to_coords_input
 56 | 	global score, n_captures, pol_cross_entrop_err
 57 | 	global nn_prob_to_coords_valid_mvs, nn_max_prob_to_coords_valid_mvs
 58 | 	global nn_prob_move_unit_valid_mvs, nn_max_prob_move_unit_valid_mvs
 59 | 	global move_frm_inputs
 60 | 	assert len(N_FILTERS) == len(FILTER_SZS) == len(STRIDES)
 61 | 
 62 | 	imgs = {}; valid_mv_map = {}
 63 | 	move_random_ai = {}
 64 | 
 65 | 	convs = {}; weights = {}; output_nms = {}
 66 | 	pol = {}; pol_pre = {}; val = {}
 67 | 	nn_max_to_coords = {}; nn_prob_to_coords = {}; nn_prob_to_coords_valid_mvs = {}
 68 | 	nn_max_prob_to_coords_valid_mvs = {}
 69 | 	nn_max_move_unit = {}; nn_prob_move_unit = {}; nn_prob_move_unit_valid_mvs = {}
 70 | 	nn_max_prob_move_unit_valid_mvs = {}
 71 | 
 72 | 	with tf.device(DEVICE):
 73 | 		sess = tf.InteractiveSession()
 74 | 		if DEVICE == '/gpu:0':
 75 | 			tf_op = tf.load_op_library('cuda_op_kernel_75.so')
 76 | 		else:
 77 | 			tf_op = tf.load_op_library('cuda_op_kernel_52.so')
 78 | 
 79 | 		##################### set / load vars
 80 | 		set_var_int32 = tf.placeholder(tf.int32, shape=[None])
 81 | 		set_var_int8 = tf.placeholder(tf.int8, shape=[None])
 82 | 
 83 | 		#### init state
 84 | 		init_state = tf_op.init_state()
 85 | 
 86 | 		moving_player = tf.placeholder(tf.int8, shape=())
 87 | 		winner, score, n_captures = tf_op.return_winner(moving_player)
 88 | 
 89 | 		visit_count_map = tf.placeholder(tf.float16, shape=(gv.BATCH_SZ, gv.map_szt)) # map of visits
 90 | 		to_coords_input = tf.placeholder(tf.int16, shape=gv.BATCH_SZ) # simply the coordinates
 91 | 		
 92 | 		pol_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ, map_prod])
 93 | 		val_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ])
 94 | 
 95 | 		session_restore = tf_op.session_restore()
 96 | 		session_backup = tf_op.session_backup()
 97 | 
 98 | 		##### vars
 99 | 		for var, placeholder in zip(gm_var_nms, gm_var_placeholders):
100 | 			exec('gm_vars["%s"] = tf_op.%s()' % (var, var))
101 | 			exec('set_gm_vars["%s"] = tf_op.set_%s(%s)' % (var, var, placeholder))
102 | 
103 | 		#### imgs
104 | 		imgs, valid_mv_map = tf_op.create_batch(moving_player) # output is float16
105 | 		imgs32 = tf.cast(imgs, tf.float32)
106 | 		assert imgs.shape == tf.placeholder(tf.float16, shape=imgs_shape).shape, 'tf op shape not matching global_vars'
107 | 		move_random_ai = tf_op.move_random_ai(moving_player)
108 | 
109 | 		move_frm_inputs = tf_op.move_unit(to_coords_input, moving_player) # deterministically move from input coordinates
110 | 
111 | 		################### movement from tree statistics (must be supplied--these are placeholders)
112 | 		tree_prob_visit_coord = tf_op.prob_to_coord(visit_count_map)
113 | 		tree_det_visit_coord = tf.cast(tf.argmax(visit_count_map, 1, output_type=tf.int32), tf.int16)
114 | 		
115 | 		tree_det_move_unit = tf_op.move_unit(tree_det_visit_coord, moving_player)
116 | 		tree_prob_move_unit = tf_op.move_unit(tree_prob_visit_coord, moving_player)
117 | 		
118 | 		############ specifics of how 3 networks will be initialized (on each card)
119 | 		scopes = ['eval', 'main', 'eval32']
120 | 		dtypes = ['float16', 'float16', 'float32']
121 | 		if training:
122 | 			#trainings = [False, False, True]
123 | 			trainings = [True, True, True]
124 | 		else:
125 | 			trainings = [False, False, False]
126 | 		
127 | 		for s in scopes:
128 | 			convs[s] = []; weights[s] = []; output_nms[s] = []
129 | 
130 | 		################ network (f32 and f16 weights)
131 | 		for s, d, t in zip(scopes, dtypes, trainings):
132 | 			with tf.variable_scope(s):
133 | 				if s == 'eval32':
134 | 					# conv2d: "channels_last (default) corresponds to inputs with shape (batch, height, width, channels)"
135 | 					# https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/layers/Conv2D
136 | 					
137 | 					# batch_norm: "Can be used as a normalizer function for conv2d and fully_connected. The normalization
138 | 					#              is over all but the last dimension if data_format is NHWC (default)"
139 | 					# https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/contrib/layers/batch_norm
140 | 					convs[s] += [tf.nn.relu(tf.contrib.layers.batch_norm(tf.layers.conv2d(inputs=imgs32, filters=N_FILTERS[0], kernel_size=[FILTER_SZS[0]]*2, 
141 | 						strides=[STRIDES[0]]*2, padding="same", activation=None, name='conv0',
142 | 						kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA),
143 | 						bias_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA)), is_training=t))]
144 | 				else: # float16 models
145 | 					convs[s] += [tf.nn.relu(tf.contrib.layers.batch_norm(tf.layers.conv2d(inputs=imgs, filters=N_FILTERS[0], kernel_size=[FILTER_SZS[0]]*2, 
146 | 						strides=[STRIDES[0]]*2, padding="same", activation=None, name='conv0',
147 | 						kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA),
148 | 						bias_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA)), is_training=t))]
149 | 
150 | 				weights[s] += [tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, s + '/conv0')[0]]
151 | 				output_nms[s] += ['conv0']
152 | 				
153 | 				# convolutional layers
154 | 				for i in range(1, len(N_FILTERS)):
155 | 					output_nms[s] += ['conv' + str(i)]
156 | 					
157 | 					conv_out = tf.contrib.layers.batch_norm(\
158 | 						tf.layers.conv2d(inputs=convs[s][i-1], filters=N_FILTERS[i], kernel_size=[FILTER_SZS[i]]*2,
159 | 							strides=[STRIDES[i]]*2, padding="same", activation=None, name=output_nms[s][-1],
160 | 							kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA),
161 | 							bias_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA)), is_training=t)
162 | 					
163 | 					# residual bypass
164 | 					if (i % 2) == 0:
165 | 						conv_out += convs[s][i-2]
166 | 
167 | 					convs[s] += [tf.nn.relu(conv_out)]
168 | 
169 | 					weights[s] += [tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, s + '/' + output_nms[s][-1])[0]]
170 | 
171 | 				out_sz = np.int(np.prod(convs[s][-1].shape[1:]))
172 | 				convr = tf.reshape(convs[s][-1], [gv.BATCH_SZ, out_sz])
173 | 
174 | 				################### policy output head (pol)
175 | 				# FC layer
176 | 				wFC1p = tf.Variable(tf.random_normal([out_sz, N_FC1], stddev=WEIGHT_STD, dtype=d), name='wFC1')
177 | 				bFC1p = tf.Variable(tf.random_normal([N_FC1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD, dtype=d), name='bFC1')
178 | 				
179 | 				oFC1p = tf.nn.relu(tf.matmul(convr, wFC1p) + bFC1p)
180 | 
181 | 				weights[s] += [wFC1p]
182 | 				output_nms[s] += ['oFC1p']
183 | 
184 | 				# FC layer
185 | 				wFC2p = tf.Variable(tf.random_normal([N_FC1, map_prod], stddev=WEIGHT_STD, dtype=d), name='wFC2')
186 | 				bFC2p = tf.Variable(tf.random_normal([map_prod], mean=WEIGHT_STD*2, stddev=WEIGHT_STD, dtype=d), name='bFC2')
187 | 				
188 | 				pol_pre[s] = tf.nn.relu(tf.matmul(oFC1p, wFC2p) + bFC2p)
189 | 
190 | 				weights[s] += [wFC2p]
191 | 				output_nms[s] += ['pol_pre']
192 | 				
193 | 				pol[s] = tf.nn.softmax(pol_pre[s])
194 | 				output_nms[s] += ['pol']
195 | 				
196 | 				#if s != 'eval32':
197 | 				nn_max_to_coords[s] = tf.cast(tf.argmax(pol_pre[s], 1, output_type=tf.int32), 'int16')
198 | 				if s == 'eval32':
199 | 					pol16 = tf.cast(pol[s], tf.float16)
200 | 					nn_prob_to_coords[s] = tf_op.prob_to_coord(pol16) 
201 | 					nn_prob_to_coords_valid_mvs[s] = tf_op.prob_to_coord_valid_mvs(pol16)
202 | 					nn_max_prob_to_coords_valid_mvs[s] = tf_op.max_prob_to_coord_valid_mvs(pol16)
203 | 
204 | 				else:
205 | 					nn_prob_to_coords[s] = tf_op.prob_to_coord(pol[s]) 
206 | 					nn_prob_to_coords_valid_mvs[s] = tf_op.prob_to_coord_valid_mvs(pol[s])
207 | 					nn_max_prob_to_coords_valid_mvs[s] = tf_op.max_prob_to_coord_valid_mvs(pol[s])
208 | 
209 | 				####### move unit
210 | 				# (these take as input coordinates and return flags indicating if movement was possible for each game)
211 | 				nn_max_move_unit[s] = tf_op.move_unit(nn_max_to_coords[s], moving_player)
212 | 				nn_prob_move_unit[s] = tf_op.move_unit(nn_prob_to_coords[s], moving_player)
213 | 				nn_prob_move_unit_valid_mvs[s] = tf_op.move_unit(nn_prob_to_coords_valid_mvs[s], moving_player)
214 | 				nn_max_prob_move_unit_valid_mvs[s] = tf_op.move_unit(nn_max_prob_to_coords_valid_mvs[s], moving_player)
215 | 				
216 | 				################# value output head (val)
217 | 				# FC layer
218 | 				wFC1v = tf.Variable(tf.random_normal([out_sz, N_FC1], stddev=WEIGHT_STD, dtype=d), name='val_wFC1')
219 | 				bFC1v = tf.Variable(tf.random_normal([N_FC1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD, dtype=d), name='val_bFC1')
220 | 				
221 | 				#oFC1v = tf.nn.relu(tf.matmul(convr, wFC1v) + bFC1v)
222 | 				oFC1v = tf.matmul(convr, wFC1v) + bFC1v
223 | 
224 | 				weights[s] += [wFC1v]
225 | 				output_nms[s] += ['oFC1v']
226 | 				
227 | 				# FC layer
228 | 				wFC2v = tf.Variable(tf.random_normal([N_FC1, 1], stddev=WEIGHT_STD, dtype=d), name='val')
229 | 				bFC2v = tf.Variable(tf.random_normal([1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD, dtype=d), name='val')
230 | 				
231 | 				val[s] = tf.tanh(tf.squeeze(tf.matmul(oFC1v, wFC2v) + bFC2v))
232 | 
233 | 				weights[s] += [wFC2v]
234 | 				output_nms[s] += ['val']
235 | 
236 | 				
237 | 				################### initialize loss
238 | 				if s == 'eval32':
239 | 					########## FC l2 reg
240 | 					FC_L2_reg = 0
241 | 					for t_weights in [wFC1v, wFC2v, bFC1v, bFC2v, wFC1p, wFC2p, bFC1p, bFC2p]:
242 | 						FC_L2_reg += tf.reduce_sum(t_weights**2)
243 | 					FC_L2_reg *= (L2_LAMBDA/2.)
244 | 
245 | 					##### pol
246 | 					# sq
247 | 					sq_err = tf.reduce_sum((pol[s] - pol_target)**2, axis=1)
248 | 					pol_mean_sq_err = tf.reduce_mean(sq_err)
249 | 
250 | 					# sq reg
251 | 					sq_err_reg = tf.reduce_sum(pol_pre[s]**2, axis=1)
252 | 					pol_mean_sq_reg_err = tf.reduce_mean(sq_err_reg)
253 | 
254 | 					# cross entrop
255 | 					pol_ln = tf.log(pol[s])
256 | 					pol_cross_entrop_err = -tf.reduce_mean(pol_target*pol_ln)
257 | 					
258 | 					#### val
259 | 					# sq error
260 | 					val_mean_sq_err = tf.reduce_mean((val[s] - val_target)**2)
261 | 
262 | 					# pearson
263 | 					val_pearsonr = tf_pearsonr(val[s], val_target)
264 | 
265 | 					update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=s)
266 | 					
267 | 					loss = LSQ_LAMBDA * pol_mean_sq_err + \
268 | 					       LSQ_REG_LAMBDA * pol_mean_sq_reg_err + \
269 | 					       POL_CROSS_ENTROP_LAMBDA * pol_cross_entrop_err + \
270 | 					       VAL_LAMBDA * val_mean_sq_err + \
271 | 					       VALR_LAMBDA * val_pearsonr + \
272 | 					       tf.losses.get_regularization_loss(s) + FC_L2_reg
273 | 
274 | 					with tf.control_dependencies(update_ops):
275 | 						train_step = tf.train.MomentumOptimizer(EPS, MOMENTUM).minimize(loss)
276 | 
277 | 				sess.run(tf.global_variables_initializer())
278 | 
279 | 	# saving
280 | 	saver = tf.train.Saver()
281 | 


--------------------------------------------------------------------------------
/bp_tree.py:
--------------------------------------------------------------------------------
  1 | # ------------
  2 | # model copies:
  3 | # ------------
  4 | # eval32: model to run bp on, the model which all others are eventually updated to
  5 | # eval: float16 versions of `eval32`. updated to follow backprop (the `eval32` model)
  6 | # main: older version of `eval` that `eval` model must win against with certainty p < .05
  7 | #       Once the benchmark is reached, `main` is updated to `eval32`.
  8 | #	`main` is used to create all training batches
  9 | 
 10 | import os.path
 11 | import pygame
 12 | import scipy.stats
 13 | import copy
 14 | import random
 15 | import multiprocessing as mp
 16 | import time
 17 | import numpy as np
 18 | import tensorflow as tf
 19 | import global_vars as gv
 20 | from datetime import datetime
 21 | import architectures.tree_tf_op_multi as arch # the tensorflow model definitions
 22 | import py_util.py_util as pu # operates and stores the move branching tree
 23 | import gnu_go_test as gt # playing against gnu go
 24 | from colorama import Fore, Style
 25 | sdir = 'models/' # directory to save and load models
 26 | 
 27 | ################################### configuration: 
 28 | #### load previous model or start from scratch? (set save_nm = None if you want to start from scratch, i.e, create a new model)
 29 | #save_nm = None
 30 | save_nm = 'go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy'
 31 | 
 32 | if True: # run on two gpus
 33 | 	MASTER_WORKER = 0
 34 | 	GPU_LIST = [0,1] # gpu card ids
 35 | else: # run on one gpu only
 36 | 	MASTER_WORKER = 1
 37 | 	GPU_LIST = [1]
 38 | 
 39 | ###### variables to save
 40 | save_vars = ['LSQ_LAMBDA', 'LSQ_REG_LAMBDA', 'POL_CROSS_ENTROP_LAMBDA', 'VAL_LAMBDA', 'VALR_LAMBDA', 'L2_LAMBDA', 'N_REP_TRAIN',
 41 | 	'FILTER_SZS', 'STRIDES', 'N_FILTERS', 'N_FC1', 'EPS', 'MOMENTUM', 'SAVE_FREQ', 'N_SIM', 'N_TURNS', 'CPUCT', 
 42 | 	'N_EVAL_NN_GMS', 'N_EVAL_NN_GNU_GMS', 'N_EVAL_TREE_GMS', 'N_EVAL_TREE_GNU_GMS', 'CHKP_FREQ', 'BUFFER_SZ', 'N_BATCH_SETS_MIN', 'N_BATCH_SETS_BLOCK', 'N_BATCH_SETS_TOTAL',
 43 | 	'save_nm', 'start_time', 'EVAL_FREQ', 'boards', 'scores', 'GATE_THRESH', 'N_GATE_BATCH_SETS']
 44 | 
 45 | training_ex_vars = ['board', 'winner', 'tree_probs', 'batch_set', 'batch_sets_created', 'batch_sets_created_total', 'buffer_loc']
 46 | 
 47 | logs = ['val_mean_sq_err', 'pol_cross_entrop', 'pol_max_pre', 'pol_max', 'val_pearsonr','opt_batch','eval_batch',
 48 | 	'self_eval_win_rate', 'model_promoted', 'self_eval_perc']
 49 | print_logs = ['val_mean_sq_err', 'pol_cross_entrop', 'pol_max', 'val_pearsonr']
 50 | 
 51 | for nm in ['tree', 'nn']:
 52 | 	for suffix in ['', '_gnu']:
 53 | 		for key in ['win', 'n_captures', 'n_captures_opp', 'score', 'n_mvs', 'boards']:
 54 | 			logs += ['%s_%s%s' % (key, nm, suffix)]
 55 | 
 56 | state_vars = ['log', 'run_time', 'global_batch', 'global_batch_saved', 'global_batch_evald', 'save_counter','boards', 'save_t'] # updated each save
 57 | 
 58 | ##########################################
 59 | def ret_d(player): # return dictionary for input into tensorflow
 60 | 	return {arch.moving_player: player}
 61 | 
 62 | # simulate making moves (i.e., use the tree search)
 63 | # `scopes` controls which models to use (and their ordering of who plays first)
 64 | def run_sim(turn, starting_player, scopes=['main', 'main']):
 65 | 	arch.sess.run(arch.session_backup)
 66 | 	pu.session_backup()
 67 | 
 68 | 	for sim in range(N_SIM):
 69 | 		# backup then make next move
 70 | 		for turn_sim in range(turn, N_TURNS+1):
 71 | 			for player, s in zip([0,1], scopes):
 72 | 				if turn_sim == turn and starting_player == 1 and player == 0: # skip player 0, has already moved
 73 | 					continue
 74 | 
 75 | 				# get valid moves, network policy and value estimates:
 76 | 				valid_mv_map, pol, val = arch.sess.run([arch.valid_mv_map, arch.pol[s], arch.val[s]], feed_dict=ret_d(player))
 77 | 
 78 | 				# backup visit Q values
 79 | 				if turn_sim != turn:
 80 | 					pu.backup_visit(player, np.array(val, dtype='single'))
 81 | 
 82 | 				pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree
 83 | 				to_coords = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[0] # choose moves based on policy and Q values (latter of which already stored in tree)
 84 | 				pu.register_mv(player, np.array(to_coords, dtype='int32')) # register move in tree
 85 | 
 86 | 				arch.sess.run(arch.move_frm_inputs, feed_dict={arch.moving_player: player, arch.to_coords_input: to_coords}) # move network (update GPU vars)
 87 | 		
 88 | 		############ backup terminal state
 89 | 		winner = np.array(arch.sess.run(arch.winner, feed_dict=ret_d(0)), dtype='single')
 90 | 
 91 | 		# update tree with values (outcomes) of each game)
 92 | 		pu.backup_visit(0, winner)
 93 | 		pu.backup_visit(1, -winner)
 94 | 
 95 | 		# return move back to previous node in tree
 96 | 		arch.sess.run(arch.session_restore) # reset gpu game state
 97 | 		pu.session_restore() # reset cpu tree state
 98 | 
 99 | 
100 | ####################################
# names of the shared objects; init() receives one `i_<name>` argument per entry, in this order
shared_nms = [
	'buffer_loc', 'batch_sets_created', 'batch_sets_created_total', 'batch_set',
	's_board', 's_winner', 's_tree_probs',
	'weights_changed', 'buffer_lock', 'weights_lock',
	'save_nm', 'new_model',
	'weights', 'weights_eval',
	'eval_games_won', 'eval_batch_sets_played', 'eval_stats_lock',
	'scope_next', 'eval_batch_sets_main_first',
]
# ^ update sv() to handle shared variables
104 | 
# binds every shared object passed in to the module-level global of the same name without the `i_` prefix
# (i_buffer_loc -> buffer_loc, ...), so the other functions in this file can use them as globals.
# one `i_<name>` parameter is expected for every entry of `shared_nms`.
# NOTE(review): presumably passed as the per-process initializer of the worker pool -- confirm against the caller
def init(i_buffer_loc, i_batch_sets_created, i_batch_sets_created_total, i_batch_set, i_s_board, i_s_winner, i_s_tree_probs, i_weights_changed, i_buffer_lock, i_weights_lock, i_save_nm, i_new_model, i_weights, i_weights_eval, i_eval_games_won, i_eval_batch_sets_played, i_eval_stats_lock, i_scope_next, i_eval_batch_sets_main_first):
	passed = locals() # taken first, so it holds exactly the `i_<name>` arguments
	module_vars = globals()

	# assign through globals(): a `global` statement run by one exec() does not carry over to the
	# next exec(), so exec('global x') followed by exec('x = i_x') only ever created a function
	# local that was discarded on return
	for nm in shared_nms:
		module_vars[nm] = passed['i_' + nm]
109 | 
110 | #####################################################################################################################
# builds the model on gpu `i` from the hyperparameters stored in the save file `sdir + save_nm` and
# returns the shapes of the trainable variables under the `main` scope:
# a list with one tuple per variable, in the order tf.get_collection() returns them
def worker_save_shapes(i):
	#### restore
	save_d = np.load(sdir + save_nm, allow_pickle=True).item()

	############# init / load model
	# the hyperparameters are read straight from the dictionary. exec()-ing every saved key into
	# the function namespace only resolves under python 2's unoptimized-locals rule and fails on
	# any saved key that is missing, even though only these twelve are used here
	DEVICE = '/gpu:%i' % i
	arch.init_model(DEVICE, save_d['N_FILTERS'], save_d['FILTER_SZS'], save_d['STRIDES'],
		save_d['N_FC1'], save_d['EPS'], save_d['MOMENTUM'],
		save_d['LSQ_LAMBDA'], save_d['LSQ_REG_LAMBDA'], save_d['POL_CROSS_ENTROP_LAMBDA'],
		save_d['VAL_LAMBDA'], save_d['VALR_LAMBDA'], save_d['L2_LAMBDA'])

	main_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='main')
	return [tuple(var.shape.as_list()) for var in main_vars]
132 | 
# views the shared array `shared_arr` as float16, shapes it like `tf_var` and loads it into `tf_var`
def _load_shared_into_var(shared_arr, tf_var):
	flat = np.frombuffer(shared_arr.get_obj(), 'float16')
	tf_var.load(flat.reshape(tuple(tf_var.shape.as_list())))

# worker: sets weights from shared variables if they've been updated by the master worker
# returns True if the `main` and `eval` model copies were reloaded (the `weights_changed` flag is
# then cleared), False if this is the master worker or the flag was not set
def set_weights():
	if WORKER_ID == MASTER_WORKER: # nothing to do for the master worker
		return False

	with weights_lock:
		if weights_changed.value == 0: # weights haven't been changed
			return False

		for i, main_var in enumerate(weights_current):
			_load_shared_into_var(weights[i], main_var) # `main` model copy
			_load_shared_into_var(weights_eval[i], weights_eval_current[i]) # `eval` model copy

		weights_changed.value = 0
	return True
155 | 
# master: set shared variables to values loaded from restore file
#         (the values were from the checkpoint into the tensorflow variables)
#	  (this is only done once ever -- once model training is started for 1st time)
# copies the current tensorflow `main` weights into the shared `weights` arrays and the current
# tensorflow `eval` weights into the shared `weights_eval` arrays, all under `weights_lock`
def set_all_shared_to_loaded():
	assert WORKER_ID == MASTER_WORKER # only the master worker should do this
	with weights_lock:
		main_vals = arch.sess.run(weights_current) # `main` from tf
		eval_vals = arch.sess.run(weights_eval_current) # `eval` from tf

		for i, main_val in enumerate(main_vals):
			# shared `main` = `main` from tf (written through a float16 view of the shared buffer)
			np.frombuffer(weights[i].get_obj(), 'float16')[:] = main_val.ravel()

			# shared `eval` = `eval` from tf
			np.frombuffer(weights_eval[i].get_obj(), 'float16')[:] = eval_vals[i].ravel()
173 | 
# master: set shared variables `main` and `eval` (and tf vars) from current tensorflow copy of eval32
# afterwards `weights_changed` is set to 1, the flag set_weights() checks before reloading
def set_all_to_eval32_and_get():
	assert WORKER_ID == MASTER_WORKER # only the master worker should do this
	with weights_lock:
		eval32_vals = arch.sess.run(weights_eval32_current) # `eval32` from tf

		for i in range(len(weights_current)):
			new_val = eval32_vals[i]
			flat_val = new_val.ravel()

			# shared `main` and shared `eval` = `eval32` (written through float16 views of the shared buffers)
			shared_main = np.frombuffer(weights[i].get_obj(), 'float16')
			shared_main[:] = flat_val
			shared_eval = np.frombuffer(weights_eval[i].get_obj(), 'float16')
			shared_eval[:] = flat_val

			# update tf copy
			weights_current[i].load(new_val) # `main`
			weights_eval_current[i].load(new_val) # `eval`

		weights_changed.value = 1
194 | 
# master: update `eval` to values from backprop (current `eval32` tf weights)
# writes the `eval32` weights into the shared `weights_eval` arrays and the tensorflow `eval` variables,
# sets `weights_changed` (the flag set_weights() checks) and resets the self-evaluation counters
# (eval_games_won, eval_batch_sets_played, scope_next, eval_batch_sets_main_first) to 0
def set_eval16_to_eval32_start_eval():
	assert WORKER_ID == MASTER_WORKER
	# hold BOTH locks. `with weights_lock and eval_stats_lock:` evaluated the `and` expression first
	# and therefore only acquired eval_stats_lock, leaving the shared weights unprotected while
	# they were rewritten. weights_lock is taken first, eval_stats_lock second
	with weights_lock, eval_stats_lock:
		weights_eval32_current_vals = arch.sess.run(weights_eval32_current) # `eval32` from tf

		for i in range(len(weights_current)):
			# set `eval` shared variables = `eval32` from tf
			w = np.frombuffer(weights_eval[i].get_obj(), 'float16')
			w[:] = weights_eval32_current_vals[i].ravel()

			# update tensorflow `eval` model = `eval32`
			weights_eval_current[i].load(weights_eval32_current_vals[i])

		weights_changed.value = 1

		# start a fresh round of self-evaluation statistics
		eval_games_won.value = 0
		eval_batch_sets_played.value = 0
		scope_next.value = 0
		eval_batch_sets_main_first.value = 0
214 | 
215 | 
# prints the self-evaluation statistics accumulated in the shared counters and returns
# (model_outperforms, perc):
#   model_outperforms: True when the one-sided binomial test (alternative='greater') on
#                      games won out of games played gives p < .05
#   perc: percentage of played games that were won
# games played = eval_batch_sets_played * gv.BATCH_SZ
def print_eval_stats():
	n_won = eval_games_won.value
	n_sets = eval_batch_sets_played.value
	n_games = n_sets * gv.BATCH_SZ

	p_val = scipy.stats.binom_test(n_won, n_games, alternative='greater')
	model_outperforms = p_val < .05
	perc = 100*np.single(n_won)/n_games

	pstr = 'eval wins %i sets played %i percent %1.2f p %1.3f pass %i' % (n_won, n_sets, perc, p_val, model_outperforms)
	print(pstr)
	return model_outperforms, perc
227 | 
# plays 2*N_GATE_BATCH_SETS rounds of batches, ensuring ordering of eval and main are balanced
# will also terminate at end of current batch eval if N_GATE_BATCH_SETS+1 have been played
# scope_next: alternates between 0,1 at start of each new batch set. to order which player goes first
#
# results are accumulated in the shared counters eval_games_won, eval_batch_sets_played and
# eval_batch_sets_main_first (updated under eval_stats_lock); nothing is returned
def eval_model():
	set_weights() # reload `main`/`eval` from the shared weights if they were flagged as changed

	while True:
		# fresh game state and a fresh search tree for this batch set
		arch.sess.run(arch.init_state)
		pu.init_tree()
		turn_start_t = time.time()
		
		### choose order
		# scope_next is read and flipped under the lock, so consecutive batch sets alternate the ordering
		with eval_stats_lock:
			if scope_next.value == 0:
				scopes = ['main', 'eval']
			else:
				scopes = ['eval', 'main']

			scope_next.value = 1 - scope_next.value
		
		scopes = np.asarray(scopes) # array form allows the elementwise `scopes == 'eval'` comparison below
		
		for turn in range(N_TURNS):
			### make move
			for player, s in zip([0,1], scopes):
				# stop (even mid-game) once enough batch sets have been recorded
				if eval_batch_sets_played.value >= (2*N_GATE_BATCH_SETS):
					return # finished
							
				run_sim(turn, player, scopes=scopes) # tree search, each player using its own model

				valid_mv_map, pol = arch.sess.run([arch.valid_mv_map, arch.pol[s]], feed_dict = ret_d(player)) # generate batch and valid moves
				
				#########
				pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree
				visit_count_map = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[-1] # get number of times each node was visited
				
				# both ops are run, only the coordinates (first fetch) are kept
				to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit], feed_dict={arch.moving_player: player, 
					arch.visit_count_map: visit_count_map})[0] # make move in proportion to visit counts

				pu.register_mv(player, np.array(to_coords, dtype='int32')) # register move in tree

			pu.prune_tree(0)
			
			# progress message every second turn
			if (turn+1) % 2 == 0:
				print 'eval finished turn %i (%i sec) GPU %i eval_batch_sets_played %i' % (turn, time.time() - turn_start_t, WORKER_ID, eval_batch_sets_played.value)
				

		with eval_stats_lock:
			# do not add any more stats for these conditions
			# (N_GATE_BATCH_SETS batch sets were already recorded for this ordering;
			#  `continue` leaves the `with` block, releasing the lock, and starts the next batch set)
			if eval_batch_sets_main_first.value >= N_GATE_BATCH_SETS and scopes[0] == 'main':
				continue
			if (eval_batch_sets_played.value - eval_batch_sets_main_first.value) >= N_GATE_BATCH_SETS and scopes[0] == 'eval':
				continue

			eval_player = np.nonzero(scopes == 'eval')[0][0] # player index (0 or 1) the `eval` model played as
			# NOTE(review): `res` is treated as one entry per game with 1 = win, 0 = tie, -1 = loss for
			# `eval_player` -- confirm against the definition of arch.winner
			res = arch.sess.run(arch.winner, feed_dict={arch.moving_player: eval_player})
			print 'ties', (res == 0).sum(), 'wins', (res == 1).sum(), 'rate %2.3f' % ((res == 1).sum()/np.single(gv.BATCH_SZ)), 'opp wins', (res == -1).sum(), scopes
			eval_games_won.value += np.int((res == 1).sum())
			eval_batch_sets_played.value += 1
			eval_batch_sets_main_first.value += int(scopes[0] == 'main')
			print_eval_stats()
289 | 
290 | 
291 | def worker(i_WORKER_ID):
292 | 	global WORKER_ID, weights_current, weights_eval_current, weights_eval32_current, val_mean_sq_err, pol_cross_entrop_err, val_pearsonr
293 | 	global board, winner, tree_probs, save_d, bp_eval_nodes, t_start, run_time, save_nm
294 | 	WORKER_ID = i_WORKER_ID
295 | 
296 | 	err_denom = 0; val_pearsonr = 0
297 | 	val_mean_sq_err = 0; pol_cross_entrop_err = 0; 
298 | 	t_start = datetime.now()
299 | 	run_time = datetime.now() - datetime.now()
300 | 
301 | 	#### restore
302 | 	save_d = np.load(sdir + save_nm, allow_pickle=True).item()
303 | 
304 | 	for key in save_vars + state_vars + training_ex_vars:
305 | 		if (key == 'save_nm') or (key in shared_nms):
306 | 			continue
307 | 		exec('global ' + key)
308 | 		exec('%s = save_d["%s"]' % (key,key))
309 | 
310 | 	EPS_ORIG = EPS
311 | 	#EPS = 2e-3 ###################################################### < overrides previous backprop step sizes
312 | 	
313 | 	############# init / load model
314 | 	DEVICE = '/gpu:%i' % WORKER_ID
315 | 	arch.init_model(DEVICE, N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM,
316 | 		LSQ_LAMBDA, LSQ_REG_LAMBDA, POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, VALR_LAMBDA, L2_LAMBDA)
317 | 
318 | 	bp_eval_nodes = [arch.train_step, arch.val_mean_sq_err, arch.pol_cross_entrop_err, arch.val_pearsonr]
319 | 	
320 | 	# ops for trainable weights
321 | 	weights_current = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='main')
322 | 	weights_eval_current = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='eval/')
323 | 	weights_eval32_current = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='eval32')
324 | 
325 | 	if new_model == False:
326 | 		print 'restore nm %s' % save_nm
327 | 		arch.saver.restore(arch.sess, sdir + save_nm)
328 | 		if WORKER_ID == MASTER_WORKER:
329 | 			set_all_shared_to_loaded()
330 | 	else: #### sync model weights
331 | 		if WORKER_ID == MASTER_WORKER:
332 | 			set_all_to_eval32_and_get()
333 | 		else:
334 | 			while set_weights() == False: # wait for weights to be set
335 | 				continue
336 | 	###### shared variables
337 | 	board = np.frombuffer(s_board.get_obj(), 'float16').reshape((BUFFER_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels))
338 | 	winner = np.frombuffer(s_winner.get_obj(), 'int8').reshape((N_BATCH_SETS_TOTAL, N_TURNS, 2, gv.BATCH_SZ))
339 | 	tree_probs = np.frombuffer(s_tree_probs.get_obj(), 'float32').reshape((BUFFER_SZ, gv.map_szt))
340 | 	
341 | 	######## local variables
342 | 	# BUFFER_SZ = N_BATCH_SETS * N_TURNS * 2 * gv.BATCH_SZ
343 | 	L_BUFFER_SZ = N_TURNS * 2 * gv.BATCH_SZ
344 | 	board_local = np.zeros((L_BUFFER_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels), dtype='float16')
345 | 	winner_local = np.zeros((N_TURNS, 2, gv.BATCH_SZ), dtype='int8')
346 | 	tree_probs_local = np.zeros((L_BUFFER_SZ, gv.map_szt), dtype='float32')
347 | 
348 | 	if EPS_ORIG != EPS:
349 | 		#save_nm += 'EPS_%2.4f.npy' % EPS 
350 | 		save_d['EPS'] = EPS
351 | 		print 'saving to', save_nm
352 | 
353 | 	### sound
354 | 	if WORKER_ID == MASTER_WORKER:
355 | 		pygame.init()
356 | 		pygame.mixer.music.load('/home/tapa/gtr-nylon22.mp3')
357 | 
358 | 	######
359 | 	while True:
360 | 		#### generate training batches with `main` model
361 | 		arch.sess.run(arch.init_state)
362 | 		pu.init_tree()
363 | 		turn_start_t = time.time()
364 | 		buffer_loc_local = 0
365 | 		for turn in range(N_TURNS):
366 | 			### make move
367 | 			for player in [0,1]:
368 | 				set_weights()
369 | 				run_sim(turn, player) # using `main` model
370 | 
371 | 				inds = buffer_loc_local + np.arange(gv.BATCH_SZ) # inds to save training vars at
372 | 				board_local[inds], valid_mv_map, pol = arch.sess.run([arch.imgs, arch.valid_mv_map, arch.pol['main']], feed_dict = ret_d(player)) # generate batch and valid moves
373 | 
374 | 				#########
375 | 				pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree
376 | 				visit_count_map = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[-1] # get number of times each node was visited
377 | 				
378 | 				tree_probs_local[inds] = visit_count_map / visit_count_map.sum(1)[:,np.newaxis] 
379 | 
380 | 				to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit], feed_dict={arch.moving_player: player, 
381 | 					arch.visit_count_map: visit_count_map})[0] # make move in proportion to visit counts
382 | 
383 | 				pu.register_mv(player, np.array(to_coords, dtype='int32')) # register move in tree
384 | 
385 | 				###############
386 | 				
387 | 				buffer_loc_local += gv.BATCH_SZ
388 | 
389 | 			pu.prune_tree(0)
390 | 			
391 | 			if (turn+1) % 2 == 0:
392 | 				print 'finished turn %i (%i sec) GPU %i batch_sets_created %i (total %i)' % (turn, time.time() - turn_start_t, WORKER_ID, batch_sets_created.value, batch_sets_created_total.value)
393 | 		
394 | 		##### create prob maps
395 | 		for player in [0,1]:
396 | 			winner_local[:, player] = arch.sess.run(arch.winner, feed_dict={arch.moving_player: player})
397 | 		
398 | 		#### set shared buffers with training variables we just generated from self-play
399 | 		with buffer_lock:
400 | 			board[buffer_loc.value:buffer_loc.value + buffer_loc_local] = board_local
401 | 			tree_probs[buffer_loc.value:buffer_loc.value + buffer_loc_local] = tree_probs_local
402 | 			winner[batch_set.value] = winner_local
403 | 			
404 | 			buffer_loc.value += buffer_loc_local
405 | 			batch_sets_created.value += 1
406 | 			batch_sets_created_total.value += 1
407 | 			batch_set.value += 1
408 | 			
409 | 			# save checkpoint
410 | 			if buffer_loc.value >= BUFFER_SZ or batch_set.value >= N_BATCH_SETS_TOTAL:
411 | 				buffer_loc.value = 0
412 | 				batch_set.value = 0
413 | 			
414 | 				# save batch only
415 | 				batch_d = {}
416 | 				for key in ['tree_probs', 'winner', 'board']:
417 | 					exec('batch_d["%s"] = copy.deepcopy(np.array(s_%s.get_obj()))' % (key, key))
418 | 				batch_save_nm = sdir + save_nm + '_batches' + str(batch_sets_created_total.value)
419 | 				np.save(batch_save_nm, batch_d)
420 | 				print 'saved', batch_save_nm
421 | 				batch_d = {}
422 | 
423 | 
424 | 		################ train/eval/test
425 | 		if WORKER_ID == MASTER_WORKER and batch_sets_created.value >= N_BATCH_SETS_BLOCK and batch_sets_created_total.value >= N_BATCH_SETS_MIN:
426 | 			########### train
427 | 			with buffer_lock:
428 | 				if batch_sets_created_total.value < (N_BATCH_SETS_MIN + N_BATCH_SETS_BLOCK): # don't overtrain on the initial set
429 | 					batch_sets_created.value = N_BATCH_SETS_BLOCK
430 | 
431 | 				if batch_sets_created.value >= N_BATCH_SETS_TOTAL: # if for some reason master worker gets delayed
432 | 					batch_sets_created.value = N_BATCH_SETS_BLOCK
433 | 
434 | 				board_c = np.array(board, dtype='single')
435 | 				winner_rc = np.array(winner.ravel(), dtype='single')
436 | 				
437 | 				valid_entries = np.prod(np.isnan(tree_probs) == False, 1) * np.nansum(tree_probs, 1) # remove examples with nans or no probabilties
438 | 				inds_valid = np.nonzero(valid_entries)[0]
439 | 				print len(inds_valid), 'out of', BUFFER_SZ, 'valid training examples'
440 | 
441 | 				for rep in range(N_REP_TRAIN):
442 | 					random.shuffle(inds_valid)
443 | 					for batch in range(N_TURNS * batch_sets_created.value):
444 | 						inds = inds_valid[batch*gv.BATCH_SZ + np.arange(gv.BATCH_SZ)]
445 | 
446 | 						board2, tree_probs2 = pu.rotate_reflect_imgs(board_c[inds], tree_probs[inds]) # rotate and reflect board randomly
447 | 
448 | 						train_dict = {arch.imgs32: board2,
449 | 								arch.pol_target: tree_probs2,
450 | 								arch.val_target: winner_rc[inds]}
451 | 
452 | 						val_mean_sq_err_tmp, pol_cross_entrop_err_tmp, val_pearsonr_tmp = \
453 | 														arch.sess.run(bp_eval_nodes, feed_dict=train_dict)[1:]
454 | 
455 | 						# update logs
456 | 						val_mean_sq_err += val_mean_sq_err_tmp
457 | 						pol_cross_entrop_err += pol_cross_entrop_err_tmp
458 | 						val_pearsonr += val_pearsonr_tmp
459 | 						global_batch += 1
460 | 						err_denom += 1
461 | 
462 | 				batch_sets_created.value = 0
463 | 		
464 | 			############### `eval` against prior version of self (`main`)
465 | 			set_eval16_to_eval32_start_eval() # update `eval` tf and shared copies to follow backprop (`eval32`)
466 | 			eval_model() # run match(es)
467 | 			with eval_stats_lock:
468 | 				print '-------------------'
469 | 				model_outperforms, self_eval_perc = print_eval_stats()
470 | 				print '------------------'
471 | 			if model_outperforms: # update `eval` AND `main` both tf and shared copies to follow backprop
472 | 				set_all_to_eval32_and_get()
473 | 
474 | 			##### network evaluation against random player and GNU Go
475 | 			global_batch_evald = global_batch
476 | 			global_batch_saved = global_batch
477 | 			t_eval = time.time()
478 | 			print 'evaluating nn'
479 | 
480 | 			d = ret_d(0)
481 | 			
482 | 			################## monitor training progress:
483 | 			# test `eval` against GNU Go and a player that makes only random moves
484 | 			for nm, N_GMS_L in zip(['nn','tree'], [[N_EVAL_NN_GNU_GMS, N_EVAL_NN_GMS], [N_EVAL_TREE_GMS, N_EVAL_TREE_GNU_GMS]]):
485 | 				for gnu, N_GMS in zip([True,False], N_GMS_L):
486 | 					if N_GMS == 0:
487 | 						continue
488 | 					key = '%s%s' % (nm, '' + gnu*'_gnu')
489 | 					t_key = time.time()
490 | 					boards[key] = np.zeros((N_TURNS,) + gv.INPUTS_SHAPE[:-1], dtype='int8')
491 | 					n_mvs = 0.; win_eval = 0.; score_eval = 0.; n_captures_eval = np.zeros(2, dtype='single')
492 | 					for gm in range(N_GMS):
493 | 						arch.sess.run(arch.init_state)
494 | 						pu.init_tree()
495 | 						# init gnu state
496 | 						if gnu:
497 | 							gt.init_board(arch.sess.run(arch.gm_vars['board']))
498 | 
499 | 						for turn in range(N_TURNS):
500 | 							board_tmp = arch.sess.run(arch.gm_vars['board'])
501 | 						
502 | 							#### search / make move
503 | 							if nm == 'tree':
504 | 								run_sim(turn)
505 | 								assert False
506 | 							else:
507 | 								# prob choose first move, deterministically choose remainder
508 | 								if turn == 0:
509 | 									to_coords = arch.sess.run([arch.nn_prob_to_coords_valid_mvs['eval'], arch.nn_prob_move_unit_valid_mvs['eval']], feed_dict=d)[0]
510 | 								else:
511 | 									to_coords = arch.sess.run([arch.nn_max_prob_to_coords_valid_mvs['eval'], arch.nn_max_prob_move_unit_valid_mvs['eval']], feed_dict=d)[0]
512 | 
513 | 
514 | 							board_tmp2 = arch.sess.run(arch.gm_vars['board'])
515 | 							n_mvs += board_tmp.sum() - board_tmp2.sum()
516 | 
517 | 							# move opposing player
518 | 							if gnu:
519 | 								gt.move_nn(to_coords) 
520 | 
521 | 								# mv gnugo
522 | 								ai_to_coords = gt.move_ai()
523 | 								arch.sess.run(arch.imgs, feed_dict={arch.moving_player: 1})
524 | 								arch.sess.run(arch.nn_max_move_unit['eval'], feed_dict={arch.moving_player: 1, arch.nn_max_to_coords['eval']: ai_to_coords})
525 | 							else:
526 | 								arch.sess.run(arch.imgs, feed_dict = ret_d(1))
527 | 								arch.sess.run(arch.move_random_ai, feed_dict = ret_d(1))
528 | 		
529 | 							boards[key][turn] = arch.sess.run(arch.gm_vars['board'])
530 | 
531 | 							if nm == 'tree':
532 | 								pu.prune_tree(0)
533 | 							# turn
534 | 
535 | 						# save stats
536 | 						win_tmp, score_tmp, n_captures_tmp = arch.sess.run([arch.winner, arch.score, arch.n_captures], feed_dict={arch.moving_player: 0})
537 | 						scores[key] = copy.deepcopy(score_tmp)
538 | 
539 | 						win_eval += win_tmp.mean()
540 | 						score_eval += score_tmp.mean()
541 | 						n_captures_eval += n_captures_tmp.mean(1)
542 | 						# gm
543 | 					
544 | 					# log
545 | 					log['win_' + key].append( (win_eval / (2*np.single(N_GMS))) + .5 )
546 | 					log['n_captures_' + key].append( n_captures_eval[0] / np.single(N_GMS) )
547 | 					log['n_captures_opp_' + key].append( n_captures_eval[1] / np.single(N_GMS) )
548 | 					log['score_' + key].append( score_eval / np.single(N_GMS) )
549 | 					log['n_mvs_' + key].append( n_mvs / np.single(N_GMS * N_TURNS * gv.BATCH_SZ) )
550 | 
551 | 					log['boards_' + key].append( boards[key][-1] )
552 | 					print key, 'eval time', time.time() - t_key
553 | 					# gnu
554 | 				# nm
555 | 			log['eval_batch'].append( global_batch )
556 | 			print 'eval time', time.time() - t_eval
557 | 			# eval
558 | 			####################### end network evaluation
559 | 
560 | 			pol, pol_pre = arch.sess.run([arch.pol['eval'], arch.pol_pre['eval']], feed_dict={arch.moving_player: 0})
561 | 
562 | 			##### log
563 | 			log['val_mean_sq_err'].append ( val_mean_sq_err / err_denom )
564 | 			log['pol_cross_entrop'].append( pol_cross_entrop_err / err_denom )
565 | 			log['val_pearsonr'].append( val_pearsonr / err_denom )
566 | 			log['opt_batch'].append( global_batch )
567 | 
568 | 			log['pol_max_pre'].append( np.median(pol_pre.max(1)) )
569 | 			log['pol_max'].append( np.median(pol.max(1)) )
570 | 
571 | 			log['self_eval_win_rate'].append( np.single(eval_games_won.value) / (eval_batch_sets_played.value*gv.BATCH_SZ) )
572 | 			log['model_promoted'].append( model_outperforms )
573 | 
574 | 			log['self_eval_perc'].append( self_eval_perc )
575 | 
576 | 			val_mean_sq_err = 0
577 | 			pol_cross_entrop_err = 0
578 | 			val_pearsonr = 0
579 | 			err_denom = 0
580 | 		
581 | 			########## print
582 | 			run_time += datetime.now() - t_start
583 | 
584 | 			if (save_counter % 20) == 0:
585 | 				print
586 | 				print Style.BRIGHT + Fore.GREEN + save_nm, Fore.WHITE + 'EPS', EPS, 'start', str(start_time).split('.')[0], 'run time', \
587 | 						str(run_time).split('.')[0]
588 | 				print
589 | 			save_counter += 1
590 | 
591 | 			print_str = '%i' % global_batch
592 | 			for key in print_logs:
593 | 				print_str += ' %s ' % key
594 | 				if isinstance(log[key], int):
595 | 					print_str += str(log[key][-1])
596 | 				else:
597 | 					print_str += '%1.4f' % log[key][-1]
598 | 
599 | 			print_str += ' %4.1f' % (datetime.now() - t_start).total_seconds()
600 | 			print print_str
601 | 			
602 | 			t_start = datetime.now()
603 | 
604 | 			# play sound
605 | 			if os.path.isfile('/home/tapa/play_sound.txt'):
606 | 				pygame.mixer.music.play()
607 | 		
608 | 		############# save
609 | 		if WORKER_ID == MASTER_WORKER:
610 | 			with buffer_lock:
611 | 				# update state vars
612 | 				#shared_nms = ['buffer_loc', 'batch_sets_created', 'batch_set', 's_board', 's_winner', 's_tree_probs', 'weights_changed', 'buffer_lock', 'weights_lock', 'save_nm', 'new_model', 'weights']
613 | 				for key in state_vars + training_ex_vars:
614 | 					if key in ['buffer_loc', 'batch_sets_created', 'batch_sets_created_total', 'batch_set', 'eval_games_won', 'eval_batch_sets_played']:
615 | 						exec('save_d["%s"] = %s.value' % (key, key))
616 | 					elif key in ['tree_probs', 'winner', 'board']:
617 | 						exec('save_d["%s"] = copy.deepcopy(np.array(s_%s.get_obj()))' % (key, key))
618 | 					else:
619 | 						exec('save_d["%s"] = %s' % (key, key))
620 | 			
621 | 			save_nms = [save_nm]
622 | 			if (datetime.now() - save_t).seconds > CHKP_FREQ:
623 | 				save_nms += [save_nm + str(datetime.now())]
624 | 				save_t = datetime.now()
625 | 			
626 | 			for nm in save_nms:
627 | 				np.save(sdir + nm, save_d)
628 | 				arch.saver.save(arch.sess, sdir + nm)
629 | 			
630 | 			print sdir + nm, 'saved'
631 | 
632 | 
633 | ####################################################################################################################
634 | 
635 | if save_nm is None:
636 | 	new_model = True # set `eval32` to `main`, and `eval` float16 copies
637 | 
638 | 	##### weightings on individual loss terms:
639 | 	LSQ_LAMBDA = 0
640 | 	LSQ_REG_LAMBDA = 0
641 | 	POL_CROSS_ENTROP_LAMBDA = 1
642 | 	VAL_LAMBDA = .025
643 | 	VALR_LAMBDA = 0
644 | 	L2_LAMBDA = 1e-3 # weight regularization 
645 | 	CPUCT = 1
646 | 	
647 | 	N_REP_TRAIN = 5 # number of times more to backprop over training examples (reflections/rotations)
648 | 	
649 | 	N_BATCH_SETS_BLOCK = 7
650 | 	N_BATCH_SETS_TOTAL = 7*5 # number of batch sets to store in training buffer
651 | 	N_BATCH_SETS_MIN = N_BATCH_SETS_TOTAL
652 | 
653 | 	batch_set = 0
654 | 	batch_sets_created = 0
655 | 	batch_sets_created_total = 0
656 | 	buffer_loc = 0
657 | 
658 | 	GATE_THRESH = .5
659 | 	N_GATE_BATCH_SETS = 1
660 | 
661 | 	##### model parameters
662 | 	N_LAYERS = 5 #10 # number of model layers
663 | 	FILTER_SZS = [3]*N_LAYERS
664 | 	STRIDES = [1]*N_LAYERS
665 | 	F = 128 # number of filters
666 | 	N_FILTERS = [F]*N_LAYERS
667 | 	N_FC1 = 128 # number of units in fully connected layer
668 | 	
669 | 	
670 | 	EPS = 2e-1 # backprop step size
671 | 	MOMENTUM = .9
672 | 
673 | 	N_SIM = 800 # number of simulations at each turn
674 | 	N_TURNS = 32 # number of moves per player per game
675 | 
676 | 	#### training buffers
677 | 	BUFFER_SZ = N_BATCH_SETS_TOTAL * N_TURNS * 2 * gv.BATCH_SZ
678 | 
679 | 	board = np.zeros((BUFFER_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels),  dtype='float16')
680 | 	winner = np.zeros((N_BATCH_SETS_TOTAL, N_TURNS, 2, gv.BATCH_SZ), dtype='int8')
681 | 	tree_probs = np.zeros((BUFFER_SZ, gv.map_szt), dtype='float32')
682 | 
683 | 	##### number of batch evaluations for testing model
684 | 	N_EVAL_NN_GMS = 1 # model evaluation for printing
685 | 	N_EVAL_NN_GNU_GMS = 1
686 | 	N_EVAL_TREE_GMS = 0 # model eval
687 | 	N_EVAL_TREE_GNU_GMS = 0
688 | 
689 | 	######### save and checkpoint frequency
690 | 	SAVE_FREQ = N_TURNS
691 | 	EVAL_FREQ = SAVE_FREQ*1
692 | 	CHKP_FREQ = 60*60*10*2
693 | 
694 | 	start_time = datetime.now()
695 | 	save_t = datetime.now()
696 | 
697 | 	save_nm = 'go_%1.4fEPS_%iGMSZ_%iN_SIM_%iN_TURNS_%iN_FILTERS_%iN_LAYERS_%iN_BATCH_SETS_TOTAL_%i_N_BATCH_SET_MIN_%iN_REP_TRN_trainbug.npy' % \
698 | 		(EPS, gv.n_rows, N_SIM, N_TURNS, N_FILTERS[0], N_LAYERS, N_BATCH_SETS_TOTAL, N_BATCH_SETS_MIN, N_REP_TRAIN)
699 | 
700 | 	boards = {}; scores = {} # eval
701 | 	save_d = {}
702 | 	for key in save_vars:
703 | 		exec('save_d["%s"] = %s' % (key,key))
704 | 	save_d['script_nm'] = __file__
705 | 
706 | 	global_batch = 0
707 | 	global_batch_saved = 0
708 | 	global_batch_evald = 0
709 | 	save_counter = 0
710 | 
711 | 	run_time = datetime.now() - datetime.now()
712 | 
713 | 	log = {}
714 | 	for key in logs:
715 | 		log[key] = []
716 | 
717 | 	########## save
718 | 	# update state vars
719 | 	for key in state_vars + training_ex_vars:
720 | 		exec('save_d["%s"] = %s' % (key, key))
721 | 	
722 | 	# save
723 | 	save_nms = [save_nm]
724 | 	if (datetime.now() - save_t).seconds > CHKP_FREQ:
725 | 		save_nms += [save_nm + str(datetime.now())]
726 | 		save_t = datetime.now()
727 | 	
728 | 	for nm in save_nms:
729 | 		np.save(sdir + nm, save_d)
730 | else:
731 | 	new_model = False # prevent `main` from being set to `eval32` at loading
732 | 
733 | 	save_d = np.load(sdir + save_nm, allow_pickle=True).item()
734 | 
735 | 	for key in save_vars + state_vars + training_ex_vars:
736 | 		if key == 'save_nm':
737 | 			continue
738 | 		exec('%s = save_d["%s"]' % (key,key))
739 | 
740 | print save_nm
741 | 
################### shared memory variables
# Everything below is created with `multiprocessing` so it can be handed to the
# pool workers started further down.  Typecodes: 'h' = signed short,
# 'b' = signed char, 'f' = float, 'i' = int.

###### self play from `eval` model used for training `eval32`:
# NOTE(review): in the new-model branch `board` is a float16 array but is stored
# here with the integer typecode 'h' -- confirm how readers interpret the buffer.
s_board = mp.Array('h', board.ravel()) # shape: (BUFFER_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels)
s_winner = mp.Array('b', winner.ravel()) # (N_BATCH_SETS_TOTAL, N_TURNS, 2, gv.BATCH_SZ)
s_tree_probs = mp.Array('f', tree_probs.ravel()) # (BUFFER_SZ, gv.map_szt)

# indices, counters, & flags 
# The plain values set (or restored) above are rebound to shared `mp.Value`
# wrappers of the same name, initialised with the previous value.
buffer_loc = mp.Value('i', buffer_loc) # index into above ^ training vars
weights_changed = mp.Value('i', 0) # 0 = no change, 1 = changed
batch_sets_created = mp.Value('i', batch_sets_created)
batch_sets_created_total = mp.Value('i', batch_sets_created_total)
batch_set = mp.Value('i', batch_set)

# evaluation (`eval` vs `main` benchmark testing to see when to update `main` to the current `eval32` backprop weights)
scope_next = mp.Value('i', 0) # alternates between 0,1 during model evaluation to dictate if `eval` or `main` starts 1st
eval_games_won = mp.Value('i', 0)
eval_batch_sets_played = mp.Value('i', 2*N_GATE_BATCH_SETS) # starts at 2*N_GATE_BATCH_SETS, not 0
eval_batch_sets_main_first = mp.Value('i', 0)

# locks (presumably one per group of shared data, judging by the names -- confirm in the worker code)
buffer_lock = mp.Lock()
weights_lock = mp.Lock()
eval_stats_lock = mp.Lock()

# filled with one shared array per weight tensor once the weight shapes are known (see pool launch below)
weights = []; weights_eval = []
767 | 
###### launch pool
# The initializer arguments are listed by *name* in `shared_nms`, so the
# pool-construction statement is assembled as text and run with `exec`.
# It has the form: p = mp.Pool(initializer=init, initargs=(a, b, ..., ))
cmd = 'p = mp.Pool(initializer=init, initargs=(%s))' % ''.join([nm + ', ' for nm in shared_nms])

### get weight shapes
# first, short-lived pool: used only to ask one worker for the weight shapes
# (`weights` / `weights_eval` are still empty at this point)
exec(cmd)
weight_shapes = p.map(worker_save_shapes, [0])[0]
p.close()

# allocate one shared buffer per weight tensor, for both weight sets
for s in weight_shapes:
	n_el = np.prod(s)
	weights.append( mp.Array('h', np.zeros(n_el, dtype='float16')) )
	weights_eval.append( mp.Array('h', np.zeros(n_el, dtype='float16')) )

######## run
# second pool: same statement, but the weight lists are now populated
exec(cmd)
p.map(worker, GPU_LIST)

#### dbg
'''cmd = 'init('
for nm in shared_nms:
	cmd += nm + ', '
cmd += ')'
exec(cmd)
worker(0)
'''
795 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
 1 | #export CC=gcc-6
 2 | #export CXX=g++-6
 3 | rm *.o
 4 | rm *.so
 5 | export TF_INC=TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
 6 | export TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
 7 | export TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
 8 | nvcc -std=c++11 -c -o cuda_op_kernel_52.cu.o cuda_op_kernel.cu.cc -I $TF_INC -I$TF_INC/external/nsync/public -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC \
 9 | 	--expt-relaxed-constexpr -arch=sm_52 -use_fast_math #-g -G -lineinfo
10 | nvcc -std=c++11 -c -o cuda_op_kernel_75.cu.o cuda_op_kernel.cu.cc -I $TF_INC -I$TF_INC/external/nsync/public -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC \
11 | 	--expt-relaxed-constexpr -arch=sm_75 -use_fast_math #-g -G -lineinfo
12 | 
13 | #sm_52
14 | #-gencode=arch=compute_61,code=sm_61 
15 | g++ -std=c++11 -shared -o cuda_op_kernel_52.so cuda_op_kernel.cc \
16 |   cuda_op_kernel_52.cu.o -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB -ltensorflow_framework \
17 |   -L/usr/local/cuda/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 #-g #-O3
18 | g++ -std=c++11 -shared -o cuda_op_kernel_75.so cuda_op_kernel.cc \
19 |   cuda_op_kernel_75.cu.o -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB -ltensorflow_framework \
20 |   -L/usr/local/cuda/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 #-g #-O3
21 | 
22 | 
23 | cd py_util
24 | echo
25 | echo ".................."
26 | echo "building py_util" 
27 | ./build.sh
28 | cd ..
29 | 


--------------------------------------------------------------------------------
/build_centos.sh:
--------------------------------------------------------------------------------
 1 | CUDA_PATH=/usr/local/cuda/lib64
 2 | 
 3 | #export CC=gcc-6
 4 | #export CXX=g++-6
 5 | rm *.o
 6 | rm *.so
 7 | export TF_INC=TF_INC=$(python2 -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
 8 | export TF_INC=$(python2 -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
 9 | export TF_LIB=$(python2 -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
10 | #nvcc -std=c++11 -c -o cuda_op_kernel_52.cu.o cuda_op_kernel.cu.cc ${TF_FLAGS[@]} -I $TF_INC -I$TF_INC/external/nsync/public -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC \
11 | #	--expt-relaxed-constexpr -arch=sm_52 -use_fast_math #-g -G -lineinfo
12 | #nvcc -std=c++11 -c -o cuda_op_kernel_75.cu.o cuda_op_kernel.cu.cc ${TF_FLAGS[@]} -I $TF_INC -I$TF_INC/external/nsync/public -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC \
13 | #	--expt-relaxed-constexpr -arch=sm_75 -use_fast_math #-g -G -lineinfo
14 | 
15 | #sm_52
16 | #-gencode=arch=compute_61,code=sm_61 
17 | TF_CFLAGS=( $(python2 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
18 | TF_LFLAGS=( $(python2 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
19 | 
20 | nvcc -std=c++11 -c -o cuda_op_kernel_52.cu.o cuda_op_kernel.cu.cc \
21 |   ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr
22 | nvcc -std=c++11 -c -o cuda_op_kernel_75.cu.o cuda_op_kernel.cu.cc \
23 |   ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr
24 |  
25 | 
26 | g++ -std=c++11 -shared -o cuda_op_kernel_52.so cuda_op_kernel.cc \
27 |   cuda_op_kernel_52.cu.o ${TF_CFLAGS[@]} -fPIC -L $CUDA_PATH -lcudart ${TF_LFLAGS[@]}
28 | 	
29 | g++ -std=c++11 -shared -o cuda_op_kernel_75.so cuda_op_kernel.cc \
30 |   cuda_op_kernel_75.cu.o ${TF_CFLAGS[@]} -fPIC -L $CUDA_PATH -lcudart ${TF_LFLAGS[@]}
31 | 
32 | #g++ -std=c++11 -shared -o cuda_op_kernel_52.so cuda_op_kernel.cc \
33 | #  cuda_op_kernel_52.cu.o ${TF_FLAGS[@]} -fPIC -lcudart -L$TF_LIB  \
34 | #  -L/usr/local/cuda/lib64 -I $TF_INC -I$TF_INC/external/nsync/public -D_GLIBCXX_USE_CXX11_ABI=1 ${TF_FLAGS[@]} #-g #-O3
35 | #g++ -std=c++11 -shared -o cuda_op_kernel_75.so cuda_op_kernel.cc \
36 | #  cuda_op_kernel_75.cu.o ${TF_FLATS[@]} -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB  \
37 | #  -L/usr/local/cuda/lib64 -D_GLIBCXX_USE_CXX11_ABI=1 ${TF_FLAGS[@]} -D GOOGLE_CUDA=1 #-g #-O3
38 | 
39 | #g++ -std=c++11 -shared -o cuda_op_kernel_52.so cuda_op_kernel.cc \
40 | #  cuda_op_kernel_52.cu.o ${TF_FLAGS[@]} -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB  \
41 | #  -L/usr/local/cuda/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 ${TF_FLAGS[@]} #-g #-O3
42 | #g++ -std=c++11 -shared -o cuda_op_kernel_75.so cuda_op_kernel.cc \
43 | #  cuda_op_kernel_75.cu.o ${TF_FLAGS[@]} -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB  \
44 | #  -L/usr/local/cuda/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 ${TF_FLAGS[@]} #-g #-O3
45 | 
46 | 
47 | 
48 | cd py_util
49 | echo
50 | echo ".................."
51 | echo "building py_util" 
52 | ./build_centos.sh
53 | cd ..
54 | 


--------------------------------------------------------------------------------
/cuda_includes.h:
--------------------------------------------------------------------------------
 1 | #define CHECK_INIT { if(!op_initialized) init_op_launcher(); }
 2 | #define REQ_INIT ASSERT(op_initialized, "op not initialized")
 3 | 
 4 | #define CHECK_CUDA_ERR {err = cudaGetLastError();if(err != cudaSuccess){\
 5 | 		printf("CUDA error: %s, %s, %i\n",cudaGetErrorString(err),__FILE__,__LINE__); PANIC("");}}
 6 | #define CHECK_CUDA_ERR_R {err = cudaGetLastError();if(err != cudaSuccess){\
 7 | 		printf("CUDA error: %s, %s, %i\n",cudaGetErrorString(err),__FILE__,__LINE__); PANIC("");}}
 8 | #define MALLOC_ERR_CHECK {if (err != cudaSuccess){printf("malloc err line: %i\n",__LINE__);  PANIC("");}}
 9 | #define MALLOC_ERR_CHECK_R {if (err != cudaSuccess){printf("malloc err line: %i\n",__LINE__); PANIC("");}}
10 | 
11 | #ifdef CUDA_DEBUG
12 | 	#define DASSERT(A) assert(A);
13 | #else
14 | 	#define DASSERT(A) 
15 | #endif
16 | 
17 | #define BMEM(A, B, SZ) err = cudaMemcpy(A, B, SZ*sizeof(A[0]), cudaMemcpyDeviceToDevice);  MALLOC_ERR_CHECK
18 | #define RMEM(A, B, SZ) err = cudaMemcpy(B, A, SZ*sizeof(A[0]), cudaMemcpyDeviceToDevice);  MALLOC_ERR_CHECK
19 | 
20 | char op_initialized; 
21 | 
22 | curandState_t* rand_states;
23 | 
24 | /////////////// game state
25 | // [X]2 indicates backup variables used to restore session
26 | 
27 | char *board, *board2, board_cpu[BATCH_MAP_SZ];
28 | 
29 | // previous states to prevent ko
30 | char *board_prev, *board_pprev;
31 | char *board_prev2, *board_pprev2;
32 | 
33 | int16_t * n_captures, *n_captures2; // [N_PLAYERS, BATCH_SZ]
34 | 
35 | int16_t * ai_to_coord; // [BATCH_SZ], output of move_random_ai, input to move_unit
36 | 
37 | char * valid_mv_map_internal; // [BATCH_SZ, MAP_SZ], output of create_batch, input to move_unit
38 | char * moved_internal; // [BATCH_SZ] used in move_random_ai, req. input to move_unit_launcher, results not used
39 | 
40 | // 1 or -1:
41 | #define GET_PLAYER_VAL DASSERT((*moving_player == 0) || (*moving_player == 1)); char player_val = ((*moving_player == 0) * 2 )- 1;
42 | 
43 | #define CHK_VALID_MAP_COORD(COORD) DASSERT((COORD) >= 0 && (COORD) < MAP_SZ)
44 | 
45 | // count valid mvs and store n_valid_mvs
46 | #define COUNT_VALID \
47 | 	int n_valid_mvs = 0;\
48 | 	int16_t valid_mv_inds[MAP_SZ];\
49 | 	MAP_LOOP{\
50 | 		if(valid_mv_map_internal[gm_offset + loc]){\
51 | 			valid_mv_inds[n_valid_mvs] = loc;\
52 | 			n_valid_mvs ++;\
53 | 		}\
54 | 	}\
55 | 	if(!n_valid_mvs){\
56 | 		to_coord[gm] = -1;\
57 | 		return;\
58 | 	} // no valid mvs
59 | 
60 | 


--------------------------------------------------------------------------------
/cuda_op_kernel.cc:
--------------------------------------------------------------------------------
  1 | #include "tensorflow/core/framework/op.h"
  2 | #include "tensorflow/core/framework/op_kernel.h"
  3 | #include "tensorflow/core/framework/shape_inference.h"
  4 | #include "tensorflow/core/framework/tensor.h"
  5 | 
  6 | #include "includes.h"
  7 | using namespace tensorflow;
  8 | 
  9 | #include "vars.cc" // return / set vars
 10 | 
// The three ops below share one signature: a float16 probability map in, one
// int16 coordinate per game out.  No shape function is registered for them; the
// kernels in this file check that the input is [BATCH_SZ, MAP_SZ] and allocate
// the output as [BATCH_SZ].

// return coordinate from probability map, proportionate to probabilities
REGISTER_OP("ProbToCoord")
	.Input("prob_map: float16") // [BATCH_SZ, MAP_SZ]
	.Output("to_coord: int16");

// return coordinate from probability map, proportionate to probabilities, restricted to only valid mvs
REGISTER_OP("ProbToCoordValidMvs")
	.Input("prob_map: float16")
	.Output("to_coord: int16");

// return max coordinate from probability map, restricted to only valid mvs
REGISTER_OP("MaxProbToCoordValidMvs")
	.Input("prob_map: float16")
	.Output("to_coord: int16");
 25 | 
 26 | #define CREATE_BATCH_SHAPES tensorflow::TensorShape imgs_shape, valid_mv_map_shape;\
 27 | 		imgs_shape.AddDim(BATCH_SZ);\
 28 | 		imgs_shape.AddDim(MAP_SZ_X);\
 29 | 		imgs_shape.AddDim(MAP_SZ_Y);\
 30 | 		imgs_shape.AddDim(N_INPUT_CHANNELS);\
 31 | 		\
 32 | 		valid_mv_map_shape.AddDim(BATCH_SZ);\
 33 | 		valid_mv_map_shape.AddDim(MAP_SZ_X);\
 34 | 		valid_mv_map_shape.AddDim(MAP_SZ_Y);
 35 | 	
 36 | REGISTER_OP("CreateBatch")
 37 | 	.Input("moving_player: int8") // [1]
 38 | 	.Output("imgs: float16")
 39 | 	.Output("valid_mv_map: int8")
 40 | 
 41 | 	.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
 42 | 		CREATE_BATCH_SHAPES
 43 | 		tensorflow::shape_inference::ShapeHandle imgs_shape_h, valid_mv_map_shape_h;
 44 | 		
 45 | 		c->MakeShapeFromTensorShape(imgs_shape, &imgs_shape_h);
 46 | 		c->MakeShapeFromTensorShape(valid_mv_map_shape, &valid_mv_map_shape_h);
 47 | 
 48 | 		c->set_output(0, imgs_shape_h);
 49 | 		c->set_output(1, valid_mv_map_shape_h);
 50 | 
 51 | 		return Status::OK();
 52 | });
 53 | 
 54 | #define RETURN_WINNER_SHAPES tensorflow::TensorShape winner_shape, score_shape, n_captures_shape;\
 55 | 		winner_shape.AddDim(BATCH_SZ);\
 56 | 		score_shape.AddDim(BATCH_SZ);\
 57 | 		n_captures_shape.AddDim(N_PLAYERS);\
 58 | 		n_captures_shape.AddDim(BATCH_SZ);
 59 | 
 60 | REGISTER_OP("ReturnWinner")
 61 | 	.Input("moving_player: int8") // [1]
 62 | 	.Output("winner: int8")
 63 | 	.Output("score: int16")
 64 | 	.Output("n_captures: int16")
 65 | 
 66 | 	.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
 67 | 		RETURN_WINNER_SHAPES
 68 | 		tensorflow::shape_inference::ShapeHandle winner_shape_h, score_shape_h, n_captures_shape_h;
 69 | 
 70 | 		c->MakeShapeFromTensorShape(winner_shape, &winner_shape_h);
 71 | 		c->set_output(0, winner_shape_h);
 72 | 
 73 | 		c->MakeShapeFromTensorShape(score_shape, &score_shape_h);
 74 | 		c->set_output(1, score_shape_h);
 75 | 		
 76 | 		c->MakeShapeFromTensorShape(n_captures_shape, &n_captures_shape_h);
 77 | 		c->set_output(2, n_captures_shape_h);
 78 | 
 79 | 		return Status::OK();
 80 | });
 81 | 
 82 | 
// Ops without tensor inputs or outputs; they act on the game state kept by the
// CUDA side.  NOTE: no kernel is registered for "EndTurn" in this file.
REGISTER_OP("InitState");
REGISTER_OP("EndTurn");
REGISTER_OP("SessionBackup");
REGISTER_OP("SessionRestore");
// makes a move for the given player (no outputs)
REGISTER_OP("MoveRandomAi")
	.Input("moving_player: int8"); // scalar (the kernel asserts rank 0)

// applies one coordinate per game for the given player
REGISTER_OP("MoveUnit")
	.Input("to_coord: int16") // [BATCH_SZ] (checked by the kernel)
	.Input("moving_player: int8") // scalar (the kernel asserts rank 0)
	.Output("moved: int8"); // [BATCH_SZ]
 94 | 
// Launchers called by the op kernels below; their definitions are not in this
// file (presumably in the CUDA translation unit -- confirm).
// NOTE(review): the kernels pass buffers of Eigen::half elements for the
// `float *` parameters (prob_map, imgs) -- confirm the launchers read / write
// them as half precision.
void prob_to_coord_launcher(float * prob_map, int16_t * to_coord);
void prob_to_coord_valid_mvs_launcher(float * prob_map, int16_t * to_coord);
void max_prob_to_coord_valid_mvs_launcher(float * prob_map, int16_t * to_coord);

void session_backup_launcher();
void session_restore_launcher();
void return_inputs_launcher(float* out); // declared here but not called in this file
void init_state_launcher();
void move_random_ai_launcher(int8_t * moving_player);
void create_batch_launcher(float * imgs, int8_t * moving_player, char * valid_mv_map);
void move_unit_launcher(int16_t * to_coord, int8_t * moving_player, char *moved);
void return_winner_launcher(int8_t * winner, int8_t *moving_player, int16_t * score, int16_t * n_captures_out);
107 | 
108 | class session_backup : public OpKernel {
109 | 	public:
110 | 	explicit session_backup(OpKernelConstruction* context) : OpKernel(context) {}
111 | 
112 | 	void Compute(OpKernelContext* context) override {
113 | 		session_backup_launcher();
114 | 	}
115 | };
116 | 
117 | class session_restore : public OpKernel {
118 | 	public:
119 | 	explicit session_restore(OpKernelConstruction* context) : OpKernel(context) {}
120 | 
121 | 	void Compute(OpKernelContext* context) override {
122 | 		session_restore_launcher();
123 | 	}
124 | };
125 | 
126 | class prob_to_coord : public OpKernel {
127 | 	public:
128 | 	explicit prob_to_coord(OpKernelConstruction* context) : OpKernel(context) {}
129 | 
130 | 	void Compute(OpKernelContext* context) override {
131 | 		/////////////////////////////////// inputs
132 |     		const Tensor& prob_map_tensor = context->input(0);
133 | 
134 | 		auto prob_map = prob_map_tensor.flat<Eigen::half>();
135 | 
136 | 		// check dims
137 | 		TensorShape prob_map_shape = prob_map_tensor.shape();
138 | 		ASSERT(prob_map_shape.dims() == 2, "number of dims not correct")
139 | 		ASSERT(prob_map_shape.dim_size(0) == BATCH_SZ, "incorrect input size")
140 | 		ASSERT(prob_map_shape.dim_size(1) == MAP_SZ, "incorrect input size")
141 | 
142 | 		////////////////////////////////////// outputs
143 | 		Tensor* to_coord_tensor = nullptr;
144 | 
145 | 		TensorShape to_coord_shape;
146 | 		to_coord_shape.AddDim(BATCH_SZ);
147 | 
148 | 		OP_REQUIRES_OK(context, context->allocate_output(0, to_coord_shape, &to_coord_tensor));
149 | 
150 | 		auto to_coord = to_coord_tensor->template flat<int16>();
151 | 
152 | 		///////////////////
153 | 		prob_to_coord_launcher((float*)prob_map.data(), (int16_t*)to_coord.data());
154 | 	}
155 | };
156 | 
157 | class prob_to_coord_valid_mvs : public OpKernel {
158 | 	public:
159 | 	explicit prob_to_coord_valid_mvs(OpKernelConstruction* context) : OpKernel(context) {}
160 | 
161 | 	void Compute(OpKernelContext* context) override {
162 | 		/////////////////////////////////// inputs
163 |     		const Tensor& prob_map_tensor = context->input(0);
164 | 		auto prob_map = prob_map_tensor.flat<Eigen::half>();
165 | 
166 | 		// check dims
167 | 		TensorShape prob_map_shape = prob_map_tensor.shape();
168 | 		ASSERT(prob_map_shape.dims() == 2, "number of dims not correct")
169 | 		ASSERT(prob_map_shape.dim_size(0) == BATCH_SZ, "incorrect input size")
170 | 		ASSERT(prob_map_shape.dim_size(1) == MAP_SZ, "incorrect input size")
171 | 
172 | 		////////////////////////////////////// outputs
173 | 		Tensor* to_coord_tensor = nullptr;
174 | 
175 | 		TensorShape to_coord_shape;
176 | 		to_coord_shape.AddDim(BATCH_SZ);
177 | 
178 | 		OP_REQUIRES_OK(context, context->allocate_output(0, to_coord_shape, &to_coord_tensor));
179 | 
180 | 		auto to_coord = to_coord_tensor->template flat<int16_t>();
181 | 
182 | 		///////////////////
183 | 		prob_to_coord_valid_mvs_launcher((float*)prob_map.data(), (int16_t*)to_coord.data());
184 | 	}
185 | };
186 | 
187 | class max_prob_to_coord_valid_mvs : public OpKernel {
188 | 	public:
189 | 	explicit max_prob_to_coord_valid_mvs(OpKernelConstruction* context) : OpKernel(context) {}
190 | 
191 | 	void Compute(OpKernelContext* context) override {
192 | 		/////////////////////////////////// inputs
193 |     		const Tensor& prob_map_tensor = context->input(0);
194 | 		auto prob_map = prob_map_tensor.flat<Eigen::half>();
195 | 
196 | 		// check dims
197 | 		TensorShape prob_map_shape = prob_map_tensor.shape();
198 | 		ASSERT(prob_map_shape.dims() == 2, "number of dims not correct")
199 | 		ASSERT(prob_map_shape.dim_size(0) == BATCH_SZ, "incorrect input size")
200 | 		ASSERT(prob_map_shape.dim_size(1) == MAP_SZ, "incorrect input size")
201 | 
202 | 		////////////////////////////////////// outputs
203 | 		Tensor* to_coord_tensor = nullptr;
204 | 
205 | 		TensorShape to_coord_shape;
206 | 		to_coord_shape.AddDim(BATCH_SZ);
207 | 
208 | 		OP_REQUIRES_OK(context, context->allocate_output(0, to_coord_shape, &to_coord_tensor));
209 | 
210 | 		auto to_coord = to_coord_tensor->template flat<int16_t>();
211 | 
212 | 		///////////////////
213 | 		max_prob_to_coord_valid_mvs_launcher((float*)prob_map.data(), (int16_t*)to_coord.data());
214 | 	}
215 | };
216 | 
217 | class return_winner : public OpKernel {
218 | 	public:
219 | 	explicit return_winner(OpKernelConstruction* context) : OpKernel(context) {}
220 | 
221 | 	void Compute(OpKernelContext* context) override {
222 | 		/////////////////////////////////// inputs
223 |     		const Tensor& moving_player_tensor = context->input(0);
224 | 		auto moving_player = moving_player_tensor.flat<int8>();
225 | 
226 | 		// check dims
227 | 		TensorShape moving_player_shape = moving_player_tensor.shape();
228 | 		ASSERT(moving_player_shape.dims() == 0, "number of dims not correct")
229 | 
230 | 		////////////////////////////////////// outputs
231 | 		Tensor* winner_tensor = nullptr, * score_tensor = nullptr, * n_captures_tensor = nullptr;
232 | 		RETURN_WINNER_SHAPES
233 | 
234 | 		OP_REQUIRES_OK(context, context->allocate_output(0, winner_shape, &winner_tensor));
235 | 		OP_REQUIRES_OK(context, context->allocate_output(1, score_shape, &score_tensor));
236 | 		OP_REQUIRES_OK(context, context->allocate_output(2, n_captures_shape, &n_captures_tensor));
237 | 
238 | 		auto winner = winner_tensor->template flat<int8_t>();
239 | 		auto score = score_tensor->template flat<int16_t>();
240 | 		auto n_captures = n_captures_tensor->template flat<int16>();
241 | 
242 | 		///////////////////
243 | 		return_winner_launcher((int8_t*)winner.data(), (int8_t*)moving_player.data(), (int16_t*)score.data(), (int16_t*)n_captures.data());
244 | 	}
245 | };
246 | 
247 | class move_unit : public OpKernel {
248 | 	public:
249 | 	explicit move_unit(OpKernelConstruction* context) : OpKernel(context) {}
250 | 
251 | 	void Compute(OpKernelContext* context) override {
252 | 		///////////////////////////////////// inputs
253 | 		const Tensor& to_coord_tensor = context->input(0);
254 |     		const Tensor& moving_player_tensor = context->input(1);
255 | 
256 | 		auto to_coord = to_coord_tensor.flat<int16>();
257 | 		auto moving_player = moving_player_tensor.flat<int8>();
258 | 
259 | 		// check dims
260 | 		TensorShape to_map_shape = to_coord_tensor.shape();
261 | 		ASSERT(to_map_shape.dims() == 1, "number of dims not correct")
262 | 		ASSERT(to_map_shape.dim_size(0) == BATCH_SZ, "incorrect input size")
263 | 
264 | 		TensorShape moving_player_shape = moving_player_tensor.shape();
265 | 		ASSERT(moving_player_shape.dims() == 0, "number of dims not correct")
266 | 
267 | 		///////////////////////// outputs
268 | 		Tensor* moved_tensor = nullptr;
269 | 
270 | 		TensorShape moved_shape;
271 | 		moved_shape.AddDim(BATCH_SZ);
272 | 
273 | 		OP_REQUIRES_OK(context, context->allocate_output(0, moved_shape, &moved_tensor));
274 | 
275 | 		auto moved = moved_tensor->template flat<int8>();
276 | 
277 | 		///////////////////
278 | 		move_unit_launcher((int16_t*)to_coord.data(), (int8_t*)moving_player.data(), (char*)moved.data());
279 | 	}
280 | };
281 | 
282 | class create_batch : public OpKernel {
283 | 	public:
284 | 	explicit create_batch(OpKernelConstruction* context) : OpKernel(context) {}
285 | 
286 | 	void Compute(OpKernelContext* context) override {
287 | 		///////////////////////////////////// inputs
288 | 		const Tensor& moving_player_tensor = context->input(0);
289 | 		auto moving_player = moving_player_tensor.flat<int8>();
290 | 	
291 | 		// check dims
292 | 		TensorShape moving_player_shape = moving_player_tensor.shape();
293 | 		ASSERT(moving_player_shape.dims() == 0, "number of dims not correct")
294 | 
295 | 
296 | 		////////////////////////////////////// outputs
297 | 		CREATE_BATCH_SHAPES
298 | 		Tensor* imgs_tensor = nullptr, *valid_mv_map_tensor = nullptr;
299 | 		
300 | 		OP_REQUIRES_OK(context, context->allocate_output(0, imgs_shape, &imgs_tensor));
301 | 		OP_REQUIRES_OK(context, context->allocate_output(1, valid_mv_map_shape, &valid_mv_map_tensor));
302 | 
303 | 		auto imgs = imgs_tensor->template flat<Eigen::half>();
304 | 		auto valid_mv_map = valid_mv_map_tensor->template flat<int8>();
305 | 
306 | 		///////////////////
307 | 		create_batch_launcher((float*)imgs.data(), (int8_t*)moving_player.data(), 
308 | 			(char *)valid_mv_map.data());
309 | 	}
310 | };
311 | 
312 | class init_state : public OpKernel {
313 | 	public:
314 | 	explicit init_state(OpKernelConstruction* context) : OpKernel(context) {}
315 | 
316 | 	void Compute(OpKernelContext* context) override {
317 | 		init_state_launcher();
318 | 	}
319 | };
320 | 
321 | class move_random_ai : public OpKernel {
322 | 	public:
323 | 	explicit move_random_ai(OpKernelConstruction* context) : OpKernel(context) {}
324 | 
325 | 	void Compute(OpKernelContext* context) override {
326 | 		///////////////////////////////////// inputs
327 | 		const Tensor& moving_player_tensor = context->input(0);
328 | 
329 |     		auto moving_player = moving_player_tensor.flat<int8>();
330 | 
331 | 		// check dims
332 | 		TensorShape moving_player_shape = moving_player_tensor.shape();
333 | 		ASSERT(moving_player_shape.dims() == 0, "number of dims not correct")
334 | 
335 | 		move_random_ai_launcher((int8_t *)moving_player.data());
336 | 	}
337 | };
338 | 
// Bind each op name to its kernel class.  Every kernel is registered for
// DEVICE_GPU only.
REGISTER_KERNEL_BUILDER(Name("InitState").Device(DEVICE_GPU), init_state);
REGISTER_KERNEL_BUILDER(Name("MoveRandomAi").Device(DEVICE_GPU), move_random_ai);
REGISTER_KERNEL_BUILDER(Name("CreateBatch").Device(DEVICE_GPU), create_batch);
REGISTER_KERNEL_BUILDER(Name("MoveUnit").Device(DEVICE_GPU), move_unit);
REGISTER_KERNEL_BUILDER(Name("SessionRestore").Device(DEVICE_GPU), session_restore);
REGISTER_KERNEL_BUILDER(Name("SessionBackup").Device(DEVICE_GPU), session_backup);
REGISTER_KERNEL_BUILDER(Name("ReturnWinner").Device(DEVICE_GPU), return_winner);
REGISTER_KERNEL_BUILDER(Name("ProbToCoord").Device(DEVICE_GPU), prob_to_coord);
REGISTER_KERNEL_BUILDER(Name("ProbToCoordValidMvs").Device(DEVICE_GPU), prob_to_coord_valid_mvs);
REGISTER_KERNEL_BUILDER(Name("MaxProbToCoordValidMvs").Device(DEVICE_GPU), max_prob_to_coord_valid_mvs);
349 | 
350 | 


--------------------------------------------------------------------------------
/cuda_op_kernel.cu.cc:
--------------------------------------------------------------------------------
 1 | #if GOOGLE_CUDA
 2 | #define EIGEN_USE_GPU
 3 | #include <curand.h>
 4 | #include <curand_kernel.h>
 5 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 6 | #include "includes.h"
 7 | #include "cuda_includes.h"
 8 | 
 9 | #include "kernels/return_state.cu"
10 | #include "kernels/verify_integrity.cu"
11 | #include "kernels/init_op.cu" // allocates memory
12 | 
13 | #include "kernels/vars.cu.cc"
14 | #include "kernels/init_state.cu" // inits new set of games
15 | 
16 | #include "kernels/move_unit.cu"
17 | #include "kernels/move_random_ai.cu"
18 | #include "kernels/create_batch.cu"
19 | #include "kernels/return_winner.cu"
20 | 
21 | #include "kernels/session_backup.cu.cc"
22 | #include "kernels/prob_to_coord.cu"
23 | #include "kernels/prob_to_coord_valid_mvs.cu"
24 | #include "kernels/max_prob_to_coord_valid_mvs.cu"
25 | 
26 | #endif
27 | 
28 | 


--------------------------------------------------------------------------------
/global_vars.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import random
 3 | import numpy as np
 4 | 
 5 | RAND_SEED = np.int(1e1*time.time()) % 4294967295
 6 | np.random.seed(RAND_SEED)
 7 | random.seed(RAND_SEED)
 8 | 
 9 | n_rows, n_cols = 7,7
10 | N_PLAYERS = 2
11 | 
12 | map_sz = (n_rows, n_cols)
13 | map_szt = np.prod(map_sz)
14 | 
15 | n_input_channels = 3 # present and prior 2 game turns
16 | 
17 | ########### training:
18 | BATCH_SZ = 128
19 | INPUTS_SHAPE = (BATCH_SZ, n_rows, n_cols, n_input_channels)
20 | 
21 | 


--------------------------------------------------------------------------------
/gnu_go_test.py:
--------------------------------------------------------------------------------
  1 | import subprocess as sp
  2 | from subprocess import Popen, PIPE
  3 | from time import sleep
  4 | from fcntl import fcntl, F_GETFL, F_SETFL
  5 | from os import O_NONBLOCK, read
  6 | import global_vars as gv
  7 | import numpy as np
  8 | 
  9 | LEVEL = 1#0
 10 | PAUSE = .001
 11 | row_nm = 'ABCDEFGHIJKLMNOP'
 12 | colors = 'BW'
 13 | f = [None]*gv.BATCH_SZ
 14 | 
 15 | ### start gnugo
 16 | for gm in range(gv.BATCH_SZ):
 17 | 	f[gm] = sp.Popen(['gnugo', '--chinese-rules', '--seed', str(gm+1), '--play-out-aftermath', '--capture-all-dead', '--no-ko', '--never-resign', '--mode','gtp','--boardsize',str(gv.n_rows),'--level', str(LEVEL)], stdout=sp.PIPE, stdin=sp.PIPE)
 18 | 	flags = fcntl(f[gm].stdout, F_GETFL) # get current p.stdout flags
 19 | 	fcntl(f[gm].stdout, F_SETFL, flags | O_NONBLOCK)
 20 | 
 21 | def read_resp(gm):
 22 | 	sleep(PAUSE)
 23 | 	resp = ' '
 24 | 	while resp[-1] != '\n':
 25 | 		sleep(PAUSE)
 26 | 		try:
 27 | 			resp2 = f[gm].stdout.read()
 28 | 			resp += resp2
 29 | 		except:
 30 | 			continue
 31 | 	return resp[1:]
 32 | 
 33 | def req_ok(gm, cmd):
 34 | 	f[gm].stdin.write(cmd)
 35 | 	resp = read_resp(gm)
 36 | 	assert resp[:2] == '= ', 'err reading resp gm %i, cmd %s resp %s' % (gm, cmd, resp)
 37 | 
 38 | def req_ok_or_illegal(gm, cmd):
 39 | 	f[gm].stdin.write(cmd)
 40 | 	resp = read_resp(gm)
 41 | 	assert resp[:2] == '= ' or resp.find('? illegal move') != -1
 42 | 
 43 | def init_board(board):
 44 | 	for gm in range(gv.BATCH_SZ):
 45 | 		req_ok(gm, 'clear_board\n')
 46 | 		for i in range(gv.n_rows):
 47 | 			for j in range(gv.n_cols):
 48 | 				if board[gm,i,j] == 0:
 49 | 					continue
 50 | 				#req_ok(gm, 'play %s %s%i\n' % (colors[np.int((board[gm,i,j]+1.)/2)], row_nm[j], gv.n_rows - i))
 51 | 				f[gm].stdin.write('play %s %s%i\n' % (colors[np.int((board[gm,i,j]+1.)/2)], row_nm[j], gv.n_rows - i))
 52 | 
 53 | def move_nn(to_coords, moving_player=0):
 54 | 	passes = to_coords == -1
 55 | 	to_coords_i = np.array(to_coords)
 56 | 	to_coords_i[passes] = 0
 57 | 
 58 | 	i, j = np.unravel_index(to_coords_i, (gv.n_rows, gv.n_cols))
 59 | 	for gm in range(gv.BATCH_SZ):
 60 | 		#req_ok_or_illegal(gm, 'play %s %s%i\n' % (colors[moving_player], row_nm[j[gm]], gv.n_rows - i[gm]))
 61 | 		if passes[gm]:
 62 | 			cmd = 'play %s pass\n' % colors[moving_player]
 63 | 		else:
 64 | 			cmd = 'play %s %s%i\n' % (colors[moving_player], row_nm[j[gm]], gv.n_rows - i[gm])
 65 | 		f[gm].stdin.write(cmd)
 66 | 
 67 | def move_ai(moving_player=1):
 68 | 	ai_to_coords = -np.ones(gv.BATCH_SZ, dtype='int32')
 69 | 
 70 | 	for gm in range(gv.BATCH_SZ):
 71 | 		while True:
 72 | 			try:
 73 | 				f[gm].stdout.read()
 74 | 				break
 75 | 			except:
 76 | 				j = 1
 77 | 
 78 | 		f[gm].stdin.write('genmove %s\n' % colors[moving_player])
 79 | 	
 80 | 	for gm in range(gv.BATCH_SZ):
 81 | 		ai_mv_orig = read_resp(gm)
 82 | 		ai_mv = ai_mv_orig.split('\n\n')[-2]
 83 | 		if ai_mv[:2] != '= ':
 84 | 			print 'failed gm %i resp %s' % (gm, ai_mv)
 85 | 			continue
 86 | 		#assert ai_mv[:2] == '= ', 'gm %i resp %s' % (gm, ai_mv)
 87 | 		if ai_mv.find('= PASS') != -1:
 88 | 			#print 'pass ', gm
 89 | 			continue
 90 | 		if ai_mv.find('= resign') != -1:
 91 | 			print 'resign ', gm
 92 | 			#assert False
 93 | 			continue
 94 | 		if len(ai_mv) <= 3:
 95 | 			#assert False, 'gm %i resp %s, orig %s' % (gm, ai_mv, ai_mv_orig)
 96 | 			assert 'gm %i resp %s, orig %s' % (gm, ai_mv, ai_mv_orig)
 97 | 			continue
 98 | 		col = row_nm.find(ai_mv[2])
 99 | 		assert col != -1, 'gm %i resp %s' % (gm, ai_mv)
100 | 		row = gv.n_rows - np.int(ai_mv[3:])
101 | 
102 | 		ai_to_coords[gm] = row*gv.n_cols + col
103 | 	return ai_to_coords
104 | 
def show_board(gm):
	"""Send 'showboard' to gnugo process `gm` and print its reply."""
	engine = f[gm]
	engine.stdin.write('showboard\n')
	board_text = read_resp(gm)
	print(board_text)
108 | 
109 | 
110 | 


--------------------------------------------------------------------------------
/includes.h:
--------------------------------------------------------------------------------
// Uncomment to enable CUDA_DEBUG-guarded code (e.g. the real VERIFY_BUFFER_INTEGRITY check).
//#define CUDA_DEBUG 1

// Print message A followed by the source file and line, then exit the process.
#define PANIC(A) { printf(A " %s:%i\n", __FILE__,__LINE__); exit(1); }
// PANIC with message A when condition S is false.
#define ASSERT(S, A) { if(!(S)) PANIC(A) }

// Values mirrored in global_vars.py (BATCH_SZ, N_PLAYERS, board size, input channels); keep both in sync.
#define BATCH_SZ 128
#define N_PLAYERS 2

#define MAP_SZ_X 7

#define MAP_SZ_Y MAP_SZ_X

// positions per board, and positions across the whole batch
#define MAP_SZ (MAP_SZ_X*MAP_SZ_Y)
#define BATCH_MAP_SZ (BATCH_SZ*MAP_SZ_X*MAP_SZ_Y)

// Loop header iterating `loc` over every position of one board.
#define MAP_LOOP for(int16_t loc = 0; loc < MAP_SZ; loc++)

#define N_INPUT_CHANNELS 3

// return var indices
#define BOARD_IDX 0
#define VALID_MV_MAP_INTERNAL_IDX 2

// `op` selector values for vars_launcher: copy a buffer out (RETURN_VARS) or overwrite it (SET_VARS)
#define RETURN_VARS 1
#define SET_VARS 0
26 | 
27 | 


--------------------------------------------------------------------------------
/kernels/create_batch.cu:
--------------------------------------------------------------------------------
// Liberty test against the scratch board `board_tmp` (game offset 0) instead of the global board.
// Requires board_tmp, coord_stack and coord_stack_sz to be in scope where it is used.
#define LIBERTY_TMP(COORD, PLAYER_VAL) return_liberty(COORD, PLAYER_VAL, 0, board_tmp, coord_stack, &coord_stack_sz)

// imgs_shape = [gv.BATCH_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels]
// valid_mv_map = [gv.BATCH_SZ, gv.n_rows, gv.n_cols]

// create batch (for nn) from current game state
//
// Launched with one block per game and one thread per board position.
// Phase 1 (all threads): write the three input channels for this position --
//   current, previous and previous-previous boards -- as +1 (stone equals player_val),
//   0 (empty) or -1 (anything else).
// Phase 2 (thread 0 only): walk the whole board serially and fill valid_mv_map and
//   valid_mv_map_internal with 1 for every playable position.
// NOTE(review): player_val comes from GET_PLAYER_VAL, defined outside this view; presumably derived from *moving_player.
__global__ void create_batch_kernel(half * imgs, char * board, char * board_prev, char * board_pprev, int8_t * moving_player, char * valid_mv_map,
		char * valid_mv_map_internal){
	
	int32_t gm = blockIdx.x;
	int16_t map_coord = threadIdx.x;
	int game_offset = gm*MAP_SZ;
	int gcoord = game_offset + map_coord;

	GET_PLAYER_VAL

	//////////// imgs
	// channels are interleaved per position: [current, prev, pprev]
	int icoord = gm*MAP_SZ*N_INPUT_CHANNELS + map_coord*N_INPUT_CHANNELS;
	if(board[gcoord] == player_val)
		imgs[icoord] = 1;
	else if(board[gcoord] == 0)
		imgs[icoord] = 0;
	else
		imgs[icoord] = -1;

	icoord ++;
	if(board_prev[gcoord] == player_val)
		imgs[icoord] = 1;
	else if(board_prev[gcoord] == 0)
		imgs[icoord] = 0;
	else
		imgs[icoord] = -1;

	icoord ++;
	if(board_pprev[gcoord] == player_val)
		imgs[icoord] = 1;
	else if(board_pprev[gcoord] == 0)
		imgs[icoord] = 0;
	else
		imgs[icoord] = -1;

	//////////// valid moves
	// adj search vars
	int16_t coord_stack[MAP_SZ];
	int coord_stack_sz;

	// only thread 0 of each block continues; it reuses map_coord / gcoord as loop variables below
	__syncthreads();
	if(map_coord != 0) return;

	#define ADD_MV { valid_mv_map[gcoord] = 1; valid_mv_map_internal[gcoord] = 1; }
	
	for(map_coord = 0; map_coord < MAP_SZ; map_coord++){
		gcoord = game_offset + map_coord;
		
		// default: not playable
		valid_mv_map[gcoord] = 0;
		valid_mv_map_internal[gcoord] = 0;

		// occupied positions are never playable
		if(board[gcoord] != 0) continue;

		// add move
		if(LIBERTY(map_coord, player_val)){
			ADD_MV
			continue;
		}

		//////////// if no liberty, check if pieces can be captured creating liberty for moving player

		// copy board
		char board_tmp[MAP_SZ]; // just store one game, don't waste space for games not eval'd in this worker
		for(int loc = 0; loc < MAP_SZ; loc++)
			board_tmp[loc] = board[game_offset + loc];

		// if we did move here, would we capture?
		char valid_mv = 0;
		board_tmp[map_coord] = player_val; // tmp move here
		ADJ_LOOP(map_coord)
			// remove pieces with no liberty
			if(board_tmp[coord_i] == (-player_val) &&
				!LIBERTY_TMP(coord_i, -player_val)){
					valid_mv = 1;
					board_tmp[coord_i] = 0;

					// remove adj pieces (to then check if final state matches prior state)
					// coord_stack was filled by the LIBERTY_TMP call above with the rest of the captured group
					for(int stack_i = 0; stack_i < coord_stack_sz; stack_i++){
						int coord_j = coord_stack[stack_i];
						DASSERT(board_tmp[coord_j] == (-player_val))
						board_tmp[coord_j] = 0;
					} // stack

				} // opposing player / liberty check
		} // adj loop

		// nothing would be captured, so the stone would have no liberty: not playable
		if(valid_mv == 0)
			continue;

		////// does this replicate a prior state?
		// matching / matching2 stay 1 only if the resulting board equals board_pprev / board_prev everywhere
		char matching = 1, matching2 = 1;
		for(int loc = 0; matching && (loc < MAP_SZ); loc++){
			matching = board_pprev[game_offset + loc] == board_tmp[loc];
		}
		for(int loc = 0; matching2 && (loc < MAP_SZ); loc++){
			matching2 = board_prev[game_offset + loc] == board_tmp[loc];
		}

		// playable only when the result repeats neither of the two stored prior boards
		if(matching == 0 && matching2 == 0) ADD_MV 

	} // map loop
}
109 | 
// Host entry point: launch create_batch_kernel with one block per game and one thread per board position.
// `imgs` is declared float* but is reinterpreted as half* for the kernel.
// NOTE(review): REQ_INIT and the board / valid_mv_map_internal buffers are defined outside this view.
void create_batch_launcher(float * imgs, int8_t * moving_player, char * valid_mv_map){
	REQ_INIT

	create_batch_kernel <<< BATCH_SZ, MAP_SZ >>> ((half*)imgs, board, board_prev, board_pprev, moving_player, valid_mv_map, valid_mv_map_internal);

	VERIFY_BUFFER_INTEGRITY
}
117 | 
118 | 


--------------------------------------------------------------------------------
/kernels/init_op.cu:
--------------------------------------------------------------------------------
// Initialise one curand state per (block, thread) pair.
// Each state is seeded with RAND_SEED plus its flat index (blockIdx.x*map_sz + threadIdx.x).
__global__ void init_rand_states(int32_t RAND_SEED, int32_t map_sz, curandState_t * rand_states){
	int32_t offset = blockIdx.x*map_sz + threadIdx.x;
	curand_init(RAND_SEED + offset, 0, 1, &rand_states[offset]);
}
 5 | 
// Allocate SZ elements of VAR's element type on the device. Needs a local `cudaError_t err`.
// NOTE(review): MALLOC_ERR_CHECK_R is defined outside this view; presumably it reports/aborts on failure.
#define CMALLOC(VAR, SZ) {err = cudaMalloc((void**) &VAR, SZ*sizeof(VAR[0])); MALLOC_ERR_CHECK_R}
// Host-side allocation helpers; each ASSERTs that malloc did not return NULL.
#define MALLOC_CHAR(VAR, SZ) {VAR = (char*) malloc(SZ*sizeof(VAR[0])); ASSERT(VAR != 0, "malloc failed"); } 
#define MALLOC_INT32(VAR, SZ) {VAR = (int32_t*) malloc(SZ*sizeof(VAR[0])); ASSERT(VAR != 0, "malloc failed"); } 
#define MALLOC_UINT32(VAR, SZ) {VAR = (uint32_t*) malloc(SZ*sizeof(VAR[0])); ASSERT(VAR != 0, "malloc failed"); } 
10 | 
11 | void init_op_launcher(){
12 | 	cudaError_t err;
13 | 	op_initialized = 1;
14 | 
15 | 	///////////////////////////////// gpu buffers
16 | 	// game state
17 | 	CMALLOC(board, BATCH_MAP_SZ); 
18 | 	CMALLOC(board2, BATCH_MAP_SZ); 
19 | 
20 | 	CMALLOC(board_prev, BATCH_MAP_SZ);
21 | 	CMALLOC(board_prev2, BATCH_MAP_SZ);
22 | 
23 | 	CMALLOC(board_pprev, BATCH_MAP_SZ);
24 | 	CMALLOC(board_pprev2, BATCH_MAP_SZ);
25 | 
26 | 	CMALLOC(n_captures, N_PLAYERS*BATCH_SZ);
27 | 	CMALLOC(n_captures2, N_PLAYERS*BATCH_SZ);
28 | 
29 | 	CMALLOC(ai_to_coord, BATCH_SZ); // input to move_unit, output from move_random_ai
30 | 
31 | 	CMALLOC(valid_mv_map_internal, BATCH_MAP_SZ) // input to move_unit, output from create_batch
32 | 	
33 | 	CMALLOC(moved_internal, BATCH_SZ) // [BATCH_SZ] used in move_random_ai, req. input to move_unit_launcher, results not used
34 | 
35 | 	////// random seed
36 | 	int32_t RAND_SEED = time(NULL);
37 | 	err = cudaMalloc((void**) &rand_states, BATCH_MAP_SZ*sizeof(curandState_t));
38 | 	init_rand_states <<< BATCH_SZ, MAP_SZ >>> (RAND_SEED, MAP_SZ, rand_states);
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/kernels/init_state.cu:
--------------------------------------------------------------------------------
// Reset the batch to a fresh set of games: zero the current and the two history boards
// and the capture counters for all players.
// NOTE(review): CHECK_INIT is defined outside this view; presumably it ensures init_op_launcher has run.
void init_state_launcher() {
	CHECK_INIT

	cudaError_t err = cudaMemset(board, 0, sizeof(board[0])*BATCH_MAP_SZ); CHECK_CUDA_ERR
	err = cudaMemset(board_prev, 0, sizeof(board[0])*BATCH_MAP_SZ); CHECK_CUDA_ERR
	err = cudaMemset(board_pprev, 0, sizeof(board[0])*BATCH_MAP_SZ); CHECK_CUDA_ERR
	
	err = cudaMemset(n_captures, 0, sizeof(n_captures[0])*N_PLAYERS*BATCH_SZ); CHECK_CUDA_ERR
}
10 | 
11 | 


--------------------------------------------------------------------------------
/kernels/max_prob_to_coord_valid_mvs.cu:
--------------------------------------------------------------------------------
// For each game (one block per game), write to to_coord[gm] the valid board position with the
// highest probability in prob_map, or -1 when the loop below visits no position.
// NOTE(review): COUNT_VALID is defined outside this view; it presumably fills valid_mv_inds /
// n_valid_mvs from valid_mv_map_internal, with index 0 reserved for the pass move.
__global__ void max_prob_to_coord_valid_mvs_kernel(half * prob_map, int16_t * to_coord, 
		char * board, char * valid_mv_map_internal){
	int gm = blockIdx.x;
	int gm_offset = gm*MAP_SZ;
	half * prob_map_cur = &prob_map[gm_offset]; // this game's slice of the probability map

	COUNT_VALID

	// determine max prob
	float max_prob = -999;
	int16_t max_map_loc = -1;
	for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move
		int map_loc = valid_mv_inds[mv_ind];
		CHK_VALID_MAP_COORD(map_loc)
		DASSERT(board[gm*MAP_SZ + map_loc] == 0)
		// ties keep the earlier position
		if((float)prob_map_cur[map_loc] <= max_prob)
			continue;
		max_map_loc = map_loc;
		max_prob = prob_map_cur[map_loc];
	}

	to_coord[gm] = max_map_loc;
}
24 | 
// Host entry point: launch max_prob_to_coord_valid_mvs_kernel with one single-thread block per game.
// `prob_map` is declared float* but is reinterpreted as half* for the kernel.
void max_prob_to_coord_valid_mvs_launcher(float * prob_map, int16_t * to_coord){
	cudaError_t err;
	REQ_INIT

	max_prob_to_coord_valid_mvs_kernel <<< BATCH_SZ, 1 >>> ((half*)prob_map, to_coord, board, 
		valid_mv_map_internal); CHECK_CUDA_ERR

	VERIFY_BUFFER_INTEGRITY
}
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/kernels/move_random_ai.cu:
--------------------------------------------------------------------------------
 1 | __global__ void move_random_ai_kernel(int16_t * to_coord, char * board, curandState_t* rand_states, char * valid_mv_map_internal){
 2 | 	
 3 | 	int gm = blockIdx.x;
 4 | 	int gm_offset = gm*MAP_SZ;
 5 | 
 6 | 	COUNT_VALID
 7 | 
 8 | 	// select random move
 9 | 	int rand_ind = (curand(&rand_states[gm]) % (n_valid_mvs-1)) + 1;
10 | 	
11 | 	to_coord[gm] = valid_mv_inds[rand_ind];
12 | 
13 | 	DASSERT(to_coord[gm] >= 0 && to_coord[gm] < MAP_SZ && board[gm_offset + to_coord[gm]] == 0)
14 | 
15 | }
16 | 
// Host entry point: choose a random valid move for every game into ai_to_coord, then apply
// those moves through move_unit_launcher. The per-game `moved` result goes to moved_internal.
void move_random_ai_launcher(int8_t * moving_player){
	cudaError_t err;
	REQ_INIT

	move_random_ai_kernel <<< BATCH_SZ, 1 >>> (ai_to_coord, board, rand_states, valid_mv_map_internal); CHECK_CUDA_ERR

	move_unit_launcher(ai_to_coord, moving_player, moved_internal); 
	VERIFY_BUFFER_INTEGRITY
}
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/kernels/move_unit.cu:
--------------------------------------------------------------------------------
// number of orthogonal neighbours of a board position
#define N_ADJ 4

// Opens a for-loop over the in-bounds orthogonal neighbours of flat coordinate COORD.
// Inside the loop body `coord_i` is the neighbour's flat coordinate. The macro leaves the
// loop's brace open: the user must close it with `}`. It declares coord_x, coord_y, X_adj,
// Y_adj in the enclosing scope, so it can be used only once per scope.
#define ADJ_LOOP(COORD)  \
		int coord_x = COORD / MAP_SZ_Y;\
		int coord_y = COORD % MAP_SZ_Y;\
		\
		int X_adj[N_ADJ] = {0, -1, 1, 0};\
		int Y_adj[N_ADJ] = {-1, 0, 0, 1};\
		for(int adj = 0; adj < N_ADJ; adj++){\
			int coord_px = coord_x + X_adj[adj];\
			int coord_py = coord_y + Y_adj[adj];\
			if(coord_py < 0 || coord_py >= MAP_SZ_Y ||\
				coord_px < 0 || coord_px >= MAP_SZ_X)\
					continue;\
			int coord_i = coord_px*MAP_SZ_Y + coord_py;\
 16 | 
 17 | 
// Examine the unchecked orthogonal neighbours of `coord`:
//   - an empty neighbour means a liberty was found: return -1 immediately;
//   - a neighbour holding op_player_val is marked checked and pushed onto coord_stack.
// Returns the updated stack size when no empty neighbour was seen.
__device__ inline int add_adj_to_stack(int16_t coord, int16_t * coord_stack, int coord_stack_sz, 
		char * checked, char op_player_val, int game_offset, char * board){
	DASSERT(coord >= 0 && coord < MAP_SZ);

	ADJ_LOOP(coord)
		if(checked[coord_i]) // already checked
			continue;

		if(board[game_offset + coord_i] == 0) return -1;

		// add to stack
		if(board[game_offset + coord_i] == op_player_val){
			checked[coord_i] = 1;
			coord_stack[coord_stack_sz] = coord_i;
			coord_stack_sz ++;
			DASSERT(coord_stack_sz < MAP_SZ)
		}
	} // adj

	return coord_stack_sz;
}
 39 | 
// Grow the search stack from COORD; stores add_adj_to_stack's result (new size, or -1 on liberty)
// in *coord_stack_sz. Requires coord_stack, coord_stack_sz (pointer), checked, game_offset, board in scope.
#define ADD_ADJ_TO_STACK(COORD, PLAYER_VAL) *coord_stack_sz = add_adj_to_stack(COORD, coord_stack, \
		*coord_stack_sz, checked, PLAYER_VAL, game_offset, board);

// Liberty test on the global board; requires game_offset, board, coord_stack, coord_stack_sz (int) in scope.
#define LIBERTY(COORD, PLAYER_VAL) return_liberty(COORD, PLAYER_VAL, game_offset, board, coord_stack, &coord_stack_sz)
// Returns 1 when the group of player_val stones connected to `coord` touches an empty position.
// When it returns 0, coord_stack[0 .. *coord_stack_sz) holds the other stones of that group
// (`coord` itself is not on the stack). When it returns 1, *coord_stack_sz is -1.
__device__ inline char return_liberty(int16_t coord, char player_val, int game_offset, char * board,
		int16_t * coord_stack, int * coord_stack_sz){
	char checked[MAP_SZ];

	//////////// check if there exists a liberty for the placed stone
	*coord_stack_sz = 0;
	// note: only the zeroing is inside the loop; checked[coord] = 1 runs once afterwards
	for(int i = 0; i < MAP_SZ; i++) checked[i] = 0; checked[coord] = 1;

	ADD_ADJ_TO_STACK(coord, player_val)

	// the loop also ends as soon as a liberty is found, because *coord_stack_sz becomes -1
	for(int stack_i = 0; stack_i < *coord_stack_sz; stack_i++){
		int16_t coord_j = coord_stack[stack_i];
		
		DASSERT(coord_j >= 0 && coord_j < MAP_SZ)
		DASSERT(board[game_offset + coord_j] == player_val)
		
		ADD_ADJ_TO_STACK(coord_j, player_val)

	} // stack
	
	return *coord_stack_sz == -1;
}
 66 | 
// Apply one move per game (one block per game).
// to_coord[gm] outside [0, MAP_SZ) is ignored (no stone placed). Otherwise the stone is placed
// only if the position is empty and flagged in valid_mv_map_internal; adjacent opposing groups
// left without a liberty are removed and counted in n_captures. moved[gm] is 1 only when the
// stone remains on the board at the end.
// NOTE(review): player_val comes from GET_PLAYER_VAL, defined outside this view.
__global__ void move_unit_kernel(int16_t *to_coord, int8_t *moving_player, char * board, int16_t * n_captures, char * moved, char * valid_mv_map_internal){
	int gm = blockIdx.x;
	int game_offset = gm * MAP_SZ;

	moved[gm] = 0;

	// out-of-range coordinate (e.g. -1): nothing to place
	if(to_coord[gm] < 0 || to_coord[gm] >= MAP_SZ) return;

	DASSERT(*moving_player == 0 || *moving_player == 1);

	GET_PLAYER_VAL

	int16_t coord = to_coord[gm];

	// position not empty. shouldn't happen? (only when nn is making moves directly frm outputs)
	if(board[game_offset + coord] != 0) return;

	///////////////// check if we have listed this is a valid mv
	if(!valid_mv_map_internal[game_offset + coord]) return; // invalid move
	
	///////////////////////////
	
	board[game_offset + coord] = player_val;

	// adj search vars
	int16_t coord_stack[MAP_SZ];
	int coord_stack_sz;

	///////////// check if we should remove stones
	char removed_stones = 0;
	
	ADJ_LOOP(coord)
		if(board[game_offset + coord_i] == (-player_val) &&
				!LIBERTY(coord_i, -player_val)){

			removed_stones = 1;
			DASSERT(board[game_offset + coord_i] == (-player_val))
			board[game_offset + coord_i] = 0;
			// capture counters are laid out as [player][game]
			n_captures[*moving_player*BATCH_SZ + gm] ++;

			// coord_stack was filled by the LIBERTY call above with the rest of the captured group
			for(int stack_i = 0; stack_i < coord_stack_sz; stack_i++){
				int coord_j = coord_stack[stack_i];
				DASSERT(board[game_offset + coord_j] == (-player_val))
				board[game_offset + coord_j] = 0;
				n_captures[*moving_player*BATCH_SZ + gm] ++;

			} // stack

		} // opposing player / liberty check
	} // adj

	///////////////// if we've not removed stones, make sure there's a liberty for the placed stone
	if(!removed_stones && !LIBERTY(coord, player_val))
		board[game_offset + coord] = 0;

	// surrounded & could not capture
	if(board[game_offset + coord] == 0) return;
	
	moved[gm] = 1;
}
127 | 
// Host entry point: shift the board history, then apply one move per game with move_unit_kernel.
// NOTE(review): BMEM is defined outside this view; from its uses here it presumably copies the
// second buffer into the first (board_prev -> board_pprev, then board -> board_prev).
void move_unit_launcher(int16_t * to_coord, int8_t * moving_player, char * moved){
	REQ_INIT
	cudaError_t err;

	BMEM(board_pprev, board_prev, BATCH_MAP_SZ)
	BMEM(board_prev, board, BATCH_MAP_SZ)
	move_unit_kernel <<< BATCH_SZ, 1 >>> (to_coord, moving_player, board, n_captures, moved, valid_mv_map_internal);

	CHECK_CUDA_ERR
	VERIFY_BUFFER_INTEGRITY
}
139 | 
140 | 


--------------------------------------------------------------------------------
/kernels/prob_to_coord.cu:
--------------------------------------------------------------------------------
// resolution of the uniform random draw: rand_val takes values k/RAND_RES for k in [0, RAND_RES)
#define RAND_RES 100000
// probability of position `loc` in the current game, normalised by the game's probability sum;
// requires prob_map, MO, loc and probs_sum_orig in scope
#define PROB ((float)prob_map[MO + loc] / probs_sum_orig)

// For each game (one block per game), sample a board position from prob_map by walking the
// cumulative normalised probabilities until the random draw falls inside a position's interval.
// Writes the position to to_coord[gm], or -1 if no interval contained the draw.
__global__ void prob_to_coord_kernel(half * prob_map, int16_t * to_coord, curandState_t* rand_states){
	int gm = blockIdx.x;
	int MO = gm*MAP_SZ; // offset of this game's slice of prob_map
	float rand_val = (float)(curand(&rand_states[gm]) % RAND_RES);
	rand_val /= (float)RAND_RES;

	float probs_sum_orig = 0;
	MAP_LOOP
		probs_sum_orig += (float)prob_map[MO + loc];
	assert(probs_sum_orig >= 0);

	float probs_sum = 0;
	MAP_LOOP{
		if(PROB < 0 || PROB > 1)
			printf("PROB %f\n", PROB);
		//DASSERT(PROB >= 0 && PROB <= 1)

		if((rand_val >= probs_sum) && (rand_val < (probs_sum + PROB))){
			to_coord[gm] = loc;
			return;
		}
		probs_sum += PROB;
	}

	// reached only when no position was selected above
	to_coord[gm] = -1;

	DASSERT(probs_sum <= 1.01)
	DASSERT(probs_sum >= .999)
}
33 | 
// Host entry point: launch prob_to_coord_kernel with one single-thread block per game.
// `prob_map` is declared float* but is reinterpreted as half* for the kernel.
void prob_to_coord_launcher(float * prob_map, int16_t * to_coord){
	REQ_INIT
	cudaError_t err; 

	prob_to_coord_kernel <<< BATCH_SZ, 1 >>> ((half*)prob_map, to_coord, rand_states);

	CHECK_CUDA_ERR
	VERIFY_BUFFER_INTEGRITY
}
43 | 
44 | 


--------------------------------------------------------------------------------
/kernels/prob_to_coord_valid_mvs.cu:
--------------------------------------------------------------------------------
// For each game (one block per game), sample a board position from prob_map restricted to the
// valid moves, renormalising over those moves. The last valid move is chosen as a fallback when
// the random draw matched no interval. Writes -1 only when the loops visit no move at all.
// NOTE(review): COUNT_VALID is defined outside this view; it presumably fills valid_mv_inds /
// n_valid_mvs from valid_mv_map_internal, with index 0 reserved for the pass move.
__global__ void prob_to_coord_valid_mvs_kernel(half * prob_map, int16_t * to_coord, 
		char * board, curandState_t* rand_states, char * valid_mv_map_internal){
	int gm = blockIdx.x;
	int gm_offset = gm*MAP_SZ;
	half * prob_map_cur = &prob_map[gm_offset]; // this game's slice of the probability map

	COUNT_VALID
	
	// uniform draw on [0, 1) in steps of 1/RAND_RES
	float rand_val = (float)(curand(&rand_states[gm]) % RAND_RES);
	rand_val /= (float)RAND_RES;

	// compute probs sum over valid mvs
	float probs_sum_orig = 0;
	for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move
		int map_loc = valid_mv_inds[mv_ind];
		CHK_VALID_MAP_COORD(map_loc)
		DASSERT(board[gm*MAP_SZ + map_loc] == 0)
		probs_sum_orig += (float)prob_map_cur[map_loc];
	}
	// avoid dividing by zero when every valid move has zero probability
	if(probs_sum_orig == 0) probs_sum_orig = 1;
	//assert(probs_sum_orig >= 0);
	
	float probs_sum = 0;
	for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move
		int16_t map_loc = valid_mv_inds[mv_ind];
		float p = (float)prob_map_cur[map_loc] / probs_sum_orig;
		//if(!(p >= 0 && p <= 1))
		//	printf("prob err %f\n", p);
		//DASSERT(p >= 0 && p <= 1)

		// randomly selected or we're at the last move
		if(((rand_val >= probs_sum) && (rand_val < (probs_sum + p))) || 
				(mv_ind == (n_valid_mvs - 1))){
			to_coord[gm] = map_loc;
			return;
		}
		probs_sum += p;
	}

	to_coord[gm] = -1;
	//assert(0);
}
43 | 
// Host entry point: launch prob_to_coord_valid_mvs_kernel with one single-thread block per game.
// `prob_map` is declared float* but is reinterpreted as half* for the kernel.
void prob_to_coord_valid_mvs_launcher(float * prob_map, int16_t * to_coord){
	cudaError_t err;
	REQ_INIT

	prob_to_coord_valid_mvs_kernel <<< BATCH_SZ, 1 >>> ((half*)prob_map, to_coord, board, rand_states, valid_mv_map_internal); CHECK_CUDA_ERR

	VERIFY_BUFFER_INTEGRITY
}
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/kernels/return_state.cu:
--------------------------------------------------------------------------------
// Copy the device `board` buffer into the host buffer `board_cpu`. Returns 1 on reaching the end.
// NOTE(review): MALLOC_ERR_CHECK is defined outside this view; its failure behaviour is not visible here.
char return_device_buffers(){
	cudaError_t err;

	err = cudaMemcpy(board_cpu, board, BATCH_MAP_SZ*sizeof(board[0]), cudaMemcpyDeviceToHost);  MALLOC_ERR_CHECK

	return 1;
}
8 | 
9 | 


--------------------------------------------------------------------------------
/kernels/return_winner.cu:
--------------------------------------------------------------------------------
// Examine the unchecked orthogonal neighbours of the empty position `coord`:
//   - a stone sets *owner if it is still 0; a stone of a different value than *owner makes the
//     function return 0 (the empty region touches both players);
//   - an empty neighbour is marked checked and pushed onto coord_stack.
// Returns 1 when no conflicting stone was seen among these neighbours.
__device__ inline char add_blank_adj_to_stack(int16_t coord, int16_t * coord_stack, int *coord_stack_sz, 
		char * checked, int game_offset, char * board, int * owner){
	DASSERT(coord >= 0 && coord < MAP_SZ);

	ADJ_LOOP(coord)
		if(checked[coord_i]) // already checked
			continue;

		// touching non-owner, therefore blank space is not owned by owner
		if(board[game_offset + coord_i] != 0){
			if(*owner != 0){
				if(*owner != board[game_offset + coord_i])
					return 0;
			}else
				// set owner
				*owner = board[game_offset + coord_i];
			
		}else{ // space is empty; add to stack

			checked[coord_i] = 1;
			coord_stack[*coord_stack_sz] = coord_i;
			*coord_stack_sz = *coord_stack_sz + 1;

			DASSERT(*coord_stack_sz < MAP_SZ)
		}
	} // adj

	return 1; // blank potentially owned by single player
}

// Convenience wrapper; requires coord_stack, coord_stack_sz, checked, game_offset, board, owner in scope.
#define ADD_BLANK_ADJ_TO_STACK(COORD) add_blank_adj_to_stack(COORD, coord_stack, \
		&coord_stack_sz, checked, game_offset, board, &owner);
 33 | 
 34 | 
// score_tmp is unsigned, so it starts at SCORE_START instead of 0; with at most MAP_SZ
// decrements (one per thread) it cannot reach zero.
#define SCORE_START (MAP_SZ*2)
// upper bound passed to atomicInc / atomicDec
#define LARGE_VAL 99999
// Score each game from the moving player's point of view (one block per game, one thread per
// board position). Each position adds +1 if it holds the moving player's stone or is empty
// territory touching only that player's stones, and -1 for the opponent's equivalent.
// Thread 0 then writes score[game] (net score) and winner[game] (1, -1, or 0 for a tie).
// NOTE(review): player_val comes from GET_PLAYER_VAL, defined outside this view.
__global__ void return_winner_kernel(int8_t * winner, char * board, int8_t * moving_player, int16_t * score){ 
	int32_t game = blockIdx.x;
	int16_t coord = threadIdx.x;
	int game_offset = game*MAP_SZ;
	int gcoord = game_offset + coord;

	GET_PLAYER_VAL

	// per-block accumulator shared by all threads of this game
	__shared__ unsigned score_tmp;
	if(coord == 0) score_tmp = SCORE_START;
	__syncthreads();

	if(board[gcoord] == player_val) // + 1
		atomicInc(&score_tmp, LARGE_VAL);
	else if(board[gcoord] == (-player_val))
		atomicDec(&score_tmp, LARGE_VAL); // -1
	else{
		// determine ownership of blank
		if(board[gcoord] != 0)
			printf("gcoord %i playerval %i board %i\n", gcoord, player_val, board[gcoord]);
		DASSERT(board[gcoord] == 0)
		
		int owner = 0;

		// adj search vars
		char checked[MAP_SZ];
		int16_t coord_stack[MAP_SZ];
		int coord_stack_sz = 0;
		for(int i = 0; i < MAP_SZ; i++) checked[i] = 0; 
		checked[coord] = 1;

		// flood-fill the connected empty region; stops early once two different owners are seen
		int space_owned = ADD_BLANK_ADJ_TO_STACK(coord);
	
		for(int stack_i = 0; space_owned && (stack_i < coord_stack_sz); stack_i++){
			int coord_j = coord_stack[stack_i];

			DASSERT(coord_j >= 0 && coord_j < MAP_SZ)
			DASSERT(board[game_offset + coord_j] == 0)
			
			space_owned = ADD_BLANK_ADJ_TO_STACK(coord_j);
		}

		// add score to winner
		// owner == 0 means the region touched no stones at all; it scores for nobody
		if(space_owned && owner != 0){
			if(owner == player_val)
				atomicInc(&score_tmp, LARGE_VAL);
			else
				atomicDec(&score_tmp, LARGE_VAL);
		}

	} // empty space

	// wait for every position's contribution, then let thread 0 publish the result
	__syncthreads();
	if(coord != 0)
		return;

	score[game] = (int16_t)(score_tmp) - (int16_t)(SCORE_START);
	if(score_tmp > SCORE_START)
		winner[game] = 1;
	else if(score_tmp < SCORE_START)
		winner[game] = -1;
	else
		winner[game] = 0;

}
102 | 
// Host entry point: copy the capture counters for all players into n_captures_out, then launch
// return_winner_kernel with one block per game and one thread per board position.
// NOTE(review): BMEM is defined outside this view; presumably it copies the second buffer into the first.
void return_winner_launcher(int8_t * winner, int8_t * moving_player, int16_t * score, int16_t * n_captures_out){
	REQ_INIT

	cudaError_t err;
	BMEM(n_captures_out, n_captures, N_PLAYERS*BATCH_SZ)

	return_winner_kernel <<< BATCH_SZ, MAP_SZ >>> (winner, board, moving_player, score);
	VERIFY_BUFFER_INTEGRITY
}
112 | 


--------------------------------------------------------------------------------
/kernels/session_backup.cu.cc:
--------------------------------------------------------------------------------
 1 | void session_backup_launcher(){
 2 | 	REQ_INIT
 3 | 	cudaError_t err;
 4 | 
 5 | 	BMEM(board2, board, BATCH_MAP_SZ)
 6 | 	BMEM(board_prev2, board_prev, BATCH_MAP_SZ)
 7 | 	BMEM(board_pprev2, board_pprev, BATCH_MAP_SZ)
 8 | 
 9 | 	BMEM(n_captures2, n_captures, BATCH_SZ)
10 | }
11 | 
12 | void session_restore_launcher(){
13 | 	REQ_INIT
14 | 	cudaError_t err;
15 | 
16 | 	RMEM(board2, board, BATCH_MAP_SZ)
17 | 	RMEM(board_prev2, board_prev, BATCH_MAP_SZ)
18 | 	RMEM(board_pprev2, board_pprev, BATCH_MAP_SZ)
19 | 
20 | 	RMEM(n_captures2, n_captures, BATCH_SZ)
21 | }
22 | 


--------------------------------------------------------------------------------
/kernels/vars.cu.cc:
--------------------------------------------------------------------------------
// Device-to-device copy helpers for vars_launcher. Each requires `op`, `outputs` and `err` in scope:
// with op == RETURN_VARS the internal buffer is copied into `outputs`, otherwise `outputs`
// overwrites the internal buffer.

// whole-batch map, element size taken from the buffer itself
#define CP_MAP(MAP) {   if(op == RETURN_VARS)\
				err = cudaMemcpy(outputs, MAP, BATCH_MAP_SZ*sizeof(MAP[0]), cudaMemcpyDeviceToDevice);\
			else\
				err = cudaMemcpy(MAP, outputs, BATCH_MAP_SZ*sizeof(MAP[0]), cudaMemcpyDeviceToDevice);\
		MALLOC_ERR_CHECK}

// whole-batch map, element size given by type dt
#define CP_MAP_DT(MAP, dt) {   if(op == RETURN_VARS)\
					err = cudaMemcpy(outputs, MAP, BATCH_MAP_SZ*sizeof(dt), cudaMemcpyDeviceToDevice);\
				else\
					err = cudaMemcpy(MAP, outputs, BATCH_MAP_SZ*sizeof(dt), cudaMemcpyDeviceToDevice);\
		MALLOC_ERR_CHECK}

// SZ elements of type dt
#define CP_DT(MAP, SZ, dt) {    if(op == RETURN_VARS)\
					err = cudaMemcpy(outputs, MAP, SZ*sizeof(dt), cudaMemcpyDeviceToDevice);\
				else\
					err = cudaMemcpy(MAP, outputs, SZ*sizeof(dt), cudaMemcpyDeviceToDevice);\
			MALLOC_ERR_CHECK}
18 | 
19 | 
// Read (op == RETURN_VARS) or overwrite (any other op) one internal device buffer via `outputs`.
// var_idx selects the buffer: BOARD_IDX or VALID_MV_MAP_INTERNAL_IDX; any other value panics.
void vars_launcher(int var_idx, void * outputs, char op){
	REQ_INIT
	cudaError_t err;
	if(var_idx == BOARD_IDX) CP_MAP(board)
	else if(var_idx == VALID_MV_MAP_INTERNAL_IDX) CP_MAP(valid_mv_map_internal)
	else PANIC("unknown var_idx, return_vars_launcher");

}
28 | 
29 | 


--------------------------------------------------------------------------------
/kernels/verify_integrity.cu:
--------------------------------------------------------------------------------
// Host-side assertion: print the file/line and exit when COND is false.
#define ASSERT_S(COND) {if(!(COND)){ printf("assertion failure %s:%i\n", __FILE__, __LINE__); exit(1);}}
// VERIFY_BUFFER_INTEGRITY runs verify_buffer_integrity() only in CUDA_DEBUG builds; otherwise it expands to nothing.
#ifdef CUDA_DEBUG
	#define VERIFY_BUFFER_INTEGRITY {if(verify_buffer_integrity() != 1){ printf("assertion failure %s:%i\n", __FILE__, __LINE__); exit(1); }}
	//#define VERIFY_BUFFER_INTEGRITY {printf("verifying %s\n", __FILE__); if(verify_buffer_integrity() != 1){ printf("assertion failure %s:%i\n", __FILE__, __LINE__); exit(1); }}
#else
	#define VERIFY_BUFFER_INTEGRITY 

#endif
 9 | 
// Synchronise the device, copy the board to the host, and check that every board cell of every
// game is 0, 1 or -1. Returns 1 on success and 0 if the device buffers could not be fetched;
// an invalid cell value terminates the process through ASSERT_S.
char verify_buffer_integrity(){
	cudaError_t err = cudaDeviceSynchronize(); CHECK_CUDA_ERR
	
	if(return_device_buffers() != 1){
		printf("err returnning buffers %s:%i\n", __FILE__, __LINE__);
		return 0;
	}
	
	int coord;
	for(int game = 0; game < BATCH_SZ; game++){
		////////////////// map tests
		for(int x = 0; x < MAP_SZ_X; x++){
			for(int y = 0; y < MAP_SZ_Y; y++){
				coord = game*MAP_SZ + x*MAP_SZ_Y + y;
				
				ASSERT_S((board_cpu[coord] == 0) || (board_cpu[coord] == 1) ||
						(board_cpu[coord] == -1));
			} // y
		} // x
		
		////////// todo test stones are not surrounded
	}		
	return 1;
}
34 | 


--------------------------------------------------------------------------------
/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.data-00000-of-00001


--------------------------------------------------------------------------------
/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.index


--------------------------------------------------------------------------------
/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.meta


--------------------------------------------------------------------------------
/net_vs_gnugo.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import os
  3 | import pygame
  4 | import numpy as np
  5 | from numpy import sqrt
  6 | from pygame.locals import *
  7 | import time
  8 | import global_vars as gv
  9 | import tensorflow as tf
 10 | import architectures.tree_tf_op_multi as arch
 11 | import gnu_go_test as gt
 12 | 
 13 | ########################################################## configuration:
 14 | save_nm = 'models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy'
 15 | 
 16 | # load the following variables from the model .npy file:
 17 | save_vars = ['LSQ_LAMBDA', 'LSQ_REG_LAMBDA', 'POL_CROSS_ENTROP_LAMBDA', 'VAL_LAMBDA', 'VALR_LAMBDA', 'L2_LAMBDA',
 18 | 	'FILTER_SZS', 'STRIDES', 'N_FILTERS', 'N_FC1', 'EPS', 'MOMENTUM', 'SAVE_FREQ', 'N_SIM', 
 19 | 	'N_TURNS', 'CPUCT']
 20 | 
 21 | save_d = np.load(save_nm, allow_pickle=True).item()
 22 | for key in save_vars:
 23 | 	if key == 'save_nm':
 24 | 		continue
 25 | 	exec('%s = save_d["%s"]' % (key,key))
 26 | 
 27 | ########## over-write number of simulations previously used:
 28 | N_SIM = 2000 #500
 29 | 
 30 | net = 'eval32'
 31 | #net = 'eval'
 32 | #net = 'main'
 33 | 
 34 | run_one_pass_only = True # run only the network (no tree search)
 35 | #run_one_pass_only = False # make moves from the tree search
 36 | 
 37 | if run_one_pass_only == False:
 38 | 	import py_util.py_util as pu
 39 | 
 40 | TURN_MIN = 5 # if we are near the max turns the network was trained on (N_TURNS), how much farther do we simulate?
 41 | NET_PLAYER = 0 # 0: the network plays first, 1: GNU Go plays first
 42 | 
 43 | ############## load model, init variables
 44 | DEVICE = '/gpu:0'
 45 | arch.init_model(DEVICE, N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM,
 46 | 		LSQ_LAMBDA, LSQ_REG_LAMBDA, POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, VALR_LAMBDA, L2_LAMBDA, training=False)
 47 | 
 48 | arch.saver.restore(arch.sess, save_nm)
 49 | arch.sess.run(arch.init_state)
 50 | 
 51 | visit_count_map = np.zeros((gv.n_rows, gv.n_cols), dtype='int32')
 52 | 
 53 | def ret_d(player): # return dictionary for input into tensor flow
 54 | 	return {arch.moving_player: player}
 55 | 
 56 | def run_sim(turn, starting_player): # simulate game forward
 57 | 	t_start = time.time()
 58 | 	arch.sess.run(arch.session_backup)
 59 | 	pu.session_backup()
 60 | 
 61 | 	for sim in range(N_SIM):
 62 | 		# backup then make next move
 63 | 		# (this loop, iterates over one full game-play from present turn)
 64 | 		for turn_sim in range(turn, np.max((N_TURNS+1, turn+TURN_MIN))):
 65 | 			for player in [0,1]:
 66 | 				if turn_sim == turn and starting_player == 1 and player == 0: # skip player 0, has already moved
 67 | 					continue
 68 | 
 69 | 				# get valid moves, network policy and value estimates:
 70 | 				valid_mv_map, pol, val = arch.sess.run([arch.valid_mv_map, arch.pol[net], arch.val[net]], feed_dict=ret_d(player))
 71 | 
 72 | 				# backup visit Q values
 73 | 				if turn_sim != turn:
 74 | 					pu.backup_visit(player, np.array(val, dtype='single'))
 75 | 
 76 | 				pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree
 77 | 				to_coords = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[0] # choose moves based on policy and Q values (latter of which already stored in tree)
 78 | 				pu.register_mv(player, np.array(to_coords, dtype='int32')) # register move in tree
 79 | 
 80 | 				arch.sess.run(arch.move_frm_inputs, feed_dict={arch.moving_player: player, arch.to_coords_input: to_coords}) # move network (update GPU vars)
 81 | 		
 82 | 		# backup terminal state
 83 | 		winner = np.array(arch.sess.run(arch.winner, feed_dict=ret_d(0)), dtype='single')
 84 | 		pu.backup_visit(0, winner)
 85 | 		pu.backup_visit(1, -winner)
 86 | 
 87 | 		# return move back to previous node in tree
 88 | 		arch.sess.run(arch.session_restore)
 89 | 		pu.session_restore()
 90 | 	
 91 | 		# print progress
 92 | 		if sim % 20 == 0:
 93 | 			print 'simulation: ', sim, ' (%i sec)' % (time.time() - t_start)
 94 | 
 95 | 
 96 | 
#################################
# Play one batch of games, network vs. GNU Go, for N_TURNS turns.
# For every turn and player the network input images are stored in `board`;
# after every turn the current winner and score of each game are stored in
# `winner` and `scores`.
t_start = time.time()
board = np.zeros((N_TURNS, 2, gv.BATCH_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels), dtype='float16')
winner = np.zeros((N_TURNS, gv.BATCH_SZ), dtype='int')
scores = np.zeros((N_TURNS, gv.BATCH_SZ), dtype='int')

# reset the GPU game state (and the search tree when it is in use)
arch.sess.run(arch.init_state)
if run_one_pass_only == False:
	pu.init_tree()

# hand the starting board to the GNU Go side
gt.init_board(arch.sess.run(arch.gm_vars['board']))
gt.move_nn(np.ones(gv.BATCH_SZ, dtype='int')*-1) # when NET_PLAYER=1, for some reason GnuGo doesn't respond unless we pass the first move

turn_start_t = time.time()
for turn in range(N_TURNS):
	for player in [0,1]:
		# network's turn
		if player == NET_PLAYER:
			
			#### make most probable mv, do not use tree search
			if run_one_pass_only:
				# 'eval32' movement ops were not defined, so get policy, from network, and then use the ops in 'eval' (where it was defined)
				d = ret_d(player)
				imgs = arch.sess.run(arch.imgs, feed_dict=d)
				d[arch.imgs32] = np.asarray(imgs, dtype='float')
				pol = arch.sess.run(arch.pol[net], feed_dict=d)	
				d[arch.pol['eval']] = pol
				
				board[turn, player] = imgs
				
				# both ops of each pair are run in one call; only the first result (the coordinates) is kept
				if turn == 0: # choose in proportion to probability
					to_coords = arch.sess.run([arch.nn_prob_to_coords_valid_mvs['eval'], arch.nn_prob_move_unit_valid_mvs['eval']], feed_dict=d)[0]
				else:
					to_coords = arch.sess.run([arch.nn_max_prob_to_coords_valid_mvs['eval'], arch.nn_max_prob_move_unit_valid_mvs['eval']], feed_dict=d)[0]

			##### use tree search
			else:
				# fill the tree with playout statistics for the current position
				run_sim(turn, player)

				board[turn, player], valid_mv_map, pol = arch.sess.run([arch.imgs, arch.valid_mv_map, arch.pol[net]], feed_dict = ret_d(player)) # generate batch and valid moves

				#########
				pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree
				visit_count_map = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[-1] # get number of times each node was visited

				if turn == 0:
					to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit], feed_dict={arch.moving_player: player, 
						arch.visit_count_map: visit_count_map})[0] # make move in proportion to visit counts
				else:
					# the visit counts are fed in place of the policy, so the max-probability op picks the most visited move
					to_coords = arch.sess.run([arch.nn_max_prob_to_coords_valid_mvs[net], arch.nn_max_prob_move_unit_valid_mvs[net]], feed_dict={arch.moving_player: player,
							arch.pol[net]: visit_count_map})[0]
						
			gt.move_nn(to_coords) # tell gnugo where the network moved
		
		# gnugo's turn
		else:
			# mv gnugo
			board[turn, player], valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map], feed_dict = ret_d(player)) # generate batch and valid moves
			
			# register valid moves in tree:
			if run_one_pass_only == False:
				pu.add_valid_mvs(player, valid_mv_map)

			to_coords = gt.move_ai() # get move from gnu go
			
			# update gpu game state w/ move:
			arch.sess.run(arch.nn_max_move_unit['eval'], feed_dict={arch.moving_player: player, arch.nn_max_to_coords['eval']: to_coords})
		
		print turn, player
		
		# register move in tree:
		if run_one_pass_only == False:
			pu.register_mv(player, np.array(to_coords, dtype='int32'))
	
	# record the standing of every game after this turn
	winner[turn], scores[turn] = arch.sess.run([arch.winner, arch.score], feed_dict={arch.moving_player: NET_PLAYER})

	# prune tree
	if run_one_pass_only == False and turn != (N_TURNS-1):
		pu.prune_tree(0) # 0: prune all games in batch, 1: prune only first game
	
	if (turn+1) % 2 == 0:
		print 'eval finished turn %i (%i sec)' % (turn, time.time() - turn_start_t)
179 | 
180 | 
####### printing
# final winner / score of every game in the batch, queried with the network as moving player
res, score = arch.sess.run([arch.winner, arch.score], feed_dict={arch.moving_player: NET_PLAYER})
if run_one_pass_only:
	match_str = 'network run-once (per turn) mode'
else:
	# format the count into the text (previously this built a (str, int) tuple by accident)
	match_str = 'using self-play w/ %i playout batches / turn' % N_SIM

# win fraction is relative to the actual batch size rather than a hard-coded 128
n_net_wins = (res == 1).sum()
print('wins %s %s ties %s opp wins %s %s' % (n_net_wins, n_net_wins / float(gv.BATCH_SZ),
		(res == 0).sum(), (res == -1).sum(), match_str))


######### save results to npy file
fname = '/tmp/'
if run_one_pass_only:
	fname += 'test_one_pass_vs_gnu.npy'
else:
	fname += 'test_%i_N_SIM_vs_gnu.npy' % N_SIM
	print(N_SIM)

# one dictionary holding the run settings, the recorded inputs and the per-turn / final results
np.save(fname, {'run_one_pass_only': run_one_pass_only, 'N_SIM': N_SIM, 'board': board,
		'res': res, 'score': score, 'winner': winner, 'scores': scores})
201 | 
202 | 


--------------------------------------------------------------------------------
/notebooks/go_black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/notebooks/go_black.png


--------------------------------------------------------------------------------
/notebooks/go_blank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/notebooks/go_blank.png


--------------------------------------------------------------------------------
/notebooks/go_pieces.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/notebooks/go_pieces.png


--------------------------------------------------------------------------------
/notebooks/go_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/notebooks/go_white.png


--------------------------------------------------------------------------------
/play_network_gui.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import os
  3 | import pygame
  4 | import numpy as np
  5 | from numpy import sqrt
  6 | from pygame.locals import *
  7 | import time
  8 | from datetime import datetime
  9 | import global_vars as gv
 10 | import tensorflow as tf
 11 | import architectures.tree_tf_op_multi as arch
 12 | 
 13 | ########################################################## configuration:
 14 | save_nm = 'models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy'
 15 | 
 16 | net = 'eval32'
 17 | #net = 'eval'
 18 | #net = 'main'
 19 | 
 20 | run_one_pass_only = True # run only the network (no tree search)
 21 | #run_one_pass_only = False # make moves from the tree search
 22 | 
 23 | show_txt = False # don't show statistics of each move (Q and P values, visit counts) -- toggle w/ right click after network makes move
 24 | 
 25 | # load the following variables from the model .npy file:
 26 | save_vars = ['LSQ_LAMBDA', 'LSQ_REG_LAMBDA', 'POL_CROSS_ENTROP_LAMBDA', 'VAL_LAMBDA', 'VALR_LAMBDA', 'L2_LAMBDA',
 27 | 	'FILTER_SZS', 'STRIDES', 'N_FILTERS', 'N_FC1', 'EPS', 'MOMENTUM', 'SAVE_FREQ', 'N_SIM', 'N_TURNS', 'CPUCT']
 28 | save_d = np.load(save_nm, allow_pickle=True).item()
 29 | for key in save_vars:
 30 | 	if key == 'save_nm':
 31 | 		continue
 32 | 	exec('%s = save_d["%s"]' % (key,key))
 33 | 
 34 | if run_one_pass_only == False:
 35 | 	import py_util.py_util as pu
 36 | 
 37 | ########## over-write number of simulations previously used:
 38 | # (stop self-play when both of these (the next two) conditions is met)
 39 | SIM_MIN = 2000
 40 | TIME_MIN = 1 # time spent running self-play exceeds this (minutes)
 41 | 
 42 | ###
 43 | TURN_MIN = 5 # if we are near the max turns the network was trained on (N_TURNS), how much farther do we simulate?
 44 | CPUCT = 1
 45 | NET_PLAYER = 0 # 0: the network plays first, 1: you play first
 46 | 
 47 | def human_player():
 48 | 	global NET_PLAYER
 49 | 	assert NET_PLAYER == 1 or NET_PLAYER == 0
 50 | 	return 1 - NET_PLAYER
 51 | 
 52 | ###############################################################################
 53 | save_screenshot_flag = True
 54 | 
 55 | img_sdir = 'go_games_imgs/'
 56 | img_sdir += datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
 57 | os.system('mkdir ' + img_sdir)
 58 | os.system("echo %s > %s/model_location.txt" % (save_nm, img_sdir))
 59 | 
 60 | ############## load model, init variables
 61 | DEVICE = '/gpu:0'
 62 | arch.init_model(DEVICE, N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM,
 63 | 		LSQ_LAMBDA, LSQ_REG_LAMBDA, POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, VALR_LAMBDA, L2_LAMBDA, training=False)
 64 | 
 65 | arch.saver.restore(arch.sess, save_nm)
 66 | arch.sess.run(arch.init_state)
 67 | if run_one_pass_only == False:
 68 | 	pu.init_tree()
 69 | 
 70 | ##### stats to print if show_txt = True
 71 | Q_map = np.zeros((gv.n_rows, gv.n_cols), dtype='single')
 72 | Q_map_next = np.zeros_like(Q_map) # Q values for the move after the current (assuming you make the move the network predicts you will)
 73 | P_map = np.zeros_like(Q_map)
 74 | P_map_next = np.zeros_like(Q_map)
 75 | visit_count_map = np.zeros((gv.n_rows, gv.n_cols), dtype='int32')
 76 | visit_count_map_next = np.zeros_like(visit_count_map)
 77 | 
 78 | t_init = time.time()	
 79 | 
 80 | def ret_d(player): # return dictionary for input into tensor flow
 81 | 	return {arch.moving_player: player}
 82 | 
 83 | def ret_stats(player): # return Q map, P map, and visit count maps
 84 | 	pol = np.zeros((gv.BATCH_SZ, gv.map_szt), dtype='float32')
 85 | 	pol[:,0] = 1
 86 | 	Q_map, P_map, visit_count_map = pu.choose_moves(player, pol, CPUCT)[1:]
 87 | 
 88 | 	Q_map = Q_map.reshape((gv.BATCH_SZ, gv.n_rows, gv.n_cols))[0]
 89 | 	P_map = P_map.reshape((gv.BATCH_SZ, gv.n_rows, gv.n_cols))[0]
 90 | 	visit_count_map = visit_count_map.reshape((gv.BATCH_SZ, gv.n_rows, gv.n_cols))[0]
 91 | 
 92 | 	return Q_map, P_map, visit_count_map
 93 | 
 94 | 
 95 | # move neural network
 96 | def nn_mv():
 97 | 	global Q_map, P_map, visit_count_map, valid_mv_map, pol
 98 | 	global Q_map_next, P_map_next, visit_count_map_next, to_coords
 99 | 	
100 | 	t_start = time.time()
101 | 	arch.sess.run(arch.session_backup)
102 | 	
103 | 	#### make most probable mv, do not use tree search
104 | 	if run_one_pass_only:
105 | 		# 'eval32' movement ops were not defined, so get policy, from network, and then use the ops in 'eval' (where it was defined)
106 | 		d = ret_d(NET_PLAYER)
107 | 		imgs = arch.sess.run(arch.imgs, feed_dict=d)
108 | 		d[arch.imgs32] = np.asarray(imgs, dtype='float')
109 | 		pol = arch.sess.run(arch.pol[net], feed_dict=d)	
110 | 		d = ret_d(NET_PLAYER)
111 | 		d[arch.pol['eval']] = pol
112 | 
113 | 		if turn == 0:	
114 | 			arch.sess.run(arch.nn_prob_move_unit_valid_mvs['eval'], feed_dict=d)
115 | 		else:
116 | 			arch.sess.run(arch.nn_max_prob_move_unit_valid_mvs['eval'], feed_dict=d)
117 | 
118 | 		#Q_map, P_map, visit_count_map = ret_stats(0)
119 | 	
120 | 	##### use tree search
121 | 	else:
122 | 		#pu.init_tree()
123 | 		pu.session_backup()
124 | 
125 | 		sim = 0
126 | 		# each loop is one simulation
127 | 		while True:
128 | 			if ((time.time() - t_start) > TIME_MIN) and (sim >= SIM_MIN):
129 | 				break
130 | 			
131 | 			# backup then make next move
132 | 			# (this loop, iterates over one full game-play from present turn)
133 | 			for turn_sim in range(turn, np.max((N_TURNS+1, turn+TURN_MIN))):
134 | 				for player in [0,1]:
135 | 					if turn_sim == turn and human_player() == 0 and player == 0: # skip player 0 (human), has already moved
136 | 						continue
137 | 
138 | 					# get valid moves, network policy and value estimates:
139 | 					valid_mv_map, pol, val = arch.sess.run([arch.valid_mv_map, arch.pol[net], arch.val[net]], feed_dict=ret_d(player))
140 | 
141 | 					# backup visit Q values
142 | 					if turn_sim != turn:
143 | 						pu.backup_visit(player, np.array(val, dtype='single'))
144 | 
145 | 					pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree
146 | 					to_coords = pu.choose_moves(player, np.array(pol, dtype='float32'), CPUCT)[0] # choose moves based on policy and Q values (latter of which already stored in tree)
147 | 					
148 | 					pu.register_mv(player, np.array(to_coords, dtype='int32')) # register move in tree
149 | 					arch.sess.run(arch.move_frm_inputs, feed_dict={arch.moving_player: player, arch.to_coords_input: to_coords}) # move network (update GPU vars)
150 | 
151 | 			# backup terminal state
152 | 			winner = np.array(arch.sess.run(arch.winner, feed_dict=ret_d(0)), dtype='single')
153 | 			pu.backup_visit(0, winner)
154 | 			pu.backup_visit(1, -winner)
155 | 			
156 | 			# return move to previous node in tree
157 | 			arch.sess.run(arch.session_restore) # reset gpu game state
158 | 			pu.session_restore() # reset cpu tree state
159 | 			
160 | 			######################
161 | 			# print stats from tree
162 | 			if sim % 20 == 0:
163 | 				# get valid moves, network policy and value estimates:
164 | 				valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map], feed_dict=ret_d(NET_PLAYER))[1]
165 | 				pu.add_valid_mvs(NET_PLAYER, valid_mv_map) # register valid moves in tree
166 | 				
167 | 				visit_count_map_128 = pu.choose_moves(NET_PLAYER, np.array(pol, dtype='float32'), CPUCT)[-1] # to feed back into tf (entries for all 128 games, not just 1)
168 | 				Q_map, P_map, visit_count_map = ret_stats(NET_PLAYER) # stats we will show on screen
169 | 				
170 | 				# move network where it is estimates is its best move
171 | 				to_coords = arch.sess.run([arch.nn_max_prob_to_coords_valid_mvs[net], arch.nn_max_prob_move_unit_valid_mvs[net]], feed_dict={arch.moving_player: NET_PLAYER,
172 | 						arch.pol[net]: visit_count_map_128})[0]
173 | 
174 | 				pu.register_mv(NET_PLAYER, np.asarray(to_coords, dtype='int32')) # register move in tree
175 | 				arch.sess.run(arch.move_frm_inputs, feed_dict={arch.moving_player: NET_PLAYER, arch.to_coords_input: to_coords}) # move network (update GPU vars)
176 | 
177 | 				# get network tree estimates as to where it thinks you will move after it moves
178 | 				valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map], feed_dict=ret_d(human_player()))[1]
179 | 				pu.add_valid_mvs(human_player(), valid_mv_map) # register valid moves in tree
180 | 
181 | 				Q_map_next, P_map_next, visit_count_map_next = ret_stats(human_player())
182 | 
183 | 				arch.sess.run(arch.session_restore) # restore prior tf game state
184 | 				pu.session_restore() # restore prior tree
185 | 
186 | 				draw(True)
187 | 				pygame.display.set_caption('%i %2.1f' % (sim, time.time() - t_start))
188 | 				
189 | 				print 'simulation: ', sim, ' (%i sec)' % (time.time() - t_start)
190 | 			
191 | 			sim += 1
192 | 
193 | 		### make move
194 | 		
195 | 		# first get valid moves and current policy at board position
196 | 		valid_mv_map, pol = arch.sess.run([arch.imgs, arch.valid_mv_map, arch.pol[net]], feed_dict = ret_d(NET_PLAYER))[1:]
197 | 		pu.add_valid_mvs(NET_PLAYER, valid_mv_map) # set in tree
198 | 		
199 | 		visit_count_map_128 = pu.choose_moves(NET_PLAYER, np.array(pol, dtype='float32'), CPUCT)[-1] # to feed back into tf (entries for all 128 games, not just 1)
200 | 		Q_map, P_map, visit_count_map = ret_stats(NET_PLAYER)
201 | 
202 | 		# makes moves as if this were still part of the self-play (max visit count)
203 | 		#to_coords = arch.sess.run([arch.tree_det_visit_coord, arch.tree_det_move_unit], feed_dict={arch.moving_player: 0, 
204 | 		#				arch.visit_count_map: visit_count_map})[0]
205 | 		
206 | 		# move to max visited node:
207 | 		#if turn != 0:
208 | 		to_coords = arch.sess.run([arch.nn_max_prob_to_coords_valid_mvs[net], arch.nn_max_prob_move_unit_valid_mvs[net]], feed_dict={arch.moving_player: NET_PLAYER,
209 | 						arch.pol[net]: visit_count_map_128})[0]
210 | 		
211 | 		# randomly move proportionatly to vist counts
212 | 		#else:
213 | 		#	to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit], feed_dict={arch.moving_player: 0, 
214 | 		#			arch.visit_count_map: visit_count_map})[0] # make move in proportion to visit counts
215 | 
216 | 		pu.register_mv(NET_PLAYER, np.array(to_coords, dtype='int32'))
217 | 		
218 | 		print 'pruning...'
219 | 		pu.prune_tree(1) # 0: prune all games in batch, 1: prune only first game
220 | 		print time.time() - t_start
221 | 
222 | 	print 'finished'
223 | 	return arch.sess.run(arch.gm_vars['board'])[0]
224 | 
def save_screenshot(player):
	"""Write the current window contents to a PNG inside img_sdir.

	player: tag placed in the file name (the callers pass 'b' or 'w').
	Does nothing when save_screenshot_flag is False.
	"""
	if not save_screenshot_flag:
		return

	name_fields = (img_sdir, t_init, turn, player, net, run_one_pass_only, NET_PLAYER)
	fname = "%s/%i_%i_%s_net_%s_one_pass_%i_ai_player_%i" % name_fields

	# in tree-search mode the file name also records the minimum playout count
	if not run_one_pass_only:
		fname = fname + ('_%isims' % SIM_MIN)

	pygame.image.save(windowSurface, fname + '.png')
234 | 
##################### display
# Window geometry, pygame resources and the module-level state used by draw() and the event loop.
psz = 50 # size to display pieces
pszh = psz/2. # half a cell: offset from a cell's corner to its centre
n_txt_rows = 4
window_sz = (psz*gv.n_rows, psz*gv.n_cols)

BLACK = (0,)*3
LINE_WIDTH = 2
turn = 0 # incremented by the event loop after each network reply

windowSurface = pygame.display.set_mode(window_sz, 0, 32)
pygame.display.set_caption('Go GUI')

# NOTE(review): pygame.init() runs after the window has been created; it is needed here
# at the latest because the font module is used on the next line.
pygame.init()
basicFont = pygame.font.SysFont(None, 15) # < font size

# piece and background images
whitep = pygame.image.load('notebooks/go_white.png')
blackp = pygame.image.load('notebooks/go_black.png')
blank = pygame.image.load('notebooks/go_blank.png')

# pieces are scaled to one cell, the background to the whole window
whitep = pygame.transform.scale(whitep, (psz, psz))
blackp = pygame.transform.scale(blackp, (psz, psz))
blank = pygame.transform.scale(blank, window_sz)


# pixel coordinate of each cell centre; the event loop uses it for both x and y,
# NOTE(review): which assumes gv.n_rows == gv.n_cols -- confirm
centers = np.arange(gv.n_rows)*psz + pszh
# move coordinates fed to the network ops for the human move; the event loop only sets entry 0, the rest stay -1
to_coords_manual = -np.ones(gv.BATCH_SZ, dtype='int32')

# board shown on screen: draw() renders 1 with the black image and -1 with the white image
board = np.zeros((gv.n_rows, gv.n_cols), dtype='int8')
264 | 
# draw text over partially transparent background
def draw_txt(txt, tcoord, tsz, bgc):
	"""Blit `txt` onto the window at `tcoord`, on top of a half-transparent box.

	txt: string to render with basicFont.
	tcoord: position of the box and of the text.
	tsz: size of the background box.
	bgc: colour of the background box.
	"""
	# background box, alpha 128
	backdrop = pygame.Surface(tsz)
	backdrop.set_alpha(128)
	backdrop.fill(bgc)
	windowSurface.blit(backdrop, tcoord)

	# white, anti-aliased text on top of it
	white = [255,255,255]
	label = basicFont.render(txt, True, white)
	windowSurface.blit(label, tcoord)
277 | 
278 | 
# draw the two text lines of tree statistics for board position (i, j)
def _draw_stats(coord, i, j, Q, P, visits, y_offset, color_idx):
	"""Draw 'Q P' and 'Q+P visit_count' for position (i, j); return the height used.

	coord: pixel coordinate of the cell's corner.
	Q, P, visits: statistic maps to read position (i, j) from.
	y_offset: vertical distance by which both lines are shifted down.
	color_idx: index (0: red, 1: green) of the background colour channel whose
		intensity encodes this position's share of all visits.
	"""
	visit_total = visits.sum()
	n_visits = visits.reshape(gv.map_sz)[i,j]

	# background intensity: 3x the visit share, capped at 255 (0 when nothing was visited yet)
	if visit_total > 0:
		rc = int(min(255, 3*255.*n_visits / float(visit_total)))
	else:
		rc = 0
	bgc = [0, 0, 0]
	bgc[color_idx] = rc

	q = Q.reshape(gv.map_sz)[i,j]
	p = P.reshape(gv.map_sz)[i,j]

	used_height = 0
	for txt in ('%1.2f %1.2f' % (q, p), '%1.2f %i' % (q + p, n_visits)):
		tsz = np.asarray(basicFont.size(txt), dtype='single')
		# centre horizontally in the cell, stack the lines vertically
		tcoord = coord + pszh - np.asarray([tsz[0]/2., n_txt_rows*tsz[1]/2])
		tcoord[1] += y_offset + used_height
		draw_txt(txt, tcoord, tsz, bgc)
		used_height += tsz[1]
	return used_height


# draw board and optionally text
def draw(update=False):
	"""Redraw background, grid lines, stones and (when show_txt is set) tree statistics.

	update: when true, push the finished frame to the screen with pygame.display.update().
	"""
	windowSurface.blit(blank, (0,0))

	# draw lines
	for i in range(gv.n_rows):
		pygame.draw.line(windowSurface, BLACK, (0, i*psz + pszh), (window_sz[0], i*psz + pszh), LINE_WIDTH)
		pygame.draw.line(windowSurface, BLACK, (i*psz + pszh, 0), (i*psz + pszh, window_sz[1]), LINE_WIDTH)

	# loop over all positions on game board
	for i in range(gv.n_rows):
		for j in range(gv.n_cols):
			coord = np.asarray((i*psz, j*psz))
			# show pieces
			if board[i,j] == 1:
				windowSurface.blit(blackp, coord)
			elif board[i,j] == -1:
				windowSurface.blit(whitep, coord)

			# tree statistics for the network's own movement (red background)
			stats_height = 0
			if show_txt and P_map[i,j] != 0:
				stats_height = _draw_stats(coord, i, j, Q_map, P_map, visit_count_map, 0, 0)

			# tree statistics for where the network estimates *you* will play (green background),
			# placed below the network's own statistics
			if show_txt and P_map_next[i,j]:
				_draw_stats(coord, i, j, Q_map_next, P_map_next, visit_count_map_next, stats_height, 1)

	if update:
		pygame.display.update()
348 | 
draw(update=True)

if NET_PLAYER == 0: # network makes first move
	board = nn_mv()
	draw(update=True)
	save_screenshot('b')

# Optional sound played after the network has moved in tree-search mode.
# Set to the path of a sound file to enable; play() raises pygame.error when no
# music has been loaded, so it is only called when a file was configured.
move_sound = None #'/home/tapa/gtr-nylon22.mp3'
if move_sound is not None:
	pygame.mixer.music.load(move_sound)

# Event loop: a left click makes your move (followed by the network's reply),
# a right click toggles the statistics overlay, closing the window ends the program.
while True:
	event = pygame.event.wait()

	# move player, then move network
	if event.type == MOUSEBUTTONUP:

		# if right button pressed, toggle showing tree stats
		if event.button == 3:
			show_txt = not show_txt
			draw(update=True)
			continue

		# get player move from cursor: nearest cell centre in each direction
		mouse_pos = np.asarray(event.pos)
		x = np.argmin((mouse_pos[0] - centers)**2)
		y = np.argmin((mouse_pos[1] - centers)**2)

		to_coords_manual[0] = x*gv.n_cols + y

		board_prev = arch.sess.run(arch.gm_vars['board'])[0]

		imgs, valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map], feed_dict={arch.moving_player: human_player()})

		# make move for player
		arch.sess.run(arch.nn_max_move_unit['eval'], feed_dict={arch.moving_player: human_player(), arch.nn_max_to_coords['eval']: to_coords_manual})

		# valid? (a move is treated as rejected when the sum over the board did not change)
		board = arch.sess.run(arch.gm_vars['board'])[0]
		if board_prev.sum() == board.sum(): # invalid move
			print('invalid mv')
			continue

		# register in tree if not in one-pass-only mode
		if run_one_pass_only == False:
			pu.add_valid_mvs(human_player(), valid_mv_map) # register valid moves in tree
			pu.register_mv(human_player(), to_coords_manual)

		win_tmp, score_tmp = arch.sess.run([arch.winner, arch.score], feed_dict={arch.moving_player: human_player()})
		print('you: turn %i, winner %i, score %i' % (turn, win_tmp[0], score_tmp[0]))

		draw(update=True)
		save_screenshot('w')

		# network makes move
		board = nn_mv()
		draw(update=True)
		turn += 1
		save_screenshot('b')

		if run_one_pass_only == False and move_sound is not None:
			pygame.mixer.music.play()

		win_tmp, score_tmp = arch.sess.run([arch.winner, arch.score], feed_dict={arch.moving_player: NET_PLAYER})
		print('network: turn %i, winner %i, score %i' % (turn, win_tmp[0], score_tmp[0]))

	if event.type == QUIT:
		pygame.display.quit()
		break
416 | 
417 | 


--------------------------------------------------------------------------------
/py_util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/py_util/__init__.py


--------------------------------------------------------------------------------
/py_util/_py_util.c:
--------------------------------------------------------------------------------
 1 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 2 | #include "includes.h"
 3 | 
 4 | #include "rotate_reflect_imgs.c"
 5 | #include "init_tree.c"
 6 | #include "add_valid_mvs.c"
 7 | #include "register_mv.c"
 8 | #include "backup_visit.c"
 9 | #include "prune_tree.c"
10 | #include "choose_moves.c"
11 | #include "session_backup.c"
12 | #include "return_tree.c"
13 | 
// Method table of the extension module: each entry maps the Python-visible name to the
// C function of the same name (from the .c files included above). All entries use the
// METH_VARARGS calling convention; the remaining PyMethodDef field is left unset.
static PyMethodDef py_util[] = {
	{"rotate_reflect_imgs", rotate_reflect_imgs, METH_VARARGS},
	{"init_tree", init_tree, METH_VARARGS},
	{"add_valid_mvs", add_valid_mvs, METH_VARARGS},
	{"register_mv", register_mv, METH_VARARGS},
	{"backup_visit", backup_visit, METH_VARARGS},
	{"prune_tree", prune_tree, METH_VARARGS},
	{"choose_moves", choose_moves, METH_VARARGS},
	{"session_backup", session_backup, METH_VARARGS},
	{"session_restore", session_restore, METH_VARARGS},
	{"return_tree", return_tree, METH_VARARGS},

	{NULL, NULL} // sentinel entry terminating the table
};
28 | 
// Module initialisation function for the "_py_util" extension (Python 2 style: the
// interpreter calls init<module name> on import). Seeds the C library random number
// generator, registers the method table and initialises the NumPy C API.
// NOTE(review): the Windows branch uses extern "C", which is only valid when this file
// is compiled as C++ -- confirm the Windows build settings.
#if defined(_WIN32) || defined(_WIN64)
extern "C" void _declspec(dllexport) init_py_util(){
#else
extern void init_py_util(){
#endif
	srand(time(NULL)); // seed rand() from the wall clock

	(void) Py_InitModule("_py_util", py_util); // create the module "_py_util" with the method table above
	import_array(); // NumPy C-API setup, required before the PyArray_* calls in the included sources
	
}
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/py_util/add_valid_mvs.c:
--------------------------------------------------------------------------------
 1 | static PyObject *add_valid_mvs(PyObject *self, PyObject *args){
 2 | 	PyArrayObject *valid_mv_map_np;
 3 | 	int moving_player;
 4 | 	char * valid_mv_map;
 5 | 
 6 | 	if(!PyArg_ParseTuple(args, "iO!", &moving_player, &PyArray_Type, &valid_mv_map_np)) return NULL;
 7 | 	
 8 | 	/////////////////////// check inputs
 9 | 	ASSERT(moving_player == 0 || moving_player == 1, "moving player incorrect")
10 | 	ASSERT(valid_mv_map_np != NULL, "absent inputs")
11 | 	ASSERT(PyArray_TYPE(valid_mv_map_np) == NPY_INT8, "data type incorrect")
12 | 	ASSERT(PyArray_NDIM(valid_mv_map_np) == 3, "dims incorrect")
13 | 	ASSERT(PyArray_STRIDE(valid_mv_map_np, 2) == sizeof(valid_mv_map[0]), "data not contigious or C-order")
14 | 
15 | 	npy_intp * dims_in = PyArray_DIMS(valid_mv_map_np);
16 | 
17 | 	ASSERT(dims_in[0] == BATCH_SZ, "batch sz incorrect")
18 | 	ASSERT(dims_in[1] == MAP_SZ_X, "map sz incorrect")
19 | 	ASSERT(dims_in[2] == MAP_SZ_Y, "map sz incorrect")
20 | 
21 | 	valid_mv_map = (char *) PyArray_DATA(valid_mv_map_np);
22 | 
23 | 	////////////////////////////
24 | 	for(int gm = 0; gm < BATCH_SZ; gm++){ 
25 | 		int TO;
26 | 		int game_offset = gm*MAP_SZ;
27 | 	
28 | 		#ifdef CUDA_DEBUG
29 | 			if(tree_sz[gm] >= TREE_BUFFER_SZ)
30 | 				printf("tree_sz[%i] = %i tree_start %i\n", gm, tree_sz[gm], tree_start[gm]);
31 | 			if(tree_start[gm] < 0 || tree_start[gm] >= tree_sz[gm])
32 | 				printf("tree_sz[%i] = %i tree_start %i\n", gm, tree_sz[gm], tree_start[gm]);
33 | 		#endif
34 | 
35 | 		int t_ind = tree_start[gm]; TO_FRM_T_IND
36 | 
37 | 		// already created valid moves leaves:
38 | 		if(tree_list_start[TO] != -1){
39 | 			DASSERT(tree_player[TO] == moving_player);
40 | 			#ifdef CUDA_DEBUG
41 | 				int n_valid_mvs_chk = 1;
42 | 				for(int map_coord = 0; map_coord < MAP_SZ; map_coord++){
43 | 					int gcoord = game_offset + map_coord;
44 | 					if(valid_mv_map[gcoord]) n_valid_mvs_chk ++;
45 | 				}
46 | 				if(n_valid_mvs_chk != tree_list_sz[TO]){
47 | 					printf("skipping %i moving_player %i n_valid_mvs_chk %i tree_list_sz %i\n", gm, moving_player,
48 | 						n_valid_mvs_chk, tree_list_sz[TO]);
49 | 					DASSERT(0)
50 | 				}
51 | 			#endif
52 | 			continue;
53 | 		}
54 | 		
55 | 		tree_player[TO] = moving_player;
56 | 		tree_list_start[TO] = list_sz[gm];
57 | 		tree_list_sz[TO] = 0;
58 | 
59 | 		DASSERT(list_sz[gm] < MV_BUFFER_SZ);
60 | 		
61 | 		#define LOE (gm*MV_BUFFER_SZ + list_sz[gm])
62 | 		#define ADD_MV(COORD) { list_valid_mv_inds[LOE] = COORD;\
63 | 				list_valid_tree_inds[LOE] = -1;\
64 | 				list_q_total[LOE] = 0;\
65 | 				list_visit_count[LOE] = 0;\
66 | 				list_prob[LOE] = -1;\
67 | 				tree_list_sz[TO] ++;\
68 | 				list_sz[gm] ++;\
69 | 				assert(list_sz[gm] < MV_BUFFER_SZ); }
70 | 		
71 | 		ADD_MV(-1) // pass move entry
72 | 		
73 | 		for(int map_coord = 0; map_coord < MAP_SZ; map_coord++){
74 | 			int gcoord = game_offset + map_coord;
75 | 			if(!valid_mv_map[gcoord]) continue;
76 | 
77 | 			ADD_MV(map_coord)
78 | 		} // map loop
79 | 	} // gm
80 | 
81 | 	Py_RETURN_NONE;
82 | }
83 | 
84 | 


--------------------------------------------------------------------------------
/py_util/backup_visit.c:
--------------------------------------------------------------------------------
 1 | static PyObject *backup_visit(PyObject *self, PyObject *args){
 2 | 	PyArrayObject * q_np;
 3 | 	float * q;
 4 | 	int moving_player;
 5 | 
 6 | 	if(!PyArg_ParseTuple(args, "iO!", &moving_player, &PyArray_Type, &q_np)) return NULL;
 7 | 
 8 | 	/////////////////// check inputs
 9 | 	ASSERT(q_np != NULL, "absent inputs")
10 | 	ASSERT(PyArray_TYPE(q_np) == NPY_FLOAT32, "data type incorrect")
11 | 	ASSERT(PyArray_NDIM(q_np) == 1, "dims must be 1")
12 | 	ASSERT(PyArray_STRIDE(q_np, 0) == sizeof(q[0]), "data not contigious or C-order")
13 | 	ASSERT(moving_player == 0 || moving_player == 1, "moving_player incorrect")
14 | 
15 | 	npy_intp * dims_in = PyArray_DIMS(q_np);
16 | 
17 | 	ASSERT(dims_in[0] == BATCH_SZ, "batch sz incorrect")
18 | 
19 | 	q = (float *) PyArray_DATA(q_np);
20 | 
21 | 	/////////////////////////
22 | 
23 | 	for(int gm = 0; gm < BATCH_SZ; gm++){
24 | 				
25 | 		// tree ind
26 | 		int TO, LO;
27 | 		int t_ind = tree_start[gm]; TO_FRM_T_IND
28 | 
29 | 		while(1){
30 | 			int t_ind_prev = t_ind;
31 | 			if(tree_parent[TO] == -1) // tree root
32 | 				break;
33 | 
34 | 			// inds
35 | 			t_ind = tree_parent[TO]; TO_FRM_T_IND
36 | 			int l_ind = tree_list_start[TO]; LO_FRM_L_IND
37 | 			int n_valid_mvs = tree_list_sz[TO]; CHK_N_VALID_MVS
38 | 
39 | 			// find list index for previous tree ind
40 | 			char found = 0;
41 | 			int LOC;
42 | 			for(int mv_ind = 0; mv_ind < n_valid_mvs; mv_ind++){
43 | 				LOC = LO + mv_ind;
44 | 				if(list_valid_tree_inds[LOC] != t_ind_prev) continue;
45 | 				
46 | 				found = 1;
47 | 				break;
48 | 			}
49 | 			assert(found != 0);
50 | 
51 | 			if(tree_player[TO] == moving_player){
52 | 				list_visit_count[LOC] ++;
53 | 				DASSERT((powf(2, 8*sizeof(list_visit_count[0])) - 3) > (float)list_visit_count[LOC]) // overflow check
54 | 				list_q_total[LOC] += q[gm];
55 | 			}
56 | 		
57 | 		}
58 | 	} // gm
59 | 
60 | 	Py_RETURN_NONE;
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/py_util/build.sh:
--------------------------------------------------------------------------------
1 | gcc _py_util.c -fPIC -O3 -I/usr/include/python2.7 -I/usr/include/numpy -lpython2.7 -shared -o _py_util.so -Wall
2 | 
3 | 


--------------------------------------------------------------------------------
/py_util/build_centos.sh:
--------------------------------------------------------------------------------
1 | gcc _py_util.c -fPIC -O3 -I/usr/include/python2.7 -I/usr/include/numpy -I/usr/lib64/python2.7/site-packages/numpy/core/include/numpy -lpython2.7 -shared -o _py_util.so -Wall
2 | 
3 | 


--------------------------------------------------------------------------------
/py_util/choose_moves.c:
--------------------------------------------------------------------------------
  1 | // choose maps based on tree search
  2 | 
  3 | /*	.Input("moving_player: int32") // [1]
  4 | 	.Input("pol: float") // map, network's estimted probs
  5 | 	.Input("CPUCT: float") // [1]
  6 | 
  7 | 	.Output("to_coords: int32") // [BATCH_SZ]
  8 | 	.Output("Q_map: float") // map
  9 | 	.Output("P_map: float") // map
 10 | 	.Output("visit_count_map: float") // map
 11 | */
 12 | static PyObject *choose_moves(PyObject *self, PyObject *args){
 13 | 	PyArrayObject *pol_np;
 14 | 	float * pol, CPUCT;
 15 | 	int moving_player;
 16 | 
 17 | 	if(!PyArg_ParseTuple(args, "iO!f", &moving_player, &PyArray_Type, &pol_np, &CPUCT)) return NULL;
 18 | 	
 19 | 	/////////////////////// check inputs
 20 | 	ASSERT(pol_np != NULL, "absent inputs")
 21 | 	ASSERT(PyArray_TYPE(pol_np) == NPY_FLOAT32, "data type incorrect")
 22 | 	ASSERT(PyArray_NDIM(pol_np) == 2, "dims must be 2")
 23 | 	ASSERT(PyArray_STRIDE(pol_np, 1) == sizeof(pol[0]), "data not contigious or C-order")
 24 | 
 25 | 	npy_intp * dims_in = PyArray_DIMS(pol_np);
 26 | 
 27 | 	ASSERT(dims_in[0] == BATCH_SZ, "batch sz incorrect")
 28 | 	ASSERT(dims_in[1] == (MAP_SZ_X*MAP_SZ_Y), "map sz incorrect")
 29 | 
 30 | 	pol = (float*) PyArray_DATA((PyArrayObject*) pol_np);
 31 | 
 32 | 	///// output
 33 | 	npy_intp dims[4];
 34 | 	dims[0] = BATCH_SZ;
 35 | 	dims[1] = MAP_SZ_X;
 36 | 	dims[2] = MAP_SZ_Y;
 37 | 
 38 | 	PyObject * to_coords_np = PyArray_SimpleNew(1, dims, NPY_INT32);
 39 | 	PyObject * Q_map_np = PyArray_SimpleNew(3, dims, NPY_FLOAT32);
 40 | 	PyObject * P_map_np = PyArray_SimpleNew(3, dims, NPY_FLOAT32);
 41 | 
 42 | 	dims[1] = MAP_SZ_X*MAP_SZ_Y;
 43 | 	PyObject * visit_count_map_np = PyArray_SimpleNew(2, dims, NPY_FLOAT32);
 44 | 
 45 | 	int * to_coords = (int *) PyArray_DATA((PyArrayObject*) to_coords_np);
 46 | 	float * Q_map = (float *) PyArray_DATA((PyArrayObject*) Q_map_np);
 47 | 	float * P_map = (float *) PyArray_DATA((PyArrayObject*) P_map_np);
 48 | 	float * visit_count_map = (float *) PyArray_DATA((PyArrayObject*) visit_count_map_np);
 49 | 
 50 | 	//////////////////////////////////////
 51 | 	for(int gm = 0; gm < BATCH_SZ; gm++){
 52 | 		
 53 | 		////// init
 54 | 		MAP_LOOP{
 55 | 			int MO = gm*MAP_SZ + loc;
 56 | 			P_map[MO] = 0;
 57 | 			Q_map[MO] = 0;
 58 | 			visit_count_map[MO] = 0;
 59 | 		}
 60 | 		
 61 | 		CUR_TREE_INDS
 62 | 
 63 | 		// pass move only valid move
 64 | 		if(n_valid_mvs == 1){
 65 | 			to_coords[gm] = -1;
 66 | 			continue;
 67 | 		}
 68 | 
 69 | 		#define LOC_AND_MO int LOC = LO + mv_ind;\
 70 | 				int map_loc = list_valid_mv_inds[LOC];\
 71 | 				DASSERT(map_loc >= 0 && map_loc < MAP_SZ);\
 72 | 				int MO = gm*MAP_SZ + map_loc;
 73 | 
 74 | 		/////////// sum all valid probs
 75 | 		float prob_sum = 0;
 76 | 		for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move
 77 | 			LOC_AND_MO
 78 | 			prob_sum += pol[MO];
 79 | 		}
 80 | 
 81 | 		//////////// set prob value, compute tmp sums of Q & P
 82 | 		int visit_sum = 0; // across mvs
 83 | 
 84 | 		for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move
 85 | 			LOC_AND_MO
 86 | 			
 87 | 			// init move prob
 88 | 			if(list_prob[LOC] == -1)
 89 | 				list_prob[LOC] = pol[MO] / prob_sum;
 90 | 
 91 | 			int visit_count_tmp = list_visit_count[LOC];
 92 | 			if(visit_count_tmp == 0) visit_count_tmp = 1;
 93 | 
 94 | 			// set maps
 95 | 			Q_map[MO] = list_q_total[LOC] / visit_count_tmp;
 96 | 			P_map[MO] = (CPUCT * list_prob[LOC]) / (1. + list_visit_count[LOC]);
 97 | 
 98 | 			visit_sum += list_visit_count[LOC];
 99 | 
100 | 			visit_count_map[MO] = list_visit_count[LOC];
101 | 		}
102 | 
103 | 		// compute U for each action, select max action
104 | 		float U_max = 0;
105 | 		int mv_ind_max = -1;
106 | 		float visit_sum_sqrt = sqrtf(visit_sum);
107 | 		for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move
108 | 			LOC_AND_MO
109 | 			P_map[MO] *= visit_sum_sqrt;
110 | 
111 | 			float U_tmp = Q_map[MO] + P_map[MO];
112 | 			if((U_max < U_tmp) || (mv_ind_max == -1)){
113 | 				mv_ind_max = mv_ind;
114 | 				U_max = U_tmp;
115 | 			}
116 | 		}
117 | 		
118 | 		// set to_coords
119 | 		int LOC = LO + mv_ind_max;
120 | 		int map_loc = list_valid_mv_inds[LOC];
121 | 		DASSERT(map_loc >= 0 && map_loc < MAP_SZ);
122 | 		to_coords[gm] = map_loc;
123 | 
124 | 	} // gm
125 | 
126 | 	/////////// return
127 | 	PyObject * ret = PyList_New(4);
128 | 	ASSERT(ret != 0, "err creating output list")
129 | 
130 | 	ASSERT(PyList_SetItem(ret, 0, to_coords_np) == 0, "failed setting item");
131 | 	ASSERT(PyList_SetItem(ret, 1, Q_map_np) == 0, "failed setting item");
132 | 	ASSERT(PyList_SetItem(ret, 2, P_map_np) == 0, "failed setting item");
133 | 	ASSERT(PyList_SetItem(ret, 3, visit_count_map_np) == 0, "failed setting item");
134 | 
135 | 	return ret;
136 | }
137 | 
138 | 


--------------------------------------------------------------------------------
/py_util/includes.h:
--------------------------------------------------------------------------------
 1 | #include "Python.h"
 2 | #include "arrayobject.h"
 3 | #include <time.h>
 4 | #include <math.h>
 5 | #include <stdio.h>
 6 | #include <stdlib.h>
 7 | #include <string.h>
 8 | 
 9 | #include "../includes.h"
10 | 
// DASSERT: debug-only assertion. Forwards to ASSERT (defined outside this
// file) when CUDA_DEBUG is defined and expands to nothing otherwise, so the
// asserted expression is not evaluated in non-debug builds.
#ifdef CUDA_DEBUG
	#define DASSERT(A) ASSERT(A, "assertion error")
#else
	#define DASSERT(A) 
#endif

// Per-game capacities of the statically allocated buffers below:
// TREE_BUFFER_SZ tree nodes and MV_BUFFER_SZ move-list entries per game.
// The trailing commented numbers are earlier values.
#define TREE_BUFFER_SZ 500000 //  250000 //190000
//#define MV_BUFFER_SZ 7000000
#define MV_BUFFER_SZ 8000000

//5000000 //4500000 //4250000 //4000000 // 3760000


//#define TREE_BUFFER_SZ 19000//0 //70000//(1 200 000)//(800000)//*600000*2)
//#define MV_BUFFER_SZ 276000//0 //2 000 000 //1200000 //900000 //TREE_BUFFER_SZ

// Copy SZ elements from B to A with memcpy. BMEM takes the element size from
// the destination A, BMEM2 from the source B (needed when A is passed as an
// address expression such as &arr[off], whose [0] is still one element but
// reads less clearly). Both expand to a complete statement including the ';'.
#define BMEM(A, B, SZ) memcpy(A, B, SZ*sizeof(A[0]));
#define BMEM2(A, B, SZ) memcpy(A, B, SZ*sizeof(B[0]));
29 | 
//////////////////// tree
// create_batch: creates leaves (ex. list_valid_mv_inds)
// choose_moves: sets list_prob
// backup_visit: sets list_q_total
// move_unit: increments visit count, creates new tree node, sets tree_parent
// NOTE(review): create_batch and move_unit are not functions of this module;
// in the py_util sources the move lists are created by add_valid_mvs, new
// nodes by register_mv, and visit counts are incremented by backup_visit.
//
// All buffers are per game: game gm owns tree slots
// [gm*TREE_BUFFER_SZ, (gm+1)*TREE_BUFFER_SZ) and list slots
// [gm*MV_BUFFER_SZ, (gm+1)*MV_BUFFER_SZ). The *_back arrays hold one game's
// worth of data and are used as scratch space by prune_tree.

////// node information:
unsigned tree_sz[BATCH_SZ]; // number of nodes in use per game
unsigned tree_start[BATCH_SZ], tree_start2[BATCH_SZ]; // tree_start: current node per game; tree_start2: for session backup/restoration

#define B_TREE_SZ (BATCH_SZ * TREE_BUFFER_SZ)
char tree_player[B_TREE_SZ], tree_player_back[TREE_BUFFER_SZ]; // player to move at the node
int tree_parent[B_TREE_SZ], tree_parent_back[TREE_BUFFER_SZ]; // parent node index within the game, -1 for the root

// start index for list_valid_mv_inds, list_valid_tree_inds:
// (tree_list_start is -1 while the node has no move list yet)
int tree_list_sz[B_TREE_SZ], tree_list_sz_back[TREE_BUFFER_SZ];
int tree_list_start[B_TREE_SZ], tree_list_start_back[TREE_BUFFER_SZ];

////// lists (leaf information)
#define B_MV_SZ (BATCH_SZ * MV_BUFFER_SZ)
unsigned list_sz[BATCH_SZ]; // number of list entries in use per game
short list_valid_mv_inds[B_MV_SZ], list_valid_mv_inds_back[MV_BUFFER_SZ]; // (first entry is always the pass mv)
int list_valid_tree_inds[B_MV_SZ], list_valid_tree_inds_back[MV_BUFFER_SZ]; // child node reached by the move, -1 if none yet
float list_q_total[B_MV_SZ], list_q_total_back[MV_BUFFER_SZ]; // sum of backed-up q values
float list_prob[B_MV_SZ], list_prob_back[MV_BUFFER_SZ]; // move probability, -1 until set by choose_moves
unsigned list_visit_count[B_MV_SZ], list_visit_count_back[MV_BUFFER_SZ];

// used in prune tree:
unsigned tree_cp_old_stack[TREE_BUFFER_SZ], tree_cp_new_stack[TREE_BUFFER_SZ]; // prune_tree, tree inds to cp
59 | 
60 | ////////////////////////////////////////
// The macros below are not hygienic: they read and write the caller's local
// variables gm, t_ind, l_ind, n_valid_mvs, TO and LO by name.

// debug check: t_ind is a valid node index of game gm
#define CHK_T_IND DASSERT(tree_sz[gm] < TREE_BUFFER_SZ);\
	  	DASSERT(t_ind >= 0 && t_ind < tree_sz[gm]);

// debug check: l_ind is a valid list index of game gm
#define CHK_L_IND DASSERT(list_sz[gm] < MV_BUFFER_SZ);\
 		DASSERT(l_ind >= 0)\
		DASSERT(l_ind < list_sz[gm])


// debug check: the list of node TO has between 1 and MAP_SZ+1 entries
// (board positions plus the pass move) and lies inside the used list range
#define CHK_N_VALID_MVS DASSERT(n_valid_mvs > 0 && n_valid_mvs <= (MAP_SZ+1));\
			DASSERT( (n_valid_mvs + tree_list_start[TO]) <= list_sz[gm]);

// TO / LO: flat offsets into the tree_* / list_* arrays for game gm
#define TO_FRM_T_IND CHK_T_IND; TO = gm*TREE_BUFFER_SZ + t_ind;
#define LO_FRM_L_IND CHK_L_IND; LO = gm*MV_BUFFER_SZ + l_ind;

// declare and fill TO, LO, t_ind, l_ind and n_valid_mvs for the current node
// (tree_start[gm]) of game gm, without checking n_valid_mvs
#define CUR_TREE_INDS_WO_MV_CHK int TO, LO;\
	int t_ind = tree_start[gm]; TO_FRM_T_IND\
	int l_ind = tree_list_start[TO]; LO_FRM_L_IND\
	int n_valid_mvs = tree_list_sz[TO]; 

// same as above, followed by the n_valid_mvs debug check
#define CUR_TREE_INDS CUR_TREE_INDS_WO_MV_CHK \
		      CHK_N_VALID_MVS
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/py_util/init_tree.c:
--------------------------------------------------------------------------------
// Zero the first S elements of array A. Expands to a complete statement,
// including the trailing ';'.
#define ZERO(A, S) memset(A, 0, (S)*sizeof(A[0]));
 2 | 
 3 | /*void init_vecs(){
 4 | 	
 5 | 	ZERO(tree_player, BATCH_SZ * TREE_BUFFER_SZ)
 6 | 	ZERO(list_q_total, BATCH_SZ * MV_BUFFER_SZ)
 7 | 	ZERO(list_visit_count, BATCH_SZ * MV_BUFFER_SZ)
 8 | 
 9 | 		
10 | 
11 | 	for(int i = 0; i < (BATCH_SZ*TREE_BUFFER_SZ); i++){
12 | 		tree_parent[i] = -1;
13 | 		tree_list_start[i] = -1;
14 | 		tree_list_sz[i] = -1;
15 | 	}
16 | 
17 | 	for(int i = 0; i < (BATCH_SZ*MV_BUFFER_SZ); i++){
18 | 		list_valid_mv_inds[i] = -1;
19 | 		list_valid_tree_inds[i] = -1;
20 | 		list_prob[i] = -1;
21 | 	}
22 | }*/
23 | 
24 | static PyObject *init_tree(PyObject *self, PyObject *args){
25 | 	ZERO(tree_start, BATCH_SZ)
26 | 	ZERO(list_sz, BATCH_SZ)
27 | 
28 | 	for(int i = 0; i < BATCH_SZ; i++){
29 | 		tree_sz[i] = 1;
30 | 		tree_list_start[i*TREE_BUFFER_SZ] = -1;
31 | 		tree_parent[i*TREE_BUFFER_SZ] = -1;
32 | 	}
33 | 
34 | 	//init_vecs();	
35 | 
36 | 	Py_RETURN_NONE;
37 | }
38 | 
39 | 


--------------------------------------------------------------------------------
/py_util/prune_tree.c:
--------------------------------------------------------------------------------
  1 | #define ADD_NODE_TO_STACK(NODE) \
  2 | 	tree_cp_old_stack[stack_sz] = NODE;\
  3 | 	tree_cp_new_stack[stack_sz] = tree_sz_back;\
  4 | 	\
  5 | 	stack_sz ++;\
  6 | 	tree_sz_back ++;\
  7 | 	DASSERT(stack_sz < TREE_BUFFER_SZ)\
  8 | 	DASSERT(tree_sz_back < TREE_BUFFER_SZ)\
  9 | 	DASSERT(tree_sz_back <= tree_sz[gm])
 10 | 
 11 | #define CHK_PREV_TREE_IND(IND, SZ) DASSERT((int)IND >= 0 && IND < SZ) 
 12 | 
 13 | static PyObject *prune_tree(PyObject *self, PyObject *args){
 14 | 	int single_game;
 15 | 
 16 | 	if(!PyArg_ParseTuple(args, "i", &single_game)) return NULL;
 17 | 
 18 | 	// only prune first game, reset everything else
 19 | 	int games_loop = BATCH_SZ;
 20 | 	if(single_game == 1){
 21 | 		games_loop = 1;
 22 | 		
 23 | 		memset(&tree_start[1], 0, sizeof(tree_start[0])*(BATCH_SZ-1));
 24 | 		memset(&list_sz[1], 0, sizeof(list_sz[0])*(BATCH_SZ-1));
 25 | 
 26 | 		for(int i = 1; i < BATCH_SZ; i++){
 27 | 			tree_sz[i] = 1;
 28 | 			tree_list_start[i*TREE_BUFFER_SZ] = -1;
 29 | 			tree_parent[i*TREE_BUFFER_SZ] = -1;
 30 | 		}
 31 | 	}
 32 | 
 33 | 	for(int gm = 0; gm < games_loop; gm++){
 34 | 		int TOFF = gm*TREE_BUFFER_SZ;
 35 | 		int LOFF = gm*MV_BUFFER_SZ;
 36 | 
 37 | 		int stack_sz = 0;
 38 | 		int tree_sz_back = 0;
 39 | 		int list_sz_back = 0;
 40 | 	
 41 | 		DASSERT(tree_sz[gm] < TREE_BUFFER_SZ)
 42 | 		DASSERT(list_sz[gm] < MV_BUFFER_SZ)
 43 | 
 44 | 		//////////////////
 45 | 		// start from tree_start[gm] and mv forward keeping all leaves
 46 | 		ADD_NODE_TO_STACK(tree_start[gm])
 47 | 		
 48 | 		for(int stack_loc = 0; stack_loc < stack_sz; stack_loc++){
 49 | 			CHK_PREV_TREE_IND(tree_cp_new_stack[stack_loc], tree_sz_back)
 50 | 			CHK_PREV_TREE_IND(tree_cp_old_stack[stack_loc], tree_sz[gm])
 51 | 			
 52 | 			int TO_NEW = tree_cp_new_stack[stack_loc];
 53 | 			int TO = TOFF + tree_cp_old_stack[stack_loc];
 54 | 		
 55 | 			///////////// cp node
 56 | 			tree_player_back[TO_NEW] = tree_player[TO];
 57 | 			tree_list_sz_back[TO_NEW] = tree_list_sz[TO];
 58 | 
 59 | 			if(tree_list_start[TO] != -1) // new list slot
 60 | 				tree_list_start_back[TO_NEW] = list_sz_back;
 61 | 			else
 62 | 				tree_list_start_back[TO_NEW] = -1;
 63 | 
 64 | 			/////////////////////// set tree_parent
 65 | 
 66 | 			// parent of new root is non-existant
 67 | 			if(tree_cp_old_stack[stack_loc] == tree_start[gm]){
 68 | 				tree_parent_back[TO_NEW] = -1;
 69 | 			}else{
 70 | 				// find new tree_parent index
 71 | 				char found = 0; int stack_loc_j;
 72 | 				for(stack_loc_j = 0; stack_loc_j < stack_sz; stack_loc_j++){
 73 | 					if(tree_cp_old_stack[stack_loc_j] != tree_parent[TO])
 74 | 						continue;
 75 | 					found = 1;
 76 | 					break;
 77 | 				}
 78 | 				assert(found == 1);
 79 | 				
 80 | 				tree_parent_back[TO_NEW] = tree_cp_new_stack[stack_loc_j];
 81 | 			}
 82 | 
 83 | 			/////////////// cp list
 84 | 			DASSERT(tree_list_sz[TO] <= (MAP_SZ+1))
 85 | 			DASSERT(tree_list_sz[TO] >= 0)
 86 | 			DASSERT((tree_list_start_back[TO_NEW] >= 0 && tree_list_start_back[TO_NEW] <= list_sz_back) || tree_list_start_back[TO_NEW] == -1)
 87 | 
 88 | 			for(int mv_ind = 0; mv_ind < tree_list_sz[TO]; mv_ind++){
 89 | 				int LO = LOFF + tree_list_start[TO] + mv_ind;
 90 | 				int LO_NEW = tree_list_start_back[TO_NEW] + mv_ind;
 91 | 
 92 | 				// cp list
 93 | 				list_valid_mv_inds_back[LO_NEW] = list_valid_mv_inds[LO];
 94 | 				list_q_total_back[LO_NEW] = list_q_total[LO];
 95 | 				list_prob_back[LO_NEW] = list_prob[LO];
 96 | 				list_visit_count_back[LO_NEW] = list_visit_count[LO];
 97 | 
 98 | 				// tree node to copy
 99 | 				if(list_valid_tree_inds[LO] != -1){
100 | 					list_valid_tree_inds_back[LO_NEW] = tree_sz_back;
101 | 
102 | 					ADD_NODE_TO_STACK(list_valid_tree_inds[LO])
103 | 				}else
104 | 					list_valid_tree_inds_back[LO_NEW] = -1;
105 | 
106 | 				list_sz_back ++;
107 | 				DASSERT(list_sz_back <= list_sz[gm])
108 | 			}
109 | 		}			
110 | 	
111 | 		/////////// copy over
112 | 		tree_start[gm] = 0;
113 | 		tree_sz[gm] = tree_sz_back;
114 | 		list_sz[gm] = list_sz_back;
115 | 
116 | 		DASSERT((tree_sz[gm] < TREE_BUFFER_SZ) && (tree_sz[gm] > 0))
117 | 		BMEM2(&tree_player[TOFF], tree_player_back, tree_sz[gm])
118 | 		BMEM2(&tree_parent[TOFF], tree_parent_back, tree_sz[gm])
119 | 		BMEM2(&tree_list_sz[TOFF], tree_list_sz_back, tree_sz[gm])
120 | 		BMEM2(&tree_list_start[TOFF], tree_list_start_back, tree_sz[gm])
121 | 
122 | 		DASSERT(list_sz[gm] < MV_BUFFER_SZ)
123 | 		BMEM2(&list_valid_mv_inds[LOFF], list_valid_mv_inds_back, list_sz[gm])
124 | 		BMEM2(&list_valid_tree_inds[LOFF], list_valid_tree_inds_back, list_sz[gm])
125 | 		BMEM2(&list_q_total[LOFF], list_q_total_back, list_sz[gm])
126 | 		BMEM2(&list_prob[LOFF], list_prob_back, list_sz[gm])
127 | 		BMEM2(&list_visit_count[LOFF], list_visit_count_back, list_sz[gm])
128 | 	} // gm
129 | 
130 | 	Py_RETURN_NONE;
131 | }
132 | 
133 | 


--------------------------------------------------------------------------------
/py_util/py_util.py:
--------------------------------------------------------------------------------
1 | from _py_util import *
2 | 
3 | 


--------------------------------------------------------------------------------
/py_util/py_util_dyn.py:
--------------------------------------------------------------------------------
1 | from _py_util_dyn import *
2 | 
3 | 


--------------------------------------------------------------------------------
/py_util/register_mv.c:
--------------------------------------------------------------------------------
// register move in tree, initialize node if not already initialized
//
// register_mv(moving_player, chosen_coord)
//	moving_player: int, 0 or 1
//	chosen_coord: int32 array [BATCH_SZ], stride of one element; the move
//		chosen for each game as stored in list_valid_mv_inds
//		(-1 is the pass move)
//
// For every game the chosen move is looked up in the current node's move
// list. If the move has no child node yet, a new node is appended to the
// game's tree (parent = current node, player = the other player, no move
// list). The game's current node (tree_start[gm]) then moves to that child.
// Returns None.
static PyObject *register_mv(PyObject *self, PyObject *args){
	PyArrayObject *chosen_coord_np;
	int moving_player, * chosen_coord;

	if(!PyArg_ParseTuple(args, "iO!", &moving_player, &PyArray_Type, &chosen_coord_np)) return NULL;
	
	/////////////////////// check inputs
	ASSERT(moving_player == 0 || moving_player == 1, "moving player incorrect")
	ASSERT(chosen_coord_np != NULL, "absent inputs")
	ASSERT(PyArray_TYPE(chosen_coord_np) == NPY_INT32, "data type incorrect")
	ASSERT(PyArray_NDIM(chosen_coord_np) == 1, "dims incorrect")
	ASSERT(PyArray_STRIDE(chosen_coord_np, 0) == sizeof(chosen_coord[0]), "data not contigious or C-order")

	npy_intp * dims_in = PyArray_DIMS(chosen_coord_np);

	ASSERT(dims_in[0] == BATCH_SZ, "batch sz incorrect")

	chosen_coord = (int *) PyArray_DATA(chosen_coord_np);

	///////////////////////////////

	for(int gm = 0; gm < BATCH_SZ; gm++){
		//if(chosen_coord[gm] == -1) continue;

		// debug only: print the offending values before the assertion fires
		#ifdef CUDA_DEBUG
			if(tree_sz[gm] >= TREE_BUFFER_SZ){
				printf("tree_sz[%i] %i\n", gm, tree_sz[gm]);
				DASSERT(0);
			}
			if(tree_start[gm] < 0 || tree_start[gm] >= tree_sz[gm]){
				printf("tree_sz[%i] %i\n", gm, tree_sz[gm]);
				printf("tree_start %i\n", tree_start[gm]);
				DASSERT(0);
			}
			if(list_sz[gm] >= MV_BUFFER_SZ){
				printf("list_sz[%i] %i\n", gm, list_sz[gm]);
				DASSERT(0);
			}
			int t_ind2 = tree_start[gm];
			int TO2 = gm*TREE_BUFFER_SZ + t_ind2;
			if(tree_list_start[TO2] < 0 || tree_list_start[TO2] >= list_sz[gm]){
				printf("list_sz[%i] %i\n", gm, list_sz[gm]);
				printf("tree_list_start[%i] %i\n", TO2, tree_list_start[TO2]);
				DASSERT(0);
			}
		#endif

		// declares TO, LO, t_ind, l_ind, n_valid_mvs for the current node
		CUR_TREE_INDS	
		
		// find list index for chosen move
		char found = 0;
		int LOC;
		for(int mv_ind = 0; mv_ind < n_valid_mvs; mv_ind++){
			LOC = LO + mv_ind;
			if(list_valid_mv_inds[LOC] != chosen_coord[gm]) continue;
			
			found = 1;
			break;
		}
	
		#ifdef CUDA_DEBUG
			if(found == 0){
				printf("could not find valid move: gm %i chosen_coord %i n_valid_mvs %i\n", gm, chosen_coord[gm], n_valid_mvs);
				for(int mv_ind = 0; mv_ind < n_valid_mvs; mv_ind++){
					LOC = LO + mv_ind;
					printf("valid: %i\n", list_valid_mv_inds[LOC]);
				}
				for(int gm2 = 0; gm2 < BATCH_SZ; gm2++)
					printf("to_coords[%i] %i\n", gm2, chosen_coord[gm2]);
				//LOC = LO;
			}
		#endif
		ASSERT(found != 0, "could not find move");

		// update pointer to tree_start
		int t_ind_new;
		if(list_valid_tree_inds[LOC] == -1){ 
			
			// create new node, return t_ind_new
			list_valid_tree_inds[LOC] = tree_sz[gm];

			t_ind_new = tree_sz[gm];
			int TO_NEW = gm*TREE_BUFFER_SZ + t_ind_new;
			
			tree_parent[TO_NEW] = t_ind;
			// the child belongs to the other player: 1 if moving_player is 0, else 0
			tree_player[TO_NEW] = moving_player == 0;
			tree_list_start[TO_NEW] = -1;
			tree_list_sz[TO_NEW] = 0;

			tree_sz[gm] ++;
			ASSERT(tree_sz[gm] < TREE_BUFFER_SZ, "tree buffer size exceeded");
		}else{ 
			
			// return t_ind_new from list
			t_ind_new = list_valid_tree_inds[LOC];		
			DASSERT(t_ind_new >= 0 && t_ind_new < TREE_BUFFER_SZ);
			
			// TO_NEW is only needed by the debug checks below
			#ifdef CUDA_DEBUG
				int TO_NEW = gm*TREE_BUFFER_SZ + t_ind_new;
			#endif

			DASSERT(tree_parent[TO_NEW] == t_ind)
			DASSERT(tree_player[TO_NEW] == (!moving_player))
		}
		tree_start[gm] = t_ind_new;
	} // gm

	Py_RETURN_NONE;
}
111 | 
112 | 


--------------------------------------------------------------------------------
/py_util/return_probs_map.c:
--------------------------------------------------------------------------------
 1 | // return probs from tree visit counts
 2 | static PyObject *return_probs_map(PyObject *self, PyObject *args){
 3 | 	int N_TURNS;
 4 | 
 5 | 	if(!PyArg_ParseTuple(args, "i", &N_TURNS)) return NULL;
 6 | 	
 7 | 	ASSERT(N_TURNS > 0, "N_TURNS must be > 0")
 8 | 
 9 | 	//////// dbg
10 | 	int max_tree_sz = tree_sz[0];
11 | 	int max_list_sz = list_sz[0];
12 | 	for(int gm = 1; gm < BATCH_SZ; gm++){
13 | 		if(max_tree_sz < tree_sz[gm])
14 | 			max_tree_sz = tree_sz[gm];
15 | 		if(max_list_sz < list_sz[gm])
16 | 			max_list_sz = list_sz[gm];
17 | 	}
18 | 	printf("max tree_sz: %i, list_sz %i\n", max_tree_sz, max_list_sz);
19 | 	////////
20 | 
21 | 	///// output
22 | 	npy_intp dims[3];
23 | 	dims[0] = N_TURNS * N_PLAYERS * BATCH_SZ;
24 | 	dims[1] = MAP_SZ_X * MAP_SZ_Y;
25 | 
26 | 	PyObject * probs_map_np = PyArray_SimpleNew(2, dims, NPY_FLOAT);
27 | 
28 | 	float * probs_map = (float *) PyArray_DATA((PyArrayObject*) probs_map_np);
29 | 
30 | 	//////////////////////////////////////
31 | 	for(int gm = 0; gm < BATCH_SZ; gm++){
32 | 
33 | 		int TO;
34 | 		int t_ind = tree_start[gm]; TO_FRM_T_IND
35 | 		DASSERT(0 == tree_player[TO])
36 | 
37 | 		unsigned tree_loc = tree_parent[TO];
38 | 
39 | 		// traverse tree backward, alternating players
40 | 		for(int turn = N_TURNS-1; turn >= 0; turn--)   for(char player = 1; player >= 0; player--){
41 | 			float * probs_map_cur = &probs_map[turn*N_PLAYERS*BATCH_SZ*MAP_SZ + player*BATCH_SZ*MAP_SZ + gm*MAP_SZ];
42 | 
43 | 			// init
44 | 			MAP_LOOP probs_map_cur[loc] = 0;
45 | 			int TO, LO;
46 | 
47 | 			// inds
48 | 			int t_ind = tree_loc; TO_FRM_T_IND
49 | 			int l_ind = tree_list_start[TO]; LO_FRM_L_IND
50 | 			int n_valid_mvs = tree_list_sz[TO]; CHK_N_VALID_MVS
51 | 
52 | 			DASSERT(n_valid_mvs >= 1);
53 | 			DASSERT(player == tree_player[TO]);
54 | 
55 | 			tree_loc = tree_parent[TO];
56 | 			
57 | 			// set map, sum visits
58 | 			int visit_sum = 0;
59 | 			for(int mv_ind = 0; mv_ind < n_valid_mvs; mv_ind++){
60 | 				int map_loc = list_valid_mv_inds[LO + mv_ind];
61 | 
62 | 				DASSERT(map_loc >= -1 && map_loc < MAP_SZ);
63 | 				if(map_loc == -1) continue;
64 | 
65 | 				probs_map_cur[map_loc] = (float)list_visit_count[LO + mv_ind]; 
66 | 				visit_sum += list_visit_count[LO + mv_ind];
67 | 			}
68 | 				
69 | 			//  normalize
70 | 			for(int mv_ind = 0; (visit_sum != 0) && (mv_ind < n_valid_mvs); mv_ind++){
71 | 				int map_loc = list_valid_mv_inds[LO + mv_ind];
72 | 
73 | 				DASSERT(map_loc >= -1 && map_loc < MAP_SZ);
74 | 				if(map_loc == -1) continue;
75 | 
76 | 				probs_map_cur[map_loc] /= (float)visit_sum; 
77 | 			}
78 | 		} // turn / player loops
79 | 	} // gm
80 | 
81 | 	return probs_map_np;
82 | }
83 | 
84 | 


--------------------------------------------------------------------------------
/py_util/return_tree.c:
--------------------------------------------------------------------------------
 1 | /*	tree_sz, tree_start, tree_player, tree_parent, tree_list_sz, tree_list_start, \
 2 | 		list_sz, list_valid_mv_inds, list_valid_tree_inds, list_q_total, list_prob, \
 3 | 		list_visit_count = tf_op.return_tree()
 4 | */
 5 | static PyObject *return_tree(PyObject *self, PyObject *args){
 6 | 	
 7 | 	///// output
 8 | 	npy_intp dims[4];
 9 | 	dims[0] = BATCH_SZ;
10 | 	dims[1] = TREE_BUFFER_SZ;
11 | 
12 | 	PyObject * tree_sz_np = PyArray_SimpleNew(1, dims, NPY_UINT32);
13 | 	PyObject * tree_start_np = PyArray_SimpleNew(1, dims, NPY_UINT32);
14 | 	
15 | 	PyObject * tree_player_np = PyArray_SimpleNew(2, dims, NPY_INT8);
16 | 	PyObject * tree_parent_np = PyArray_SimpleNew(2, dims, NPY_INT32);
17 | 
18 | 	PyObject * tree_list_sz_np = PyArray_SimpleNew(2, dims, NPY_INT32);
19 | 	PyObject * tree_list_start_np = PyArray_SimpleNew(2, dims, NPY_INT32);
20 | 
21 | 	PyObject * list_sz_np = PyArray_SimpleNew(1, dims, NPY_UINT32);
22 | 
23 | 	dims[1] = MV_BUFFER_SZ;
24 | 	PyObject * list_valid_mv_inds_np = PyArray_SimpleNew(2, dims, NPY_INT16);
25 | 	PyObject * list_valid_tree_inds_np = PyArray_SimpleNew(2, dims, NPY_INT32);
26 | 	PyObject * list_q_total_np = PyArray_SimpleNew(2, dims, NPY_FLOAT32);
27 | 	PyObject * list_prob_np = PyArray_SimpleNew(2, dims, NPY_FLOAT32);
28 | 	PyObject * list_visit_count_np = PyArray_SimpleNew(2, dims, NPY_UINT32);
29 | 
30 | 	ASSERT(tree_sz_np && tree_start_np && tree_player_np && tree_parent_np && tree_list_sz_np &&
31 | 		tree_list_start_np && list_sz_np && list_valid_mv_inds_np && list_valid_tree_inds_np &&
32 | 		list_q_total_np && list_prob_np && list_visit_count_np, "error creating python outputs");
33 | 
34 | 	unsigned * tree_sz_ret = (unsigned *) PyArray_DATA((PyArrayObject*) tree_sz_np);
35 | 	unsigned * tree_start_ret = (unsigned *) PyArray_DATA((PyArrayObject*) tree_start_np);
36 | 	
37 | 	char * tree_player_ret = (char *) PyArray_DATA((PyArrayObject*) tree_player_np);
38 | 	int * tree_parent_ret = (int *) PyArray_DATA((PyArrayObject*) tree_parent_np);
39 | 
40 | 	int * tree_list_sz_ret = (int *) PyArray_DATA((PyArrayObject*) tree_list_sz_np);
41 | 	int * tree_list_start_ret = (int *) PyArray_DATA((PyArrayObject*) tree_list_start_np);
42 | 
43 | 	unsigned * list_sz_ret = (unsigned *) PyArray_DATA((PyArrayObject*) list_sz_np);
44 | 
45 | 	short * list_valid_mv_inds_ret = (short *) PyArray_DATA((PyArrayObject*) list_valid_mv_inds_np);
46 | 	int * list_valid_tree_inds_ret = (int *) PyArray_DATA((PyArrayObject*) list_valid_mv_inds_np);
47 | 	float * list_q_total_ret = (float *) PyArray_DATA((PyArrayObject*) list_q_total_np);
48 | 	float * list_prob_ret = (float *) PyArray_DATA((PyArrayObject*) list_prob_np);
49 | 	unsigned * list_visit_count_ret = (unsigned *) PyArray_DATA((PyArrayObject*) list_visit_count_np);
50 | 
51 | 	////////////////////////////////////// copy
52 | 	BMEM(tree_sz_ret, tree_sz, BATCH_SZ)
53 | 	BMEM(tree_start_ret, tree_start, BATCH_SZ)
54 | 
55 | 	BMEM(tree_player_ret, tree_player, B_TREE_SZ)
56 | 	BMEM(tree_parent_ret, tree_parent, B_TREE_SZ)
57 | 
58 | 	BMEM(tree_list_sz_ret, tree_list_sz, B_TREE_SZ)
59 | 	BMEM(tree_list_start_ret, tree_list_start, B_TREE_SZ)
60 | 
61 | 	BMEM(list_sz_ret, list_sz, BATCH_SZ)
62 | 
63 | 	BMEM(list_valid_mv_inds_ret, list_valid_mv_inds, B_MV_SZ)
64 | 	BMEM(list_valid_tree_inds_ret, list_valid_tree_inds, B_MV_SZ)
65 | 	BMEM(list_q_total_ret, list_q_total, B_MV_SZ)
66 | 	BMEM(list_prob_ret, list_prob, B_MV_SZ)
67 | 	BMEM(list_visit_count_ret, list_visit_count, B_MV_SZ)
68 | 	
69 | 	/////////// return
70 | 	PyObject * ret = PyList_New(12);
71 | 	ASSERT(ret != 0, "err creating output list")
72 | 
73 | 	ASSERT(PyList_SetItem(ret, 0, tree_sz_np) == 0, "failed setting item");
74 | 	ASSERT(PyList_SetItem(ret, 1, tree_start_np) == 0, "failed setting item");
75 | 	ASSERT(PyList_SetItem(ret, 2, tree_player_np) == 0, "failed setting item");
76 | 	ASSERT(PyList_SetItem(ret, 3, tree_parent_np) == 0, "failed setting item");
77 | 	ASSERT(PyList_SetItem(ret, 4, tree_list_sz_np) == 0, "failed setting item");
78 | 	ASSERT(PyList_SetItem(ret, 5, tree_list_start_np) == 0, "failed setting item");
79 | 	ASSERT(PyList_SetItem(ret, 6, list_sz_np) == 0, "failed setting item");
80 | 	ASSERT(PyList_SetItem(ret, 7, list_valid_mv_inds_np) == 0, "failed setting item");
81 | 	ASSERT(PyList_SetItem(ret, 8, list_valid_tree_inds_np) == 0, "failed setting item");
82 | 	ASSERT(PyList_SetItem(ret, 9, list_q_total_np) == 0, "failed setting item");
83 | 	ASSERT(PyList_SetItem(ret, 10, list_prob_np) == 0, "failed setting item");
84 | 	ASSERT(PyList_SetItem(ret, 11, list_visit_count_np) == 0, "failed setting item");
85 | 
86 | 	return ret;
87 | }
88 | 
89 | 


--------------------------------------------------------------------------------
/py_util/rotate_reflect_imgs.c:
--------------------------------------------------------------------------------
 1 | // inputs: imgs[batch_sz, map_sz_x, map_sz_y, channels]
 2 | // randomly rotate/reflect each image
 3 | static PyObject *rotate_reflect_imgs(PyObject *self, PyObject *args){
 4 | 	PyArrayObject *imgs_np, *tree_probs_np;
 5 | 	PyObject *imgs_r_np, *tree_probs_r_np;
 6 | 	float * imgs, *imgs_r, *tree_probs, *tree_probs_r;
 7 | 
 8 | 	if(!PyArg_ParseTuple(args, "O!O!", &PyArray_Type, &imgs_np, &PyArray_Type, &tree_probs_np)) return NULL;
 9 | 	
10 | 	/////////////////////// check inputs
11 | 	ASSERT(imgs_np != NULL, "absent inputs")
12 | 	ASSERT(PyArray_TYPE(imgs_np) == NPY_FLOAT32 && PyArray_TYPE(tree_probs_np) == NPY_FLOAT32, "data type incorrect")
13 | 	ASSERT(PyArray_NDIM(imgs_np) == 4 && PyArray_NDIM(tree_probs_np) == 2, "dims must be 4")
14 | 	ASSERT(PyArray_STRIDE(imgs_np, 3) == sizeof(imgs[0]) && PyArray_STRIDE(tree_probs_np, 1) == sizeof(tree_probs[0]), "data not contigious or C-order")
15 | 
16 | 	npy_intp * dims_in = PyArray_DIMS(imgs_np);
17 | 	npy_intp * pdims_in = PyArray_DIMS(tree_probs_np);
18 | 
19 | 	ASSERT(dims_in[0] == BATCH_SZ, "batch sz incorrect")
20 | 
21 | 	int map_sz_x = dims_in[1];
22 | 	int map_sz_y = dims_in[2];
23 | 	int n_chan = dims_in[3];
24 | 
25 | 	ASSERT(map_sz_x == map_sz_y, "board must be sq")
26 | 	ASSERT(pdims_in[0] == BATCH_SZ && pdims_in[1] == (map_sz_x*map_sz_y), "tree_probs incorrect")
27 | 
28 | 	imgs_r_np = PyArray_SimpleNew(4, dims_in, NPY_FLOAT);
29 | 	tree_probs_r_np = PyArray_SimpleNew(2, pdims_in, NPY_FLOAT);
30 | 
31 | 	imgs = (float *) PyArray_DATA(imgs_np);
32 | 	tree_probs = (float *) PyArray_DATA(tree_probs_np);
33 | 
34 | 	imgs_r = (float *) PyArray_DATA((PyArrayObject*) imgs_r_np);
35 | 	tree_probs_r = (float *) PyArray_DATA((PyArrayObject*) tree_probs_r_np);
36 | 
37 | 	float * imgs_r_pre = malloc(BATCH_SZ*map_sz_x*map_sz_y*n_chan*sizeof(imgs[0]));
38 | 	float * tree_probs_r_pre = malloc(BATCH_SZ*map_sz_x*map_sz_y*sizeof(imgs[0]));
39 | 
40 | 	ASSERT(imgs_r_pre && tree_probs_r_pre, "failed allocating");
41 | 
42 | 	#define MAP_LOOP_SEP for(int x = 0; x < map_sz_x; x++){ for(int y = 0; y < map_sz_y; y++){
43 | 
44 | 	#define CP(X, Y) MAP_LOOP_SEP\
45 | 				memcpy(&imgs_r_pre[gm_off + x*map_sz_y*n_chan + y*n_chan], \
46 | 					&imgs[gm_off + (X)*map_sz_y*n_chan + (Y)*n_chan], n_chan*sizeof(imgs[0]));\
47 | 				tree_probs_r_pre[pgm_off + x*map_sz_y + y] = \
48 | 					tree_probs[pgm_off + (X)*map_sz_y + Y];\
49 | 			}}
50 | 
51 | 	#define CP_F(X, Y) MAP_LOOP_SEP\
52 | 				memcpy(&imgs_r[gm_off + x*map_sz_y*n_chan + y*n_chan], \
53 | 					&imgs_r_pre[gm_off + (X)*map_sz_y*n_chan + (Y)*n_chan], n_chan*sizeof(imgs[0]));\
54 | 				tree_probs_r[pgm_off + x*map_sz_y + y] = \
55 | 					tree_probs_r_pre[pgm_off + (X)*map_sz_y + Y];\
56 | 			}}
57 | 	for(int gm = 0; gm < BATCH_SZ; gm++){
58 | 		int op = rand() % 4;
59 | 		int trans = rand() % 2;
60 | 		int gm_off = gm*map_sz_x*map_sz_y*n_chan;
61 | 		int pgm_off = gm*map_sz_x*map_sz_y;
62 | 
63 | 		//////////////////////////////////
64 | 		if(op == 0){ // no transform
65 | 			memcpy(&imgs_r_pre[gm_off], &imgs[gm_off], map_sz_x*map_sz_y*n_chan*sizeof(imgs[0]));
66 | 			memcpy(&tree_probs_r_pre[pgm_off], &tree_probs[pgm_off], map_sz_x*map_sz_y*sizeof(imgs[0]));
67 | 		}else if(op == 1){ // imgs[::-1]
68 | 			CP(map_sz_x - 1 - x, y)
69 | 		}else if(op == 2){ // imgs[:,::-1]
70 | 			CP(x, map_sz_y - 1 - y)
71 | 		}else if(op == 3){ // imgs[::-1, ::-1]
72 | 			CP(map_sz_x - 1 - x, map_sz_y - 1 - y)
73 | 		}
74 | 
75 | 		/////////// transpose
76 | 		if(trans == 1){
77 | 			CP_F(y, x)
78 | 		}else{ // direct cp
79 | 			memcpy(&imgs_r[gm_off], &imgs_r_pre[gm_off], map_sz_x*map_sz_y*n_chan*sizeof(imgs[0]));
80 | 			memcpy(&tree_probs_r[pgm_off], &tree_probs_r_pre[pgm_off], map_sz_x*map_sz_y*sizeof(imgs[0]));
81 | 		}
82 | 	}
83 | 	
84 | 	PyObject * ret = PyList_New(2);
85 | 	ASSERT(ret != 0, "err creating output list")
86 | 
87 | 	ASSERT(PyList_SetItem(ret, 0, imgs_r_np) == 0, "failed setting item");
88 | 	ASSERT(PyList_SetItem(ret, 1, tree_probs_r_np) == 0, "failed setting item");
89 | 
90 | 	return ret;
91 | }
92 | 
93 | 


--------------------------------------------------------------------------------
/py_util/session_backup.c:
--------------------------------------------------------------------------------
// Python-callable; `args` is not read and None is returned.
// Applies BMEM to (tree_start2, tree_start, BATCH_SZ).
// NOTE(review): BMEM is defined elsewhere; presumably BMEM(dst, src, n) copies n
// elements from src to dst, i.e. this saves tree_start into tree_start2 -- confirm.
static PyObject *session_backup(PyObject *self, PyObject *args){
	
	BMEM(tree_start2, tree_start, BATCH_SZ)

	Py_RETURN_NONE;
}
 7 | 
// Python-callable; `args` is not read and None is returned.
// Applies BMEM to (tree_start, tree_start2, BATCH_SZ) -- the same arrays as
// session_backup with the first two arguments swapped.
// NOTE(review): BMEM is defined elsewhere; presumably BMEM(dst, src, n) copies n
// elements from src to dst, i.e. this writes tree_start2 back into tree_start -- confirm.
static PyObject *session_restore(PyObject *self, PyObject *args){
	
	BMEM(tree_start, tree_start2, BATCH_SZ)

	Py_RETURN_NONE;
}
14 | 
15 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
# Relaunch bp_tree.py every time it exits, regardless of its exit status.
while :
do
	python bp_tree.py
done
4 | 


--------------------------------------------------------------------------------
/vars.cc:
--------------------------------------------------------------------------------
// Forward declaration; the definition is not in this file.
// The macros below call it with a variable index, a pointer to a tensor's flat
// buffer, and an op code (RETURN_VARS or SET_VARS).
void vars_launcher(int var_idx, void * outputs, char op);
 2 | 
 3 | /// return
 4 | #define MAP_COMPUTE(IDX, OP) {tensorflow::TensorShape shape;\
 5 | 		shape.AddDim(BATCH_SZ);\
 6 | 		shape.AddDim(MAP_SZ_X);\
 7 | 		shape.AddDim(MAP_SZ_Y);\
 8 | 		Tensor* tensor = nullptr;\
 9 | 		OP_REQUIRES_OK(context, context->allocate_output(0, shape, &tensor));\
10 | 		auto outputs = tensor->template flat<int32>();\
11 | 		vars_launcher(IDX, outputs.data(), RETURN_VARS);}
12 | 
// Allocates output 0 as an int8 tensor of shape [BATCH_SZ, MAP_SZ_X, MAP_SZ_Y]
// and passes its flat buffer to vars_launcher(IDX, ..., RETURN_VARS).
// Must be expanded where an `OpKernelContext* context` is in scope.
// The OP argument is accepted but not used; RETURN_VARS is always passed.
#define MAP_COMPUTE_CHAR(IDX, OP) {\
		tensorflow::TensorShape out_shape;\
		out_shape.AddDim(BATCH_SZ);\
		out_shape.AddDim(MAP_SZ_X);\
		out_shape.AddDim(MAP_SZ_Y);\
		Tensor* out_tensor = nullptr;\
		OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &out_tensor));\
		vars_launcher(IDX, out_tensor->template flat<int8>().data(), RETURN_VARS);}
21 | 
// Allocates output 0 as a 1-D tensor of BATCH_SZ elements of type DT and passes
// its flat buffer to vars_launcher(IDX, ..., RETURN_VARS).
// Must be expanded where an `OpKernelContext* context` is in scope.
// The OP argument is accepted but not used; RETURN_VARS is always passed.
#define COMPUTE_BATCH_SZ_DT(IDX, DT, OP) {\
		tensorflow::TensorShape out_shape;\
		out_shape.AddDim(BATCH_SZ);\
		Tensor* out_tensor = nullptr;\
		OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &out_tensor));\
		vars_launcher(IDX, out_tensor->template flat<DT>().data(), RETURN_VARS);}
28 | 
29 | //// set
// Passes the flat int32 buffer of input 0 to vars_launcher(IDX, ..., SET_VARS).
// The input must hold exactly BATCH_SZ*MAP_SZ_X*MAP_SZ_Y elements; otherwise the
// kernel fails with InvalidArgument instead of handing a wrongly sized buffer to
// vars_launcher (the expected shape was previously built but never checked).
// Must be expanded inside a void method where an `OpKernelContext* context` is in
// scope (OP_REQUIRES returns from that method on failure).
// The OP argument is accepted but not used; SET_VARS is always passed.
#define SET_MAP_COMPUTE(IDX, OP) {tensorflow::TensorShape shape;\
		shape.AddDim(BATCH_SZ);\
		shape.AddDim(MAP_SZ_X);\
		shape.AddDim(MAP_SZ_Y);\
		const Tensor& inputs_tensor = context->input(0);\
		OP_REQUIRES(context, inputs_tensor.NumElements() == shape.num_elements(),\
			tensorflow::errors::InvalidArgument("input has ", inputs_tensor.NumElements(),\
				" elements, expected ", shape.num_elements()));\
		auto inputs = inputs_tensor.flat<int32>();\
		vars_launcher(IDX, (void*)inputs.data(), SET_VARS);}
37 | 
// Passes the flat int8 buffer of input 0 to vars_launcher(IDX, ..., SET_VARS).
// The input must hold exactly BATCH_SZ*MAP_SZ_X*MAP_SZ_Y elements; otherwise the
// kernel fails with InvalidArgument instead of handing a wrongly sized buffer to
// vars_launcher (the expected shape was previously built but never checked).
// Must be expanded inside a void method where an `OpKernelContext* context` is in
// scope (OP_REQUIRES returns from that method on failure).
// The OP argument is accepted but not used; SET_VARS is always passed.
#define SET_MAP_COMPUTE_CHAR(IDX, OP) {tensorflow::TensorShape shape;\
		shape.AddDim(BATCH_SZ);\
		shape.AddDim(MAP_SZ_X);\
		shape.AddDim(MAP_SZ_Y);\
		const Tensor& inputs_tensor = context->input(0);\
		OP_REQUIRES(context, inputs_tensor.NumElements() == shape.num_elements(),\
			tensorflow::errors::InvalidArgument("input has ", inputs_tensor.NumElements(),\
				" elements, expected ", shape.num_elements()));\
		auto inputs = inputs_tensor.flat<int8>();\
		vars_launcher(IDX, (void*)inputs.data(), SET_VARS);}
45 | 
// Passes the flat buffer (element type DT) of input 0 to vars_launcher(IDX, ..., SET_VARS).
// The input must hold exactly BATCH_SZ elements; otherwise the kernel fails with
// InvalidArgument instead of handing a wrongly sized buffer to vars_launcher
// (the expected shape was previously built but never checked).
// Must be expanded inside a void method where an `OpKernelContext* context` is in
// scope (OP_REQUIRES returns from that method on failure).
// The OP argument is accepted but not used; SET_VARS is always passed.
#define SET_COMPUTE_BATCH_SZ_DT(IDX, DT, OP) {tensorflow::TensorShape shape;\
		shape.AddDim(BATCH_SZ);\
		const Tensor& inputs_tensor = context->input(0);\
		OP_REQUIRES(context, inputs_tensor.NumElements() == shape.num_elements(),\
			tensorflow::errors::InvalidArgument("input has ", inputs_tensor.NumElements(),\
				" elements, expected ", shape.num_elements()));\
		auto inputs = inputs_tensor.flat<DT>();\
		vars_launcher(IDX, (void*)inputs.data(), SET_VARS);}
51 | 
52 | #include "vars_class_return.cc"
53 | #include "vars_class_set.cc"
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/vars_class_return.cc:
--------------------------------------------------------------------------------
// maps
// Op interfaces for reading variables: each op declares no inputs and a single
// int8 output named "outputs". No shape function is attached here.
REGISTER_OP("Board").Output("outputs: int8");
REGISTER_OP("ValidMvMapInternal").Output("outputs: int8");
 4 | 
 5 | // maps
// Kernel class named after the "Board" op. Compute expands MAP_COMPUTE_CHAR with
// BOARD_IDX, which allocates an int8 [BATCH_SZ, MAP_SZ_X, MAP_SZ_Y] output and
// calls vars_launcher on its buffer.
class Board : public OpKernel {
	public:
	explicit Board(OpKernelConstruction* context) : OpKernel(context) {}
	// The parameter must stay named `context`: the macro refers to it by that name.
	void Compute(OpKernelContext* context) override {
		MAP_COMPUTE_CHAR(BOARD_IDX, RETURN_VARS)
	}
};
13 | 
// Kernel class named after the "ValidMvMapInternal" op. Compute expands
// MAP_COMPUTE_CHAR with VALID_MV_MAP_INTERNAL_IDX, which allocates an int8
// [BATCH_SZ, MAP_SZ_X, MAP_SZ_Y] output and calls vars_launcher on its buffer.
class ValidMvMapInternal : public OpKernel {
	public:
	explicit ValidMvMapInternal(OpKernelConstruction* context) : OpKernel(context) {}
	// The parameter must stay named `context`: the macro refers to it by that name.
	void Compute(OpKernelContext* context) override {
		MAP_COMPUTE_CHAR(VALID_MV_MAP_INTERNAL_IDX, RETURN_VARS)
	}
};
21 | 
22 | 
23 | // maps
24 | REGISTER_KERNEL_BUILDER(Name("Board").Device(DEVICE_GPU), Board);
25 | REGISTER_KERNEL_BUILDER(Name("ValidMvMapInternal").Device(DEVICE_GPU), Board);
26 | 
27 | 


--------------------------------------------------------------------------------
/vars_class_set.cc:
--------------------------------------------------------------------------------
// maps
// Op interfaces for writing variables: each op declares a single int8 input
// named "inputs" and no outputs.
REGISTER_OP("SetBoard").Input("inputs: int8");
REGISTER_OP("SetValidMvMapInternal").Input("inputs: int8");
 4 | 
 5 | // maps
// Kernel class named after the "SetBoard" op. Compute expands SET_MAP_COMPUTE_CHAR
// with BOARD_IDX, which passes the int8 buffer of input 0 to vars_launcher with SET_VARS.
class SetBoard : public OpKernel {
	public:
	explicit SetBoard(OpKernelConstruction* context) : OpKernel(context) {}
	// The parameter must stay named `context`: the macro refers to it by that name.
	void Compute(OpKernelContext* context) override {
		SET_MAP_COMPUTE_CHAR(BOARD_IDX, SET_VARS)
	}
};
13 | 
// Kernel class named after the "SetValidMvMapInternal" op. Compute expands
// SET_MAP_COMPUTE_CHAR with VALID_MV_MAP_INTERNAL_IDX, which passes the int8
// buffer of input 0 to vars_launcher with SET_VARS.
class SetValidMvMapInternal : public OpKernel {
	public:
	explicit SetValidMvMapInternal(OpKernelConstruction* context) : OpKernel(context) {}
	// The parameter must stay named `context`: the macro refers to it by that name.
	void Compute(OpKernelContext* context) override {
		SET_MAP_COMPUTE_CHAR(VALID_MV_MAP_INTERNAL_IDX, SET_VARS)
	}
};
21 | 
// maps
// Bind each set-op name to the kernel class of the same name on the GPU device.
REGISTER_KERNEL_BUILDER(Name("SetBoard").Device(DEVICE_GPU), SetBoard);
REGISTER_KERNEL_BUILDER(Name("SetValidMvMapInternal").Device(DEVICE_GPU), SetValidMvMapInternal);
25 | 
26 | 


--------------------------------------------------------------------------------