├── .gitignore ├── README.md ├── __init__.py ├── architectures ├── __init__.py ├── tree_tf_op.py ├── tree_tf_op_kfac.py └── tree_tf_op_multi.py ├── bp_tree.py ├── build.sh ├── build_centos.sh ├── cuda_includes.h ├── cuda_op_kernel.cc ├── cuda_op_kernel.cu.cc ├── global_vars.py ├── gnu_go_test.py ├── includes.h ├── kernels ├── create_batch.cu ├── init_op.cu ├── init_state.cu ├── max_prob_to_coord_valid_mvs.cu ├── move_random_ai.cu ├── move_unit.cu ├── prob_to_coord.cu ├── prob_to_coord_valid_mvs.cu ├── return_state.cu ├── return_winner.cu ├── session_backup.cu.cc ├── vars.cu.cc └── verify_integrity.cu ├── models ├── go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.data-00000-of-00001 ├── go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.index └── go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.meta ├── net_vs_gnugo.py ├── notebooks ├── go_black.png ├── go_blank.png ├── go_pieces.png ├── go_white.png └── training_visualizations.ipynb ├── play_network_gui.py ├── py_util ├── __init__.py ├── _py_util.c ├── add_valid_mvs.c ├── backup_visit.c ├── build.sh ├── build_centos.sh ├── choose_moves.c ├── includes.h ├── init_tree.c ├── prune_tree.c ├── py_util.py ├── py_util_dyn.py ├── register_mv.c ├── return_probs_map.c ├── return_tree.c ├── rotate_reflect_imgs.c └── session_backup.c ├── run.sh ├── vars.cc ├── vars_class_return.cc └── vars_class_set.cc /.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb 2 | notebooks/.ipynb_checkpoints/* 3 | models/* 4 | *.prof 5 | p.py 6 | *.o 7 | *.swn 8 | *.lprof 9 | *.so 10 | *.npy 11 | *.pyc 12 | *.swp 13 | *.swo 14 | *.exp 15 | *.lib 16 | *.pyd 17 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # alpha_go_zero_implementation 2 | An implementation of the Alpha Go Zero algorithm, runnable on a single GPU, released into the public domain. 3 | I provide a semi-detailed overview along with instructions for compiling and running at http://arcanefortune.com/alpha_go_initial.php 4 | An updated discussion of the code can be found at: http://arcanefortune.com/alpha_go_update.php 5 | 6 | See "build.sh" and "build_centos.sh" for examples on how to compile the code. It can vary slightly with each platform. "build_centos8.sh" works 7 | on Centos 8 as of Jan-23-2020. See the links above for more details on the setups I've compiled on. 8 | 9 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/__init__.py -------------------------------------------------------------------------------- /architectures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/architectures/__init__.py -------------------------------------------------------------------------------- /architectures/tree_tf_op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import sys 4 | sys.path.append("..") 5 | import global_vars as gv 6 | import os 7 | 8 | DEVICE = '/gpu:1' 9 | 10 | hdir = os.getenv('HOME') 11 | 12 | imgs_shape = [gv.BATCH_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels] 13 | map_prod = np.prod(gv.map_sz) 14 | 15 | with tf.device(DEVICE): 16 | sess = tf.InteractiveSession() 17 | tf_op = tf.load_op_library('cuda_op_kernel.so') 18 | 19 | 
##################### set / load vars 20 | set_var_int32 = tf.placeholder(tf.int32, shape=[None]) 21 | set_var_int8 = tf.placeholder(tf.int8, shape=[None]) 22 | 23 | gm_var_nms = ['board', 'valid_mv_map_internal'] 24 | 25 | gm_var_placeholders = ['set_var_int8']*2 26 | 27 | gm_vars = {}; set_gm_vars = {} 28 | 29 | def return_vars(): 30 | v = {} 31 | for var in gm_var_nms: 32 | exec('v["%s"] = sess.run(gm_vars["%s"])' % (var, var)) 33 | return v 34 | 35 | def set_vars(v): 36 | for var, placeholder in zip(gm_var_nms, gm_var_placeholders): 37 | exec('sess.run(set_gm_vars["%s"], feed_dict={%s: v["%s"].ravel()})' % (var, placeholder, var)) 38 | 39 | ######################## 40 | 41 | def tf_pearsonr(val, val_target_nmean): # returns the NEGATIVE Pearson correlation between `val` and the target tensor passed as the second argument 42 | val_nmean = val - tf.reduce_mean(val) 43 | val_target_nmean = val_target_nmean - tf.reduce_mean(val_target_nmean) # mean-centre the argument itself (previously read the module-global `val_target` and ignored the parameter) 44 | 45 | val_std = tf.sqrt(tf.reduce_sum(val_nmean**2)) 46 | val_target_std = tf.sqrt(tf.reduce_sum(val_target_nmean**2)) 47 | 48 | return -tf.reduce_sum(val_nmean * val_target_nmean) / (val_std * val_target_std) 49 | 50 | 51 | def init_model(N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM, \ 52 | LSQ_LAMBDA, LSQ_REG_LAMBDA, POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, VALR_LAMBDA, L2_LAMBDA, WEIGHT_STD=1e-2): 53 | 54 | global convs, weights, outputs, output_nms, pol, pol_pre, pol_mean_sq_err, train_step 55 | global val, val_mean_sq_err, pol_loss, entrop, saver, update_ops 56 | global val_pearsonr, pol_mean_sq_reg_err, loss, Q_map, P_map, visit_count_map 57 | global move_random_ai, init_state, nn_move_unit, nn_prob_move_unit 58 | global tree_to_coords, nn_max_to_coords, nn_prob_to_coords 59 | global tree_prob_move_unit, backup_visit, backup_visit_terminal, tree_det_move_unit 60 | global nn_prob_move_unit, nn_max_move_unit, tree_prob_visit_coord, tree_det_visit_coord 61 | global sess, imgs, valid_mv_map, pol_target, val_target, moving_player 62 | global gm_vars, set_gm_vars, oFC1, session_restore, session_backup 63 | global winner, dir_pre, dir_a 64
| global games_running, score, n_captures, pol_cross_entrop_err 65 | global nn_prob_to_coords_valid_mvs, nn_max_prob_to_coords_valid_mvs 66 | global nn_prob_move_unit_valid_mvs, nn_max_prob_move_unit_valid_mvs 67 | assert len(N_FILTERS) == len(FILTER_SZS) == len(STRIDES) 68 | 69 | with tf.device(DEVICE): 70 | #### init state 71 | init_state = tf_op.init_state() 72 | 73 | dir_pre = tf.placeholder(tf.float32, shape=()) 74 | dir_a = tf.placeholder(tf.float32, shape=()) 75 | 76 | moving_player = tf.placeholder(tf.int32, shape=()) 77 | winner, score, n_captures = tf_op.return_winner(moving_player) 78 | 79 | games_running = tf.ones(gv.BATCH_SZ, dtype=tf.int8) 80 | 81 | session_restore = tf_op.session_restore() 82 | session_backup = tf_op.session_backup() 83 | 84 | ##### vars 85 | for var, placeholder in zip(gm_var_nms, gm_var_placeholders): 86 | exec('gm_vars["%s"] = tf_op.%s()' % (var, var)) 87 | exec('set_gm_vars["%s"] = tf_op.set_%s(%s)' % (var, var, placeholder)) 88 | 89 | #### imgs 90 | imgs, valid_mv_map = tf_op.create_batch(moving_player) 91 | #print imgs.shape, imgs_shape 92 | assert imgs.shape == tf.placeholder(tf.float32, shape=imgs_shape).shape, 'tf op shape not matching global_vars' 93 | move_random_ai = tf_op.move_random_ai(moving_player) 94 | 95 | global move_frm_inputs, to_coords_input 96 | to_coords_input = tf.placeholder(tf.int32, shape=gv.BATCH_SZ) 97 | move_frm_inputs = tf_op.move_unit(to_coords_input, moving_player) 98 | 99 | #### 100 | pol_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ, map_prod]) 101 | val_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ]) 102 | 103 | convs = []; weights = []; outputs = []; output_nms = [] 104 | 105 | convs += [tf.nn.relu(tf.contrib.layers.batch_norm(tf.layers.conv2d(inputs=imgs, filters=N_FILTERS[0], kernel_size=[FILTER_SZS[0]]*2, 106 | strides=[STRIDES[0]]*2, padding="same", activation=None, name='conv0', 107 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA), 108 | 
bias_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA))))] 109 | 110 | weights += [tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'conv0')[0]] 111 | outputs += [convs[-1]] 112 | output_nms += ['conv0'] 113 | 114 | for i in range(1, len(N_FILTERS)): 115 | output_nms += ['conv' + str(i)] 116 | 117 | conv_out = tf.contrib.layers.batch_norm(\ 118 | tf.layers.conv2d(inputs=convs[i-1], filters=N_FILTERS[i], kernel_size=[FILTER_SZS[i]]*2, 119 | strides=[STRIDES[i]]*2, padding="same", activation=None, name=output_nms[-1], 120 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA), 121 | bias_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA))) 122 | 123 | # residual bypass 124 | if (i % 2) == 0: 125 | conv_out += convs[i-2] 126 | 127 | convs += [tf.nn.relu(conv_out)] 128 | 129 | weights += [tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, output_nms[-1])[0]] 130 | outputs += [convs[-1]] 131 | 132 | out_sz = np.int(np.prod(convs[-1].shape[1:])) 133 | convr = tf.reshape(convs[-1], [gv.BATCH_SZ, out_sz]) 134 | 135 | ################### pol 136 | # FC layer 137 | wFC1p = tf.Variable(tf.random_normal([out_sz, N_FC1], stddev=WEIGHT_STD), name='wFC1') 138 | bFC1p = tf.Variable(tf.random_normal([N_FC1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD), name='bFC1') 139 | 140 | oFC1p = tf.nn.relu(tf.matmul(convr, wFC1p) + bFC1p) 141 | 142 | weights += [wFC1p] 143 | outputs += [oFC1p] 144 | output_nms += ['oFC1p'] 145 | 146 | # FC layer 147 | wFC2p = tf.Variable(tf.random_normal([N_FC1, map_prod], stddev=WEIGHT_STD), name='wFC2') 148 | bFC2p = tf.Variable(tf.random_normal([map_prod], mean=WEIGHT_STD*2, stddev=WEIGHT_STD), name='bFC2') 149 | 150 | pol_pre = tf.nn.relu(tf.matmul(oFC1p, wFC2p) + bFC2p) 151 | 152 | weights += [wFC2p] 153 | outputs += [pol_pre] 154 | output_nms += ['pol_pre'] 155 | 156 | pol = tf.nn.softmax(pol_pre) 157 | outputs += [pol] 158 | output_nms += ['pol'] 159 | 160 | nn_max_to_coords = tf.argmax(pol_pre, 1, 
output_type=tf.int32) 161 | nn_prob_to_coords = tf_op.prob_to_coord(pol, dir_pre, dir_a) 162 | nn_prob_to_coords_valid_mvs = tf_op.prob_to_coord_valid_mvs(pol) 163 | nn_max_prob_to_coords_valid_mvs = tf_op.max_prob_to_coord_valid_mvs(pol) 164 | 165 | # move unit 166 | nn_max_move_unit = tf_op.move_unit(nn_max_to_coords, moving_player) 167 | nn_prob_move_unit = tf_op.move_unit(nn_prob_to_coords, moving_player) 168 | nn_prob_move_unit_valid_mvs = tf_op.move_unit(nn_prob_to_coords_valid_mvs, moving_player) 169 | nn_max_prob_move_unit_valid_mvs = tf_op.move_unit(nn_max_prob_to_coords_valid_mvs, moving_player) 170 | 171 | # sq 172 | sq_err = tf.reduce_sum((pol - pol_target)**2, axis=1) 173 | pol_mean_sq_err = tf.reduce_mean(sq_err) 174 | 175 | # sq reg 176 | sq_err_reg = tf.reduce_sum(pol_pre**2, axis=1) 177 | pol_mean_sq_reg_err = tf.reduce_mean(sq_err_reg) 178 | 179 | # cross entrop 180 | pol_ln = tf.log(pol) 181 | pol_cross_entrop_err = -tf.reduce_mean(pol_target*pol_ln) 182 | 183 | ################# val 184 | # FC layer 185 | wFC1v = tf.Variable(tf.random_normal([out_sz, N_FC1], stddev=WEIGHT_STD), name='val_wFC1') 186 | bFC1v = tf.Variable(tf.random_normal([N_FC1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD), name='val_bFC1') 187 | 188 | #oFC1v = tf.nn.relu(tf.matmul(convr, wFC1v) + bFC1v) 189 | oFC1v = tf.matmul(convr, wFC1v) + bFC1v 190 | 191 | weights += [wFC1v] 192 | outputs += [oFC1v] 193 | output_nms += ['oFC1v'] 194 | 195 | # FC layer 196 | wFC2v = tf.Variable(tf.random_normal([N_FC1, 1], stddev=WEIGHT_STD), name='val') 197 | bFC2v = tf.Variable(tf.random_normal([1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD), name='val') 198 | 199 | val = tf.tanh(tf.squeeze(tf.matmul(oFC1v, wFC2v) + bFC2v)) 200 | 201 | weights += [wFC2v] 202 | outputs += [val] 203 | output_nms += ['val'] 204 | 205 | # sq error 206 | val_mean_sq_err = tf.reduce_mean((val - val_target)**2) 207 | 208 | # pearson 209 | val_pearsonr = tf_pearsonr(val, val_target) 210 | 211 | ########## FC l2 reg 212 | 
FC_L2_reg = 0 213 | for weights in [wFC1v, wFC2v, bFC1v, bFC2v, wFC1p, wFC2p, bFC1p, bFC2p]: 214 | FC_L2_reg += tf.reduce_sum(weights**2) 215 | FC_L2_reg *= (L2_LAMBDA/2.) 216 | 217 | ################### movement from tree statistics 218 | visit_count_map = tf.placeholder(tf.float32, shape=(gv.BATCH_SZ, gv.map_szt)) 219 | 220 | tree_prob_visit_coord = tf_op.prob_to_coord(visit_count_map, dir_pre, dir_a) 221 | tree_det_visit_coord = tf.argmax(visit_count_map, 1, output_type=tf.int32) 222 | 223 | tree_det_move_unit = tf_op.move_unit(tree_det_visit_coord, moving_player) 224 | tree_prob_move_unit = tf_op.move_unit(tree_prob_visit_coord, moving_player) 225 | 226 | ################### initialize 227 | 228 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 229 | 230 | loss = LSQ_LAMBDA * pol_mean_sq_err + \ 231 | LSQ_REG_LAMBDA * pol_mean_sq_reg_err + \ 232 | POL_CROSS_ENTROP_LAMBDA * pol_cross_entrop_err + \ 233 | VAL_LAMBDA * val_mean_sq_err + \ 234 | VALR_LAMBDA * val_pearsonr + \ 235 | tf.losses.get_regularization_loss() + FC_L2_reg 236 | 237 | with tf.control_dependencies(update_ops): 238 | train_step = tf.train.MomentumOptimizer(EPS, MOMENTUM).minimize(loss) 239 | 240 | sess.run(tf.global_variables_initializer()) 241 | 242 | # saving 243 | saver = tf.train.Saver() 244 | -------------------------------------------------------------------------------- /architectures/tree_tf_op_kfac.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import sys 4 | sys.path.append("..") 5 | import global_vars as gv 6 | import os 7 | import kfac 8 | sess = tf.InteractiveSession() 9 | 10 | hdir = os.getenv('HOME') 11 | tf_op = tf.load_op_library('cuda_op_kernel.so') 12 | 13 | imgs_shape = [gv.BATCH_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels] 14 | map_prod = np.prod(gv.map_sz) 15 | 16 | ##################### set / load vars 17 | set_var_int32 = tf.placeholder(tf.int32, shape=[None]) 18 | 
set_var_int8 = tf.placeholder(tf.int8, shape=[None]) 19 | 20 | gm_var_nms = ['board', 'valid_mv_map_internal'] 21 | 22 | gm_var_placeholders = ['set_var_int8']*2 23 | 24 | gm_vars = {}; set_gm_vars = {} 25 | 26 | def return_vars(): 27 | v = {} 28 | for var in gm_var_nms: 29 | exec('v["%s"] = sess.run(gm_vars["%s"])' % (var, var)) 30 | return v 31 | 32 | def set_vars(v): 33 | for var, placeholder in zip(gm_var_nms, gm_var_placeholders): 34 | exec('sess.run(set_gm_vars["%s"], feed_dict={%s: v["%s"].ravel()})' % (var, placeholder, var)) 35 | 36 | ######################## 37 | 38 | def tf_pearsonr(val, val_target_nmean): 39 | val_nmean = val - tf.reduce_mean(val) 40 | val_target_nmean = val_target - tf.reduce_mean(val_target) 41 | 42 | val_std = tf.sqrt(tf.reduce_sum(val_nmean**2)) 43 | val_target_std = tf.sqrt(tf.reduce_sum(val_target_nmean**2)) 44 | 45 | return -tf.reduce_sum(val_nmean * val_target_nmean) / (val_std * val_target_std) 46 | 47 | 48 | def init_model(N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM, \ 49 | POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, WEIGHT_STD=1e-2): 50 | 51 | global convs, weights, outputs, output_nms, pol, pol_pre, pol_mean_sq_err, train_step 52 | global val, val_mean_sq_err, pol_loss, entrop, saver, update_ops 53 | global val_pearsonr, pol_mean_sq_reg_err, loss, Q_map, P_map, visit_count_map 54 | global move_random_ai, init_state, nn_move_unit, nn_prob_move_unit 55 | global tree_to_coords, nn_max_to_coords, nn_prob_to_coords 56 | global tree_prob_move_unit, backup_visit, backup_visit_terminal, tree_det_move_unit 57 | global nn_prob_move_unit, nn_max_move_unit, tree_prob_visit_coord, tree_det_visit_coord 58 | global sess, imgs, valid_mv_map, pol_target, val_target, moving_player 59 | global gm_vars, set_gm_vars, oFC1, session_restore, session_backup 60 | global winner, dir_pre, dir_a 61 | global games_running, score, n_captures, pol_cross_entrop_err 62 | global nn_prob_to_coords_valid_mvs, nn_max_prob_to_coords_valid_mvs 63 | global 
nn_prob_move_unit_valid_mvs, nn_max_prob_move_unit_valid_mvs 64 | assert len(N_FILTERS) == len(FILTER_SZS) == len(STRIDES) 65 | 66 | #### init state 67 | layer_collection = kfac.LayerCollection() 68 | init_state = tf_op.init_state() 69 | 70 | dir_pre = tf.placeholder(tf.float32, shape=()) 71 | dir_a = tf.placeholder(tf.float32, shape=()) 72 | 73 | moving_player = tf.placeholder(tf.int32, shape=()) 74 | winner, score, n_captures = tf_op.return_winner(moving_player) 75 | 76 | games_running = tf.ones(gv.BATCH_SZ, dtype=tf.int8) 77 | 78 | session_restore = tf_op.session_restore() 79 | session_backup = tf_op.session_backup() 80 | 81 | ##### vars 82 | for var, placeholder in zip(gm_var_nms, gm_var_placeholders): 83 | exec('gm_vars["%s"] = tf_op.%s()' % (var, var)) 84 | exec('set_gm_vars["%s"] = tf_op.set_%s(%s)' % (var, var, placeholder)) 85 | 86 | #### imgs 87 | imgs, valid_mv_map = tf_op.create_batch(moving_player) 88 | #print imgs.shape, imgs_shape 89 | assert imgs.shape == tf.placeholder(tf.float32, shape=imgs_shape).shape, 'tf op shape not matching global_vars' 90 | move_random_ai = tf_op.move_random_ai(moving_player) 91 | 92 | global move_frm_inputs, to_coords_input 93 | to_coords_input = tf.placeholder(tf.int32, shape=gv.BATCH_SZ) 94 | move_frm_inputs = tf_op.move_unit(to_coords_input, moving_player) 95 | 96 | #### 97 | pol_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ, map_prod]) 98 | val_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ]) 99 | 100 | convs = []; weights = []; outputs = []; output_nms = [] 101 | 102 | layer = tf.layers.Conv2D(filters=N_FILTERS[0], kernel_size=[FILTER_SZS[0]]*2, 103 | kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD), 104 | strides=[STRIDES[0]]*2, padding="same", activation=None, name='conv0') 105 | preactivations = layer(imgs) 106 | activations = tf.nn.relu(preactivations) 107 | 108 | layer_collection.register_conv2d((layer.kernel, layer.bias), (1,1,1,1), "SAME", imgs, preactivations) 109 | 110 | 
convs += [activations] 111 | 112 | for i in range(1, len(N_FILTERS)): 113 | layer = tf.layers.Conv2D(filters=N_FILTERS[i], kernel_size=[FILTER_SZS[i]]*2, 114 | kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD), 115 | strides=[STRIDES[i]]*2, padding="same", activation=None, name='conv%i' % i) 116 | 117 | preactivations = layer(convs[i-1]) 118 | 119 | layer_collection.register_conv2d((layer.kernel, layer.bias), (1,1,1,1), "SAME", convs[i-1], preactivations) 120 | 121 | # residual bypass 122 | if (i % 2) == 0: 123 | preactivations += convs[i-2] 124 | 125 | activations = tf.nn.relu(preactivations) 126 | convs += [activations] 127 | 128 | out_sz = np.int(np.prod(convs[-1].shape[1:])) 129 | convr = tf.reshape(convs[-1], [gv.BATCH_SZ, out_sz]) 130 | 131 | ################### pol 132 | # FC layer 133 | layer = tf.layers.Dense(N_FC1, kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD), name='FC1') 134 | preactivations = layer(convr) 135 | oFC1p = tf.nn.relu(preactivations) 136 | 137 | layer_collection.register_fully_connected((layer.kernel, layer.bias), convr, preactivations) 138 | 139 | # FC layer 140 | layer = tf.layers.Dense(map_prod, kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD), name='FC2') 141 | preactivations = layer(oFC1p) 142 | pol_pre = tf.nn.relu(preactivations) 143 | 144 | layer_collection.register_fully_connected((layer.kernel, layer.bias), oFC1p, preactivations) 145 | 146 | layer_collection.register_categorical_predictive_distribution(pol_pre) 147 | 148 | pol = tf.nn.softmax(pol_pre) 149 | 150 | nn_max_to_coords = tf.argmax(pol_pre, 1, output_type=tf.int32) 151 | nn_prob_to_coords = tf_op.prob_to_coord(pol, dir_pre, dir_a) 152 | nn_prob_to_coords_valid_mvs = tf_op.prob_to_coord_valid_mvs(pol) 153 | nn_max_prob_to_coords_valid_mvs = tf_op.max_prob_to_coord_valid_mvs(pol) 154 | 155 | # move unit 156 | nn_max_move_unit = tf_op.move_unit(nn_max_to_coords, moving_player) 157 | nn_prob_move_unit = 
tf_op.move_unit(nn_prob_to_coords, moving_player) 158 | nn_prob_move_unit_valid_mvs = tf_op.move_unit(nn_prob_to_coords_valid_mvs, moving_player) 159 | nn_max_prob_move_unit_valid_mvs = tf_op.move_unit(nn_max_prob_to_coords_valid_mvs, moving_player) 160 | 161 | # sq 162 | sq_err = tf.reduce_sum((pol - pol_target)**2, axis=1) 163 | pol_mean_sq_err = tf.reduce_mean(sq_err) 164 | 165 | # sq reg 166 | sq_err_reg = tf.reduce_sum(pol_pre**2, axis=1) 167 | pol_mean_sq_reg_err = tf.reduce_mean(sq_err_reg) 168 | 169 | # cross entrop 170 | pol_ln = tf.log(pol) 171 | pol_cross_entrop_err = -tf.reduce_mean(pol_target*pol_ln) 172 | 173 | global oFC1v, preactivations 174 | ################# val 175 | # FC layer 176 | layer = tf.layers.Dense(N_FC1, kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD), name='v_FC1') 177 | preactivations = layer(convr) 178 | oFC1v = preactivations 179 | 180 | layer_collection.register_fully_connected((layer.kernel, layer.bias), convr, preactivations) 181 | 182 | # FC layer 183 | layer = tf.layers.Dense(1, kernel_initializer=tf.random_normal_initializer(stddev=WEIGHT_STD), name='v_FC2') 184 | preactivations = layer(oFC1v) 185 | val = tf.squeeze(tf.tanh(preactivations)) 186 | 187 | layer_collection.register_fully_connected((layer.kernel, layer.bias), oFC1v, preactivations) 188 | 189 | layer_collection.register_normal_predictive_distribution(val, var=1.0) 190 | 191 | # sq error 192 | val_mean_sq_err = tf.reduce_mean((val - val_target)**2) 193 | 194 | # pearson 195 | val_pearsonr = tf_pearsonr(val, val_target) 196 | 197 | 198 | ################### movement from tree statistics 199 | visit_count_map = tf.placeholder(tf.float32, shape=(gv.BATCH_SZ, gv.map_szt)) 200 | 201 | tree_prob_visit_coord = tf_op.prob_to_coord(visit_count_map, dir_pre, dir_a) 202 | tree_det_visit_coord = tf.argmax(visit_count_map, 1, output_type=tf.int32) 203 | 204 | tree_det_move_unit = tf_op.move_unit(tree_det_visit_coord, moving_player) 205 | tree_prob_move_unit = 
tf_op.move_unit(tree_prob_visit_coord, moving_player) 206 | 207 | ################### initialize 208 | 209 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 210 | 211 | loss = POL_CROSS_ENTROP_LAMBDA * pol_cross_entrop_err + \ 212 | VAL_LAMBDA * val_mean_sq_err 213 | 214 | params = tf.trainable_variables() 215 | grads = tf.gradients(loss, params) 216 | grad_params = list(zip(grads, params)) 217 | 218 | learning_rate = .25 219 | damping_lambda = .01 220 | moving_avg_decay=.99 221 | kfac_norm_constraint = .0001 222 | kfac_momentum = .9 223 | 224 | optimizer = kfac.optimizer.KfacOptimizer(layer_collection=layer_collection, damping=damping_lambda, 225 | learning_rate=EPS, cov_ema_decay=moving_avg_decay, 226 | momentum=kfac_momentum, norm_constraint=kfac_norm_constraint) 227 | 228 | train_step = optimizer.apply_gradients(grad_params) 229 | 230 | 231 | #with tf.control_dependencies(update_ops): 232 | # #train_step = tf.train.MomentumOptimizer(EPS, MOMENTUM).minimize(loss) 233 | # train_step = tf.train.GradientDescentOptimizer(EPS).minimize(loss) 234 | 235 | sess.run(tf.global_variables_initializer()) 236 | 237 | # saving 238 | saver = tf.train.Saver() 239 | -------------------------------------------------------------------------------- /architectures/tree_tf_op_multi.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import sys 4 | sys.path.append("..") 5 | import global_vars as gv 6 | import os 7 | 8 | hdir = os.getenv('HOME') 9 | 10 | imgs_shape = [gv.BATCH_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels] 11 | map_prod = np.prod(gv.map_sz) 12 | 13 | gm_var_nms = ['board', 'valid_mv_map_internal'] 14 | gm_var_placeholders = ['set_var_int8']*2 15 | 16 | gm_vars = {}; set_gm_vars = {} 17 | 18 | def return_vars(): 19 | v = {} 20 | with tf.device(DEVICE): 21 | for var in gm_var_nms: 22 | exec('v["%s"] = sess.run(gm_vars["%s"])' % (var, var)) 23 | return v 24 | 25 | def 
set_vars(v): 26 | with tf.device(DEVICE): 27 | for var, placeholder in zip(gm_var_nms, gm_var_placeholders): 28 | exec('sess.run(set_gm_vars["%s"], feed_dict={%s: v["%s"].ravel()})' % (var, placeholder, var)) 29 | 30 | def tf_pearsonr(val, val_target_nmean): 31 | val_nmean = val - tf.reduce_mean(val) 32 | val_target_nmean = val_target - tf.reduce_mean(val_target) 33 | 34 | val_std = tf.sqrt(tf.reduce_sum(val_nmean**2)) 35 | val_target_std = tf.sqrt(tf.reduce_sum(val_target_nmean**2)) 36 | 37 | return -tf.reduce_sum(val_nmean * val_target_nmean) / (val_std * val_target_std) 38 | 39 | 40 | # the `training` input dictates whether batch norm statistics are updated 41 | def init_model(DEVICE, N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM, \ 42 | LSQ_LAMBDA, LSQ_REG_LAMBDA, POL_CROSS_ENTROP_LAMBDA, 43 | VAL_LAMBDA, VALR_LAMBDA, L2_LAMBDA, WEIGHT_STD=1e-2, training=True): 44 | 45 | global sess, tf_op, set_var_int32, set_var_int8 46 | global convs, weights, output_nms, pol, pol_pre, pol_mean_sq_err, train_step 47 | global val, val_mean_sq_err, pol_loss, entrop, saver, update_ops 48 | global val_pearsonr, pol_mean_sq_reg_err, loss, Q_map, P_map, visit_count_map 49 | global move_random_ai, init_state, nn_move_unit, nn_prob_move_unit 50 | global tree_to_coords, nn_max_to_coords, nn_prob_to_coords 51 | global tree_prob_move_unit, backup_visit, backup_visit_terminal, tree_det_move_unit 52 | global nn_prob_move_unit, nn_max_move_unit, tree_prob_visit_coord, tree_det_visit_coord 53 | global sess, imgs, imgs32, valid_mv_map, pol_target, val_target, moving_player 54 | global gm_vars, set_gm_vars, oFC1, session_restore, session_backup 55 | global winner, to_coords_input 56 | global score, n_captures, pol_cross_entrop_err 57 | global nn_prob_to_coords_valid_mvs, nn_max_prob_to_coords_valid_mvs 58 | global nn_prob_move_unit_valid_mvs, nn_max_prob_move_unit_valid_mvs 59 | global move_frm_inputs 60 | assert len(N_FILTERS) == len(FILTER_SZS) == len(STRIDES) 61 | 62 | imgs = {}; 
valid_mv_map = {} 63 | move_random_ai = {} 64 | 65 | convs = {}; weights = {}; output_nms = {} 66 | pol = {}; pol_pre = {}; val = {} 67 | nn_max_to_coords = {}; nn_prob_to_coords = {}; nn_prob_to_coords_valid_mvs = {} 68 | nn_max_prob_to_coords_valid_mvs = {} 69 | nn_max_move_unit = {}; nn_prob_move_unit = {}; nn_prob_move_unit_valid_mvs = {} 70 | nn_max_prob_move_unit_valid_mvs = {} 71 | 72 | with tf.device(DEVICE): 73 | sess = tf.InteractiveSession() 74 | if DEVICE == '/gpu:0': 75 | tf_op = tf.load_op_library('cuda_op_kernel_75.so') 76 | else: 77 | tf_op = tf.load_op_library('cuda_op_kernel_52.so') 78 | 79 | ##################### set / load vars 80 | set_var_int32 = tf.placeholder(tf.int32, shape=[None]) 81 | set_var_int8 = tf.placeholder(tf.int8, shape=[None]) 82 | 83 | #### init state 84 | init_state = tf_op.init_state() 85 | 86 | moving_player = tf.placeholder(tf.int8, shape=()) 87 | winner, score, n_captures = tf_op.return_winner(moving_player) 88 | 89 | visit_count_map = tf.placeholder(tf.float16, shape=(gv.BATCH_SZ, gv.map_szt)) # map of visits 90 | to_coords_input = tf.placeholder(tf.int16, shape=gv.BATCH_SZ) # simply the coordinates 91 | 92 | pol_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ, map_prod]) 93 | val_target = tf.placeholder(tf.float32, shape=[gv.BATCH_SZ]) 94 | 95 | session_restore = tf_op.session_restore() 96 | session_backup = tf_op.session_backup() 97 | 98 | ##### vars 99 | for var, placeholder in zip(gm_var_nms, gm_var_placeholders): 100 | exec('gm_vars["%s"] = tf_op.%s()' % (var, var)) 101 | exec('set_gm_vars["%s"] = tf_op.set_%s(%s)' % (var, var, placeholder)) 102 | 103 | #### imgs 104 | imgs, valid_mv_map = tf_op.create_batch(moving_player) # output is float16 105 | imgs32 = tf.cast(imgs, tf.float32) 106 | assert imgs.shape == tf.placeholder(tf.float16, shape=imgs_shape).shape, 'tf op shape not matching global_vars' 107 | move_random_ai = tf_op.move_random_ai(moving_player) 108 | 109 | move_frm_inputs = 
tf_op.move_unit(to_coords_input, moving_player) # deterministically move from input coordinates 110 | 111 | ################### movement from tree statistics (must be supplied--these are placeholders) 112 | tree_prob_visit_coord = tf_op.prob_to_coord(visit_count_map) 113 | tree_det_visit_coord = tf.cast(tf.argmax(visit_count_map, 1, output_type=tf.int32), tf.int16) 114 | 115 | tree_det_move_unit = tf_op.move_unit(tree_det_visit_coord, moving_player) 116 | tree_prob_move_unit = tf_op.move_unit(tree_prob_visit_coord, moving_player) 117 | 118 | ############ specifics of how 3 networks will be initialized (on each card) 119 | scopes = ['eval', 'main', 'eval32'] 120 | dtypes = ['float16', 'float16', 'float32'] 121 | if training: 122 | #trainings = [False, False, True] 123 | trainings = [True, True, True] 124 | else: 125 | trainings = [False, False, False] 126 | 127 | for s in scopes: 128 | convs[s] = []; weights[s] = []; output_nms[s] = [] 129 | 130 | ################ network (f32 and f16 weights) 131 | for s, d, t in zip(scopes, dtypes, trainings): 132 | with tf.variable_scope(s): 133 | if s == 'eval32': 134 | # conv2d: "channels_last (default) corresponds to inputs with shape (batch, height, width, channels)" 135 | # https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/layers/Conv2D 136 | 137 | # batch_norm: "Can be used as a normalizer function for conv2d and fully_connected. 
The normalization 138 | # is over all but the last dimension if data_format is NHWC (default)" 139 | # https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/contrib/layers/batch_norm 140 | convs[s] += [tf.nn.relu(tf.contrib.layers.batch_norm(tf.layers.conv2d(inputs=imgs32, filters=N_FILTERS[0], kernel_size=[FILTER_SZS[0]]*2, 141 | strides=[STRIDES[0]]*2, padding="same", activation=None, name='conv0', 142 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA), 143 | bias_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA)), is_training=t))] 144 | else: # float16 models 145 | convs[s] += [tf.nn.relu(tf.contrib.layers.batch_norm(tf.layers.conv2d(inputs=imgs, filters=N_FILTERS[0], kernel_size=[FILTER_SZS[0]]*2, 146 | strides=[STRIDES[0]]*2, padding="same", activation=None, name='conv0', 147 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA), 148 | bias_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA)), is_training=t))] 149 | 150 | weights[s] += [tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, s + '/conv0')[0]] 151 | output_nms[s] += ['conv0'] 152 | 153 | # convolutional layers 154 | for i in range(1, len(N_FILTERS)): 155 | output_nms[s] += ['conv' + str(i)] 156 | 157 | conv_out = tf.contrib.layers.batch_norm(\ 158 | tf.layers.conv2d(inputs=convs[s][i-1], filters=N_FILTERS[i], kernel_size=[FILTER_SZS[i]]*2, 159 | strides=[STRIDES[i]]*2, padding="same", activation=None, name=output_nms[s][-1], 160 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA), 161 | bias_regularizer=tf.contrib.layers.l2_regularizer(scale=L2_LAMBDA)), is_training=t) 162 | 163 | # residual bypass 164 | if (i % 2) == 0: 165 | conv_out += convs[s][i-2] 166 | 167 | convs[s] += [tf.nn.relu(conv_out)] 168 | 169 | weights[s] += [tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, s + '/' + output_nms[s][-1])[0]] 170 | 171 | out_sz = np.int(np.prod(convs[s][-1].shape[1:])) 172 | convr = tf.reshape(convs[s][-1], 
[gv.BATCH_SZ, out_sz]) 173 | 174 | ################### policy output head (pol) 175 | # FC layer 176 | wFC1p = tf.Variable(tf.random_normal([out_sz, N_FC1], stddev=WEIGHT_STD, dtype=d), name='wFC1') 177 | bFC1p = tf.Variable(tf.random_normal([N_FC1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD, dtype=d), name='bFC1') 178 | 179 | oFC1p = tf.nn.relu(tf.matmul(convr, wFC1p) + bFC1p) 180 | 181 | weights[s] += [wFC1p] 182 | output_nms[s] += ['oFC1p'] 183 | 184 | # FC layer 185 | wFC2p = tf.Variable(tf.random_normal([N_FC1, map_prod], stddev=WEIGHT_STD, dtype=d), name='wFC2') 186 | bFC2p = tf.Variable(tf.random_normal([map_prod], mean=WEIGHT_STD*2, stddev=WEIGHT_STD, dtype=d), name='bFC2') 187 | 188 | pol_pre[s] = tf.nn.relu(tf.matmul(oFC1p, wFC2p) + bFC2p) 189 | 190 | weights[s] += [wFC2p] 191 | output_nms[s] += ['pol_pre'] 192 | 193 | pol[s] = tf.nn.softmax(pol_pre[s]) 194 | output_nms[s] += ['pol'] 195 | 196 | #if s != 'eval32': 197 | nn_max_to_coords[s] = tf.cast(tf.argmax(pol_pre[s], 1, output_type=tf.int32), 'int16') 198 | if s == 'eval32': 199 | pol16 = tf.cast(pol[s], tf.float16) 200 | nn_prob_to_coords[s] = tf_op.prob_to_coord(pol16) 201 | nn_prob_to_coords_valid_mvs[s] = tf_op.prob_to_coord_valid_mvs(pol16) 202 | nn_max_prob_to_coords_valid_mvs[s] = tf_op.max_prob_to_coord_valid_mvs(pol16) 203 | 204 | else: 205 | nn_prob_to_coords[s] = tf_op.prob_to_coord(pol[s]) 206 | nn_prob_to_coords_valid_mvs[s] = tf_op.prob_to_coord_valid_mvs(pol[s]) 207 | nn_max_prob_to_coords_valid_mvs[s] = tf_op.max_prob_to_coord_valid_mvs(pol[s]) 208 | 209 | ####### move unit 210 | # (these take as input coordinates and return flags indicating if movement was possible for each game) 211 | nn_max_move_unit[s] = tf_op.move_unit(nn_max_to_coords[s], moving_player) 212 | nn_prob_move_unit[s] = tf_op.move_unit(nn_prob_to_coords[s], moving_player) 213 | nn_prob_move_unit_valid_mvs[s] = tf_op.move_unit(nn_prob_to_coords_valid_mvs[s], moving_player) 214 | nn_max_prob_move_unit_valid_mvs[s] = 
tf_op.move_unit(nn_max_prob_to_coords_valid_mvs[s], moving_player) 215 | 216 | ################# value output head (val) 217 | # FC layer 218 | wFC1v = tf.Variable(tf.random_normal([out_sz, N_FC1], stddev=WEIGHT_STD, dtype=d), name='val_wFC1') 219 | bFC1v = tf.Variable(tf.random_normal([N_FC1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD, dtype=d), name='val_bFC1') 220 | 221 | #oFC1v = tf.nn.relu(tf.matmul(convr, wFC1v) + bFC1v) 222 | oFC1v = tf.matmul(convr, wFC1v) + bFC1v 223 | 224 | weights[s] += [wFC1v] 225 | output_nms[s] += ['oFC1v'] 226 | 227 | # FC layer 228 | wFC2v = tf.Variable(tf.random_normal([N_FC1, 1], stddev=WEIGHT_STD, dtype=d), name='val') 229 | bFC2v = tf.Variable(tf.random_normal([1], mean=WEIGHT_STD*2, stddev=WEIGHT_STD, dtype=d), name='val') 230 | 231 | val[s] = tf.tanh(tf.squeeze(tf.matmul(oFC1v, wFC2v) + bFC2v)) 232 | 233 | weights[s] += [wFC2v] 234 | output_nms[s] += ['val'] 235 | 236 | 237 | ################### initialize loss 238 | if s == 'eval32': 239 | ########## FC l2 reg 240 | FC_L2_reg = 0 241 | for t_weights in [wFC1v, wFC2v, bFC1v, bFC2v, wFC1p, wFC2p, bFC1p, bFC2p]: 242 | FC_L2_reg += tf.reduce_sum(t_weights**2) 243 | FC_L2_reg *= (L2_LAMBDA/2.) 
244 | 245 | ##### pol 246 | # sq 247 | sq_err = tf.reduce_sum((pol[s] - pol_target)**2, axis=1) 248 | pol_mean_sq_err = tf.reduce_mean(sq_err) 249 | 250 | # sq reg 251 | sq_err_reg = tf.reduce_sum(pol_pre[s]**2, axis=1) 252 | pol_mean_sq_reg_err = tf.reduce_mean(sq_err_reg) 253 | 254 | # cross entrop 255 | pol_ln = tf.log(pol[s]) 256 | pol_cross_entrop_err = -tf.reduce_mean(pol_target*pol_ln) 257 | 258 | #### val 259 | # sq error 260 | val_mean_sq_err = tf.reduce_mean((val[s] - val_target)**2) 261 | 262 | # pearson 263 | val_pearsonr = tf_pearsonr(val[s], val_target) 264 | 265 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=s) 266 | 267 | loss = LSQ_LAMBDA * pol_mean_sq_err + \ 268 | LSQ_REG_LAMBDA * pol_mean_sq_reg_err + \ 269 | POL_CROSS_ENTROP_LAMBDA * pol_cross_entrop_err + \ 270 | VAL_LAMBDA * val_mean_sq_err + \ 271 | VALR_LAMBDA * val_pearsonr + \ 272 | tf.losses.get_regularization_loss(s) + FC_L2_reg 273 | 274 | with tf.control_dependencies(update_ops): 275 | train_step = tf.train.MomentumOptimizer(EPS, MOMENTUM).minimize(loss) 276 | 277 | sess.run(tf.global_variables_initializer()) 278 | 279 | # saving 280 | saver = tf.train.Saver() 281 | -------------------------------------------------------------------------------- /bp_tree.py: -------------------------------------------------------------------------------- 1 | # ------------ 2 | # model copies: 3 | # ------------ 4 | # eval32: model to run bp on, the model which all others are eventually updated to 5 | # eval: float16 versions of `eval32`. updated to follow backprop (the `eval32` model) 6 | # main: older version of `eval` that `eval` model must win against with certainty p < .05 7 | # Once the benchmark is reached, `main` is updated to `eval32`. 
8 | # `main` is used to create all training batches 9 | 10 | import os.path 11 | import pygame 12 | import scipy.stats 13 | import copy 14 | import random 15 | import multiprocessing as mp 16 | import time 17 | import numpy as np 18 | import tensorflow as tf 19 | import global_vars as gv 20 | from datetime import datetime 21 | import architectures.tree_tf_op_multi as arch # the tensorflow model definitions 22 | import py_util.py_util as pu # operates and stores the move branching tree 23 | import gnu_go_test as gt # playing against gnu go 24 | from colorama import Fore, Style 25 | sdir = 'models/' # directory to save and load models 26 | 27 | ################################### configuration: 28 | #### load previous model or start from scratch? (set save_nm = None if you want to start from scratch, i.e, create a new model) 29 | #save_nm = None 30 | save_nm = 'go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy' 31 | 32 | if True: # run on two gpus 33 | MASTER_WORKER = 0 34 | GPU_LIST = [0,1] # gpu card ids 35 | else: # run on one gpu only 36 | MASTER_WORKER = 1 37 | GPU_LIST = [1] 38 | 39 | ###### variables to save 40 | save_vars = ['LSQ_LAMBDA', 'LSQ_REG_LAMBDA', 'POL_CROSS_ENTROP_LAMBDA', 'VAL_LAMBDA', 'VALR_LAMBDA', 'L2_LAMBDA', 'N_REP_TRAIN', 41 | 'FILTER_SZS', 'STRIDES', 'N_FILTERS', 'N_FC1', 'EPS', 'MOMENTUM', 'SAVE_FREQ', 'N_SIM', 'N_TURNS', 'CPUCT', 42 | 'N_EVAL_NN_GMS', 'N_EVAL_NN_GNU_GMS', 'N_EVAL_TREE_GMS', 'N_EVAL_TREE_GNU_GMS', 'CHKP_FREQ', 'BUFFER_SZ', 'N_BATCH_SETS_MIN', 'N_BATCH_SETS_BLOCK', 'N_BATCH_SETS_TOTAL', 43 | 'save_nm', 'start_time', 'EVAL_FREQ', 'boards', 'scores', 'GATE_THRESH', 'N_GATE_BATCH_SETS'] 44 | 45 | training_ex_vars = ['board', 'winner', 'tree_probs', 'batch_set', 'batch_sets_created', 'batch_sets_created_total', 'buffer_loc'] 46 | 47 | logs = ['val_mean_sq_err', 'pol_cross_entrop', 'pol_max_pre', 'pol_max', 'val_pearsonr','opt_batch','eval_batch', 48 | 
'self_eval_win_rate', 'model_promoted', 'self_eval_perc'] 49 | print_logs = ['val_mean_sq_err', 'pol_cross_entrop', 'pol_max', 'val_pearsonr'] 50 | 51 | for nm in ['tree', 'nn']: 52 | for suffix in ['', '_gnu']: 53 | for key in ['win', 'n_captures', 'n_captures_opp', 'score', 'n_mvs', 'boards']: 54 | logs += ['%s_%s%s' % (key, nm, suffix)] 55 | 56 | state_vars = ['log', 'run_time', 'global_batch', 'global_batch_saved', 'global_batch_evald', 'save_counter','boards', 'save_t'] # updated each save 57 | 58 | ########################################## 59 | def ret_d(player): # return dictionary for input into tensorflow 60 | return {arch.moving_player: player} 61 | 62 | # simulate making moves (i.e., use the tree search) 63 | # `scopes` controls which models to use (and their ordering of who plays first) 64 | def run_sim(turn, starting_player, scopes=['main', 'main']): 65 | arch.sess.run(arch.session_backup) 66 | pu.session_backup() 67 | 68 | for sim in range(N_SIM): 69 | # backup then make next move 70 | for turn_sim in range(turn, N_TURNS+1): 71 | for player, s in zip([0,1], scopes): 72 | if turn_sim == turn and starting_player == 1 and player == 0: # skip player 0, has already moved 73 | continue 74 | 75 | # get valid moves, network policy and value estimates: 76 | valid_mv_map, pol, val = arch.sess.run([arch.valid_mv_map, arch.pol[s], arch.val[s]], feed_dict=ret_d(player)) 77 | 78 | # backup visit Q values 79 | if turn_sim != turn: 80 | pu.backup_visit(player, np.array(val, dtype='single')) 81 | 82 | pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree 83 | to_coords = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[0] # choose moves based on policy and Q values (latter of which already stored in tree) 84 | pu.register_mv(player, np.array(to_coords, dtype='int32')) # register move in tree 85 | 86 | arch.sess.run(arch.move_frm_inputs, feed_dict={arch.moving_player: player, arch.to_coords_input: to_coords}) # move network (update GPU 
# Pool initializer (passed as `initializer=init` when the worker pool is built):
# publishes every shared object handed in through `initargs` as a module-level
# global named after its entry in `shared_nms`, i.e. parameter `i_<nm>` becomes
# global `<nm>`.
#
# The previous implementation ran `exec('global ' + nm)` followed by a separate
# `exec('<nm> = i_<nm>')`. A `global` declaration only applies to the code string
# it is compiled with, so the second exec bound a function-local name and the
# module globals were never written by this function. Assigning through
# `globals()` performs the binding that was intended.
def init(i_buffer_loc, i_batch_sets_created, i_batch_sets_created_total, i_batch_set,
         i_s_board, i_s_winner, i_s_tree_probs, i_weights_changed, i_buffer_lock,
         i_weights_lock, i_save_nm, i_new_model, i_weights, i_weights_eval,
         i_eval_games_won, i_eval_batch_sets_played, i_eval_stats_lock, i_scope_next,
         i_eval_batch_sets_main_first):
    # snapshot of the parameters; taken first so it contains nothing but the `i_*` arguments
    handed_in = locals()
    module_ns = globals()

    # a name listed in `shared_nms` without a matching `i_<nm>` parameter raises KeyError,
    # which keeps the list and the signature from silently drifting apart
    for nm in shared_nms:
        module_ns[nm] = handed_in['i_' + nm]
# worker: refresh the local tensorflow copies of `main` and `eval` from the shared
# float16 weight arrays, but only when the master worker has flagged a change.
# returns True when new weights were loaded, False when there was nothing to do
def set_weights():
    # the master worker never pulls from the shared copies
    if WORKER_ID == MASTER_WORKER:
        return False

    with weights_lock:
        # flag still clear: shared weights are unchanged since the last pull
        if not weights_changed.value:
            return False

        for i in range(len(weights_current)):
            # (shared array, tensorflow variable) pairs for layer i: `main` first, then `eval`
            layer_pairs = ((weights[i], weights_current[i]),
                           (weights_eval[i], weights_eval_current[i]))

            for shared_arr, tf_var in layer_pairs:
                # view the shared buffer as float16 and give it the variable's shape
                flat = np.frombuffer(shared_arr.get_obj(), 'float16')
                tf_var.load(flat.reshape(tuple(tf_var.shape.as_list())))

        # acknowledge the update so the next call returns early
        weights_changed.value = 0
        return True
# master: update `eval` to values from backprop (current `eval32` tf weights),
# both in the shared float16 arrays and in the local tensorflow `eval` variables,
# then flag the weights as changed and reset the shared evaluation counters.
def set_eval16_to_eval32_start_eval():
    assert WORKER_ID == MASTER_WORKER  # only the master worker should do this

    # Both locks have to be held: the shared `eval` weights and `weights_changed` are
    # written below (read by set_weights() under `weights_lock`), and so are the
    # evaluation counters (guarded by `eval_stats_lock`).
    # The former `with weights_lock and eval_stats_lock:` evaluated the boolean `and`
    # first and therefore only ever entered `eval_stats_lock`.
    with weights_lock, eval_stats_lock:
        weights_eval32_current_vals = arch.sess.run(weights_eval32_current)  # `eval32` from tf

        for i in range(len(weights_current)):
            # set `eval` shared variables = `eval32` from tf
            w = np.frombuffer(weights_eval[i].get_obj(), 'float16')
            w[:] = weights_eval32_current_vals[i].ravel()

            # update tensorflow `eval` model = `eval32`
            weights_eval_current[i].load(weights_eval32_current_vals[i])

        # tell the other workers to reload, and start the evaluation statistics from zero
        weights_changed.value = 1
        eval_games_won.value = 0
        eval_batch_sets_played.value = 0
        scope_next.value = 0
        eval_batch_sets_main_first.value = 0
model_outperforms = p_val < .05 219 | perc = 100*np.single(eval_games_won.value)/(eval_batch_sets_played.value * gv.BATCH_SZ) 220 | pstr = 'eval wins %i' % eval_games_won.value 221 | pstr += ' sets played %i' % eval_batch_sets_played.value 222 | pstr += ' percent %1.2f' % perc 223 | pstr += ' p %1.3f' % p_val 224 | pstr += ' pass %i' % model_outperforms 225 | print pstr 226 | return model_outperforms, perc 227 | 228 | # plays 2*N_GATE_BATCH_SETS rounds of batches, ensuring ordering of eval and main are balanced 229 | # will also terminate at end of current batch eval if N_GATE_BATCH_SETS+1 have been played 230 | # scope_next: alternates between 0,1 at start of each new batch set. to order which player goes first 231 | def eval_model(): 232 | set_weights() 233 | 234 | while True: 235 | arch.sess.run(arch.init_state) 236 | pu.init_tree() 237 | turn_start_t = time.time() 238 | 239 | ### choose order 240 | with eval_stats_lock: 241 | if scope_next.value == 0: 242 | scopes = ['main', 'eval'] 243 | else: 244 | scopes = ['eval', 'main'] 245 | 246 | scope_next.value = 1 - scope_next.value 247 | 248 | scopes = np.asarray(scopes) 249 | 250 | for turn in range(N_TURNS): 251 | ### make move 252 | for player, s in zip([0,1], scopes): 253 | if eval_batch_sets_played.value >= (2*N_GATE_BATCH_SETS): 254 | return # finished 255 | 256 | run_sim(turn, player, scopes=scopes) 257 | 258 | valid_mv_map, pol = arch.sess.run([arch.valid_mv_map, arch.pol[s]], feed_dict = ret_d(player)) # generate batch and valid moves 259 | 260 | ######### 261 | pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree 262 | visit_count_map = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[-1] # get number of times each node was visited 263 | 264 | to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit], feed_dict={arch.moving_player: player, 265 | arch.visit_count_map: visit_count_map})[0] # make move in proportion to visit counts 266 | 267 | 
pu.register_mv(player, np.array(to_coords, dtype='int32')) # register move in tree 268 | 269 | pu.prune_tree(0) 270 | 271 | if (turn+1) % 2 == 0: 272 | print 'eval finished turn %i (%i sec) GPU %i eval_batch_sets_played %i' % (turn, time.time() - turn_start_t, WORKER_ID, eval_batch_sets_played.value) 273 | 274 | 275 | with eval_stats_lock: 276 | # do not add any more stats for these conditions 277 | if eval_batch_sets_main_first.value >= N_GATE_BATCH_SETS and scopes[0] == 'main': 278 | continue 279 | if (eval_batch_sets_played.value - eval_batch_sets_main_first.value) >= N_GATE_BATCH_SETS and scopes[0] == 'eval': 280 | continue 281 | 282 | eval_player = np.nonzero(scopes == 'eval')[0][0] 283 | res = arch.sess.run(arch.winner, feed_dict={arch.moving_player: eval_player}) 284 | print 'ties', (res == 0).sum(), 'wins', (res == 1).sum(), 'rate %2.3f' % ((res == 1).sum()/np.single(gv.BATCH_SZ)), 'opp wins', (res == -1).sum(), scopes 285 | eval_games_won.value += np.int((res == 1).sum()) 286 | eval_batch_sets_played.value += 1 287 | eval_batch_sets_main_first.value += int(scopes[0] == 'main') 288 | print_eval_stats() 289 | 290 | 291 | def worker(i_WORKER_ID): 292 | global WORKER_ID, weights_current, weights_eval_current, weights_eval32_current, val_mean_sq_err, pol_cross_entrop_err, val_pearsonr 293 | global board, winner, tree_probs, save_d, bp_eval_nodes, t_start, run_time, save_nm 294 | WORKER_ID = i_WORKER_ID 295 | 296 | err_denom = 0; val_pearsonr = 0 297 | val_mean_sq_err = 0; pol_cross_entrop_err = 0; 298 | t_start = datetime.now() 299 | run_time = datetime.now() - datetime.now() 300 | 301 | #### restore 302 | save_d = np.load(sdir + save_nm, allow_pickle=True).item() 303 | 304 | for key in save_vars + state_vars + training_ex_vars: 305 | if (key == 'save_nm') or (key in shared_nms): 306 | continue 307 | exec('global ' + key) 308 | exec('%s = save_d["%s"]' % (key,key)) 309 | 310 | EPS_ORIG = EPS 311 | #EPS = 2e-3 
###################################################### < overrides previous backprop step sizes 312 | 313 | ############# init / load model 314 | DEVICE = '/gpu:%i' % WORKER_ID 315 | arch.init_model(DEVICE, N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM, 316 | LSQ_LAMBDA, LSQ_REG_LAMBDA, POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, VALR_LAMBDA, L2_LAMBDA) 317 | 318 | bp_eval_nodes = [arch.train_step, arch.val_mean_sq_err, arch.pol_cross_entrop_err, arch.val_pearsonr] 319 | 320 | # ops for trainable weights 321 | weights_current = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='main') 322 | weights_eval_current = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='eval/') 323 | weights_eval32_current = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='eval32') 324 | 325 | if new_model == False: 326 | print 'restore nm %s' % save_nm 327 | arch.saver.restore(arch.sess, sdir + save_nm) 328 | if WORKER_ID == MASTER_WORKER: 329 | set_all_shared_to_loaded() 330 | else: #### sync model weights 331 | if WORKER_ID == MASTER_WORKER: 332 | set_all_to_eval32_and_get() 333 | else: 334 | while set_weights() == False: # wait for weights to be set 335 | continue 336 | ###### shared variables 337 | board = np.frombuffer(s_board.get_obj(), 'float16').reshape((BUFFER_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels)) 338 | winner = np.frombuffer(s_winner.get_obj(), 'int8').reshape((N_BATCH_SETS_TOTAL, N_TURNS, 2, gv.BATCH_SZ)) 339 | tree_probs = np.frombuffer(s_tree_probs.get_obj(), 'float32').reshape((BUFFER_SZ, gv.map_szt)) 340 | 341 | ######## local variables 342 | # BUFFER_SZ = N_BATCH_SETS * N_TURNS * 2 * gv.BATCH_SZ 343 | L_BUFFER_SZ = N_TURNS * 2 * gv.BATCH_SZ 344 | board_local = np.zeros((L_BUFFER_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels), dtype='float16') 345 | winner_local = np.zeros((N_TURNS, 2, gv.BATCH_SZ), dtype='int8') 346 | tree_probs_local = np.zeros((L_BUFFER_SZ, gv.map_szt), dtype='float32') 347 | 348 | if EPS_ORIG != EPS: 349 | #save_nm 
+= 'EPS_%2.4f.npy' % EPS 350 | save_d['EPS'] = EPS 351 | print 'saving to', save_nm 352 | 353 | ### sound 354 | if WORKER_ID == MASTER_WORKER: 355 | pygame.init() 356 | pygame.mixer.music.load('/home/tapa/gtr-nylon22.mp3') 357 | 358 | ###### 359 | while True: 360 | #### generate training batches with `main` model 361 | arch.sess.run(arch.init_state) 362 | pu.init_tree() 363 | turn_start_t = time.time() 364 | buffer_loc_local = 0 365 | for turn in range(N_TURNS): 366 | ### make move 367 | for player in [0,1]: 368 | set_weights() 369 | run_sim(turn, player) # using `main` model 370 | 371 | inds = buffer_loc_local + np.arange(gv.BATCH_SZ) # inds to save training vars at 372 | board_local[inds], valid_mv_map, pol = arch.sess.run([arch.imgs, arch.valid_mv_map, arch.pol['main']], feed_dict = ret_d(player)) # generate batch and valid moves 373 | 374 | ######### 375 | pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree 376 | visit_count_map = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[-1] # get number of times each node was visited 377 | 378 | tree_probs_local[inds] = visit_count_map / visit_count_map.sum(1)[:,np.newaxis] 379 | 380 | to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit], feed_dict={arch.moving_player: player, 381 | arch.visit_count_map: visit_count_map})[0] # make move in proportion to visit counts 382 | 383 | pu.register_mv(player, np.array(to_coords, dtype='int32')) # register move in tree 384 | 385 | ############### 386 | 387 | buffer_loc_local += gv.BATCH_SZ 388 | 389 | pu.prune_tree(0) 390 | 391 | if (turn+1) % 2 == 0: 392 | print 'finished turn %i (%i sec) GPU %i batch_sets_created %i (total %i)' % (turn, time.time() - turn_start_t, WORKER_ID, batch_sets_created.value, batch_sets_created_total.value) 393 | 394 | ##### create prob maps 395 | for player in [0,1]: 396 | winner_local[:, player] = arch.sess.run(arch.winner, feed_dict={arch.moving_player: player}) 397 | 398 | #### set shared 
buffers with training variables we just generated from self-play 399 | with buffer_lock: 400 | board[buffer_loc.value:buffer_loc.value + buffer_loc_local] = board_local 401 | tree_probs[buffer_loc.value:buffer_loc.value + buffer_loc_local] = tree_probs_local 402 | winner[batch_set.value] = winner_local 403 | 404 | buffer_loc.value += buffer_loc_local 405 | batch_sets_created.value += 1 406 | batch_sets_created_total.value += 1 407 | batch_set.value += 1 408 | 409 | # save checkpoint 410 | if buffer_loc.value >= BUFFER_SZ or batch_set.value >= N_BATCH_SETS_TOTAL: 411 | buffer_loc.value = 0 412 | batch_set.value = 0 413 | 414 | # save batch only 415 | batch_d = {} 416 | for key in ['tree_probs', 'winner', 'board']: 417 | exec('batch_d["%s"] = copy.deepcopy(np.array(s_%s.get_obj()))' % (key, key)) 418 | batch_save_nm = sdir + save_nm + '_batches' + str(batch_sets_created_total.value) 419 | np.save(batch_save_nm, batch_d) 420 | print 'saved', batch_save_nm 421 | batch_d = {} 422 | 423 | 424 | ################ train/eval/test 425 | if WORKER_ID == MASTER_WORKER and batch_sets_created.value >= N_BATCH_SETS_BLOCK and batch_sets_created_total.value >= N_BATCH_SETS_MIN: 426 | ########### train 427 | with buffer_lock: 428 | if batch_sets_created_total.value < (N_BATCH_SETS_MIN + N_BATCH_SETS_BLOCK): # don't overtrain on the initial set 429 | batch_sets_created.value = N_BATCH_SETS_BLOCK 430 | 431 | if batch_sets_created.value >= N_BATCH_SETS_TOTAL: # if for some reason master worker gets delayed 432 | batch_sets_created.value = N_BATCH_SETS_BLOCK 433 | 434 | board_c = np.array(board, dtype='single') 435 | winner_rc = np.array(winner.ravel(), dtype='single') 436 | 437 | valid_entries = np.prod(np.isnan(tree_probs) == False, 1) * np.nansum(tree_probs, 1) # remove examples with nans or no probabilties 438 | inds_valid = np.nonzero(valid_entries)[0] 439 | print len(inds_valid), 'out of', BUFFER_SZ, 'valid training examples' 440 | 441 | for rep in range(N_REP_TRAIN): 442 | 
random.shuffle(inds_valid) 443 | for batch in range(N_TURNS * batch_sets_created.value): 444 | inds = inds_valid[batch*gv.BATCH_SZ + np.arange(gv.BATCH_SZ)] 445 | 446 | board2, tree_probs2 = pu.rotate_reflect_imgs(board_c[inds], tree_probs[inds]) # rotate and reflect board randomly 447 | 448 | train_dict = {arch.imgs32: board2, 449 | arch.pol_target: tree_probs2, 450 | arch.val_target: winner_rc[inds]} 451 | 452 | val_mean_sq_err_tmp, pol_cross_entrop_err_tmp, val_pearsonr_tmp = \ 453 | arch.sess.run(bp_eval_nodes, feed_dict=train_dict)[1:] 454 | 455 | # update logs 456 | val_mean_sq_err += val_mean_sq_err_tmp 457 | pol_cross_entrop_err += pol_cross_entrop_err_tmp 458 | val_pearsonr += val_pearsonr_tmp 459 | global_batch += 1 460 | err_denom += 1 461 | 462 | batch_sets_created.value = 0 463 | 464 | ############### `eval` against prior version of self (`main`) 465 | set_eval16_to_eval32_start_eval() # update `eval` tf and shared copies to follow backprop (`eval32`) 466 | eval_model() # run match(es) 467 | with eval_stats_lock: 468 | print '-------------------' 469 | model_outperforms, self_eval_perc = print_eval_stats() 470 | print '------------------' 471 | if model_outperforms: # update `eval` AND `main` both tf and shared copies to follow backprop 472 | set_all_to_eval32_and_get() 473 | 474 | ##### network evaluation against random player and GNU Go 475 | global_batch_evald = global_batch 476 | global_batch_saved = global_batch 477 | t_eval = time.time() 478 | print 'evaluating nn' 479 | 480 | d = ret_d(0) 481 | 482 | ################## monitor training progress: 483 | # test `eval` against GNU Go and a player that makes only random moves 484 | for nm, N_GMS_L in zip(['nn','tree'], [[N_EVAL_NN_GNU_GMS, N_EVAL_NN_GMS], [N_EVAL_TREE_GMS, N_EVAL_TREE_GNU_GMS]]): 485 | for gnu, N_GMS in zip([True,False], N_GMS_L): 486 | if N_GMS == 0: 487 | continue 488 | key = '%s%s' % (nm, '' + gnu*'_gnu') 489 | t_key = time.time() 490 | boards[key] = np.zeros((N_TURNS,) + 
gv.INPUTS_SHAPE[:-1], dtype='int8') 491 | n_mvs = 0.; win_eval = 0.; score_eval = 0.; n_captures_eval = np.zeros(2, dtype='single') 492 | for gm in range(N_GMS): 493 | arch.sess.run(arch.init_state) 494 | pu.init_tree() 495 | # init gnu state 496 | if gnu: 497 | gt.init_board(arch.sess.run(arch.gm_vars['board'])) 498 | 499 | for turn in range(N_TURNS): 500 | board_tmp = arch.sess.run(arch.gm_vars['board']) 501 | 502 | #### search / make move 503 | if nm == 'tree': 504 | run_sim(turn) 505 | assert False 506 | else: 507 | # prob choose first move, deterministically choose remainder 508 | if turn == 0: 509 | to_coords = arch.sess.run([arch.nn_prob_to_coords_valid_mvs['eval'], arch.nn_prob_move_unit_valid_mvs['eval']], feed_dict=d)[0] 510 | else: 511 | to_coords = arch.sess.run([arch.nn_max_prob_to_coords_valid_mvs['eval'], arch.nn_max_prob_move_unit_valid_mvs['eval']], feed_dict=d)[0] 512 | 513 | 514 | board_tmp2 = arch.sess.run(arch.gm_vars['board']) 515 | n_mvs += board_tmp.sum() - board_tmp2.sum() 516 | 517 | # move opposing player 518 | if gnu: 519 | gt.move_nn(to_coords) 520 | 521 | # mv gnugo 522 | ai_to_coords = gt.move_ai() 523 | arch.sess.run(arch.imgs, feed_dict={arch.moving_player: 1}) 524 | arch.sess.run(arch.nn_max_move_unit['eval'], feed_dict={arch.moving_player: 1, arch.nn_max_to_coords['eval']: ai_to_coords}) 525 | else: 526 | arch.sess.run(arch.imgs, feed_dict = ret_d(1)) 527 | arch.sess.run(arch.move_random_ai, feed_dict = ret_d(1)) 528 | 529 | boards[key][turn] = arch.sess.run(arch.gm_vars['board']) 530 | 531 | if nm == 'tree': 532 | pu.prune_tree(0) 533 | # turn 534 | 535 | # save stats 536 | win_tmp, score_tmp, n_captures_tmp = arch.sess.run([arch.winner, arch.score, arch.n_captures], feed_dict={arch.moving_player: 0}) 537 | scores[key] = copy.deepcopy(score_tmp) 538 | 539 | win_eval += win_tmp.mean() 540 | score_eval += score_tmp.mean() 541 | n_captures_eval += n_captures_tmp.mean(1) 542 | # gm 543 | 544 | # log 545 | log['win_' + key].append( 
(win_eval / (2*np.single(N_GMS))) + .5 ) 546 | log['n_captures_' + key].append( n_captures_eval[0] / np.single(N_GMS) ) 547 | log['n_captures_opp_' + key].append( n_captures_eval[1] / np.single(N_GMS) ) 548 | log['score_' + key].append( score_eval / np.single(N_GMS) ) 549 | log['n_mvs_' + key].append( n_mvs / np.single(N_GMS * N_TURNS * gv.BATCH_SZ) ) 550 | 551 | log['boards_' + key].append( boards[key][-1] ) 552 | print key, 'eval time', time.time() - t_key 553 | # gnu 554 | # nm 555 | log['eval_batch'].append( global_batch ) 556 | print 'eval time', time.time() - t_eval 557 | # eval 558 | ####################### end network evaluation 559 | 560 | pol, pol_pre = arch.sess.run([arch.pol['eval'], arch.pol_pre['eval']], feed_dict={arch.moving_player: 0}) 561 | 562 | ##### log 563 | log['val_mean_sq_err'].append ( val_mean_sq_err / err_denom ) 564 | log['pol_cross_entrop'].append( pol_cross_entrop_err / err_denom ) 565 | log['val_pearsonr'].append( val_pearsonr / err_denom ) 566 | log['opt_batch'].append( global_batch ) 567 | 568 | log['pol_max_pre'].append( np.median(pol_pre.max(1)) ) 569 | log['pol_max'].append( np.median(pol.max(1)) ) 570 | 571 | log['self_eval_win_rate'].append( np.single(eval_games_won.value) / (eval_batch_sets_played.value*gv.BATCH_SZ) ) 572 | log['model_promoted'].append( model_outperforms ) 573 | 574 | log['self_eval_perc'].append( self_eval_perc ) 575 | 576 | val_mean_sq_err = 0 577 | pol_cross_entrop_err = 0 578 | val_pearsonr = 0 579 | err_denom = 0 580 | 581 | ########## print 582 | run_time += datetime.now() - t_start 583 | 584 | if (save_counter % 20) == 0: 585 | print 586 | print Style.BRIGHT + Fore.GREEN + save_nm, Fore.WHITE + 'EPS', EPS, 'start', str(start_time).split('.')[0], 'run time', \ 587 | str(run_time).split('.')[0] 588 | print 589 | save_counter += 1 590 | 591 | print_str = '%i' % global_batch 592 | for key in print_logs: 593 | print_str += ' %s ' % key 594 | if isinstance(log[key], int): 595 | print_str += str(log[key][-1]) 
596 | else: 597 | print_str += '%1.4f' % log[key][-1] 598 | 599 | print_str += ' %4.1f' % (datetime.now() - t_start).total_seconds() 600 | print print_str 601 | 602 | t_start = datetime.now() 603 | 604 | # play sound 605 | if os.path.isfile('/home/tapa/play_sound.txt'): 606 | pygame.mixer.music.play() 607 | 608 | ############# save 609 | if WORKER_ID == MASTER_WORKER: 610 | with buffer_lock: 611 | # update state vars 612 | #shared_nms = ['buffer_loc', 'batch_sets_created', 'batch_set', 's_board', 's_winner', 's_tree_probs', 'weights_changed', 'buffer_lock', 'weights_lock', 'save_nm', 'new_model', 'weights'] 613 | for key in state_vars + training_ex_vars: 614 | if key in ['buffer_loc', 'batch_sets_created', 'batch_sets_created_total', 'batch_set', 'eval_games_won', 'eval_batch_sets_played']: 615 | exec('save_d["%s"] = %s.value' % (key, key)) 616 | elif key in ['tree_probs', 'winner', 'board']: 617 | exec('save_d["%s"] = copy.deepcopy(np.array(s_%s.get_obj()))' % (key, key)) 618 | else: 619 | exec('save_d["%s"] = %s' % (key, key)) 620 | 621 | save_nms = [save_nm] 622 | if (datetime.now() - save_t).seconds > CHKP_FREQ: 623 | save_nms += [save_nm + str(datetime.now())] 624 | save_t = datetime.now() 625 | 626 | for nm in save_nms: 627 | np.save(sdir + nm, save_d) 628 | arch.saver.save(arch.sess, sdir + nm) 629 | 630 | print sdir + nm, 'saved' 631 | 632 | 633 | #################################################################################################################### 634 | 635 | if save_nm is None: 636 | new_model = True # set `eval32` to `main`, and `eval` float16 copies 637 | 638 | ##### weightings on individual loss terms: 639 | LSQ_LAMBDA = 0 640 | LSQ_REG_LAMBDA = 0 641 | POL_CROSS_ENTROP_LAMBDA = 1 642 | VAL_LAMBDA = .025 643 | VALR_LAMBDA = 0 644 | L2_LAMBDA = 1e-3 # weight regularization 645 | CPUCT = 1 646 | 647 | N_REP_TRAIN = 5 # number of times more to backprop over training examples (reflections/rotations) 648 | 649 | N_BATCH_SETS_BLOCK = 7 650 | 
N_BATCH_SETS_TOTAL = 7*5 # number of batch sets to store in training buffer 651 | N_BATCH_SETS_MIN = N_BATCH_SETS_TOTAL 652 | 653 | batch_set = 0 654 | batch_sets_created = 0 655 | batch_sets_created_total = 0 656 | buffer_loc = 0 657 | 658 | GATE_THRESH = .5 659 | N_GATE_BATCH_SETS = 1 660 | 661 | ##### model parameters 662 | N_LAYERS = 5 #10 # number of model layers 663 | FILTER_SZS = [3]*N_LAYERS 664 | STRIDES = [1]*N_LAYERS 665 | F = 128 # number of filters 666 | N_FILTERS = [F]*N_LAYERS 667 | N_FC1 = 128 # number of units in fully connected layer 668 | 669 | 670 | EPS = 2e-1 # backprop step size 671 | MOMENTUM = .9 672 | 673 | N_SIM = 800 # number of simulations at each turn 674 | N_TURNS = 32 # number of moves per player per game 675 | 676 | #### training buffers 677 | BUFFER_SZ = N_BATCH_SETS_TOTAL * N_TURNS * 2 * gv.BATCH_SZ 678 | 679 | board = np.zeros((BUFFER_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels), dtype='float16') 680 | winner = np.zeros((N_BATCH_SETS_TOTAL, N_TURNS, 2, gv.BATCH_SZ), dtype='int8') 681 | tree_probs = np.zeros((BUFFER_SZ, gv.map_szt), dtype='float32') 682 | 683 | ##### number of batch evaluations for testing model 684 | N_EVAL_NN_GMS = 1 # model evaluation for printing 685 | N_EVAL_NN_GNU_GMS = 1 686 | N_EVAL_TREE_GMS = 0 # model eval 687 | N_EVAL_TREE_GNU_GMS = 0 688 | 689 | ######### save and checkpoint frequency 690 | SAVE_FREQ = N_TURNS 691 | EVAL_FREQ = SAVE_FREQ*1 692 | CHKP_FREQ = 60*60*10*2 693 | 694 | start_time = datetime.now() 695 | save_t = datetime.now() 696 | 697 | save_nm = 'go_%1.4fEPS_%iGMSZ_%iN_SIM_%iN_TURNS_%iN_FILTERS_%iN_LAYERS_%iN_BATCH_SETS_TOTAL_%i_N_BATCH_SET_MIN_%iN_REP_TRN_trainbug.npy' % \ 698 | (EPS, gv.n_rows, N_SIM, N_TURNS, N_FILTERS[0], N_LAYERS, N_BATCH_SETS_TOTAL, N_BATCH_SETS_MIN, N_REP_TRAIN) 699 | 700 | boards = {}; scores = {} # eval 701 | save_d = {} 702 | for key in save_vars: 703 | exec('save_d["%s"] = %s' % (key,key)) 704 | save_d['script_nm'] = __file__ 705 | 706 | global_batch = 0 707 | 
global_batch_saved = 0 708 | global_batch_evald = 0 709 | save_counter = 0 710 | 711 | run_time = datetime.now() - datetime.now() 712 | 713 | log = {} 714 | for key in logs: 715 | log[key] = [] 716 | 717 | ########## save 718 | # update state vars 719 | for key in state_vars + training_ex_vars: 720 | exec('save_d["%s"] = %s' % (key, key)) 721 | 722 | # save 723 | save_nms = [save_nm] 724 | if (datetime.now() - save_t).seconds > CHKP_FREQ: 725 | save_nms += [save_nm + str(datetime.now())] 726 | save_t = datetime.now() 727 | 728 | for nm in save_nms: 729 | np.save(sdir + nm, save_d) 730 | else: 731 | new_model = False # prevent `main` from being set to `eval32` at loading 732 | 733 | save_d = np.load(sdir + save_nm, allow_pickle=True).item() 734 | 735 | for key in save_vars + state_vars + training_ex_vars: 736 | if key == 'save_nm': 737 | continue 738 | exec('%s = save_d["%s"]' % (key,key)) 739 | 740 | print save_nm 741 | 742 | ################### shared memory variables 743 | 744 | ###### self play from `eval` model used for training `eval32`: 745 | s_board = mp.Array('h', board.ravel()) # shape: (BUFFER_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels) 746 | s_winner = mp.Array('b', winner.ravel()) # (N_BATCH_SETS_TOTAL, N_TURNS, 2, gv.BATCH_SZ) 747 | s_tree_probs = mp.Array('f', tree_probs.ravel()) # (BUFFER_SZ, gv.map_szt) 748 | 749 | # indices, counters, & flags 750 | buffer_loc = mp.Value('i', buffer_loc) # index into above ^ training vars 751 | weights_changed = mp.Value('i', 0) # 0 = no change, 1 = changed 752 | batch_sets_created = mp.Value('i', batch_sets_created) 753 | batch_sets_created_total = mp.Value('i', batch_sets_created_total) 754 | batch_set = mp.Value('i', batch_set) 755 | 756 | # evaluation (`eval` vs `main` benchmark testing to see when to update `main` to the current `eval32` backprop weights) 757 | scope_next = mp.Value('i', 0) # alternates between 0,1 during model evaluation to dictate if `eval` or `main` starts 1st 758 | eval_games_won = 
#!/bin/bash
# Builds the TensorFlow custom-op libraries cuda_op_kernel_52.so and
# cuda_op_kernel_75.so (one per GPU architecture), then builds py_util.

#export CC=gcc-6
#export CXX=g++-6

# -f: a tree without stale build artifacts is not an error
rm -f -- *.o *.so

# TensorFlow include / library directories of the active python
TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
export TF_INC TF_LIB

#-gencode=arch=compute_61,code=sm_61
for arch in 52 75; do
	# device code for sm_${arch}
	nvcc -std=c++11 -c -o "cuda_op_kernel_${arch}.cu.o" cuda_op_kernel.cu.cc \
		-I "$TF_INC" -I"$TF_INC/external/nsync/public" -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC \
		--expt-relaxed-constexpr "-arch=sm_${arch}" -use_fast_math #-g -G -lineinfo

	# host-side op registration, linked with the device object into the .so
	g++ -std=c++11 -shared -o "cuda_op_kernel_${arch}.so" cuda_op_kernel.cc \
		"cuda_op_kernel_${arch}.cu.o" -I "$TF_INC" -I"$TF_INC/external/nsync/public" -fPIC -lcudart -L"$TF_LIB" -ltensorflow_framework \
		-L/usr/local/cuda/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 #-g #-O3
done

# Stop here if py_util is missing: both scripts are named build.sh, so
# continuing would re-run this very script in an endless loop.
cd py_util || exit 1
echo
echo ".................."
echo "building py_util"
./build.sh
cd ..
".join(tf.sysconfig.get_link_flags()))') ) 19 | 20 | nvcc -std=c++11 -c -o cuda_op_kernel_52.cu.o cuda_op_kernel.cu.cc \ 21 | ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr 22 | nvcc -std=c++11 -c -o cuda_op_kernel_75.cu.o cuda_op_kernel.cu.cc \ 23 | ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr 24 | 25 | 26 | g++ -std=c++11 -shared -o cuda_op_kernel_52.so cuda_op_kernel.cc \ 27 | cuda_op_kernel_52.cu.o ${TF_CFLAGS[@]} -fPIC -L $CUDA_PATH -lcudart ${TF_LFLAGS[@]} 28 | 29 | g++ -std=c++11 -shared -o cuda_op_kernel_75.so cuda_op_kernel.cc \ 30 | cuda_op_kernel_75.cu.o ${TF_CFLAGS[@]} -fPIC -L $CUDA_PATH -lcudart ${TF_LFLAGS[@]} 31 | 32 | #g++ -std=c++11 -shared -o cuda_op_kernel_52.so cuda_op_kernel.cc \ 33 | # cuda_op_kernel_52.cu.o ${TF_FLAGS[@]} -fPIC -lcudart -L$TF_LIB \ 34 | # -L/usr/local/cuda/lib64 -I $TF_INC -I$TF_INC/external/nsync/public -D_GLIBCXX_USE_CXX11_ABI=1 ${TF_FLAGS[@]} #-g #-O3 35 | #g++ -std=c++11 -shared -o cuda_op_kernel_75.so cuda_op_kernel.cc \ 36 | # cuda_op_kernel_75.cu.o ${TF_FLATS[@]} -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB \ 37 | # -L/usr/local/cuda/lib64 -D_GLIBCXX_USE_CXX11_ABI=1 ${TF_FLAGS[@]} -D GOOGLE_CUDA=1 #-g #-O3 38 | 39 | #g++ -std=c++11 -shared -o cuda_op_kernel_52.so cuda_op_kernel.cc \ 40 | # cuda_op_kernel_52.cu.o ${TF_FLAGS[@]} -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB \ 41 | # -L/usr/local/cuda/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 ${TF_FLAGS[@]} #-g #-O3 42 | #g++ -std=c++11 -shared -o cuda_op_kernel_75.so cuda_op_kernel.cc \ 43 | # cuda_op_kernel_75.cu.o ${TF_FLAGS[@]} -I $TF_INC -I$TF_INC/external/nsync/public -fPIC -lcudart -L$TF_LIB \ 44 | # -L/usr/local/cuda/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 ${TF_FLAGS[@]} #-g #-O3 45 | 46 | 47 | 48 | cd py_util 49 | echo 50 | echo ".................." 51 | echo "building py_util" 52 | ./build_centos.sh 53 | cd .. 
// Shared macros and global device-state pointers for the CUDA kernels.
// Most macros below expand to brace blocks or full statements and are meant
// to be written WITHOUT a trailing semicolon at the use site.

// Lazily initialize the op: calls init_op_launcher() when op_initialized is 0.
#define CHECK_INIT { if(!op_initialized) init_op_launcher(); }
// Abort (via ASSERT/PANIC) when the op has not been initialized yet.
#define REQ_INIT ASSERT(op_initialized, "op not initialized")

// Fetch the last CUDA error into `err` and PANIC (print + exit) on failure.
// Requires a variable named `err` in scope at the expansion site.
#define CHECK_CUDA_ERR {err = cudaGetLastError();if(err != cudaSuccess){\
		printf("CUDA error: %s, %s, %i\n",cudaGetErrorString(err),__FILE__,__LINE__); PANIC("");}}
// NOTE(review): identical expansion to CHECK_CUDA_ERR in this header.
#define CHECK_CUDA_ERR_R {err = cudaGetLastError();if(err != cudaSuccess){\
		printf("CUDA error: %s, %s, %i\n",cudaGetErrorString(err),__FILE__,__LINE__); PANIC("");}}
// PANIC when the `err` left by a preceding CUDA call is not cudaSuccess.
#define MALLOC_ERR_CHECK {if (err != cudaSuccess){printf("malloc err line: %i\n",__LINE__); PANIC("");}}
// NOTE(review): identical expansion to MALLOC_ERR_CHECK in this header.
#define MALLOC_ERR_CHECK_R {if (err != cudaSuccess){printf("malloc err line: %i\n",__LINE__); PANIC("");}}

// Debug-only assert: compiled in only when CUDA_DEBUG is defined.
#ifdef CUDA_DEBUG
	#define DASSERT(A) assert(A);
#else
	#define DASSERT(A)
#endif

// Device-to-device copies of SZ elements, checked with MALLOC_ERR_CHECK.
// BMEM copies B into A; RMEM copies A into B. SZ is not parenthesized in the
// expansion, so pass a simple expression.
#define BMEM(A, B, SZ) err = cudaMemcpy(A, B, SZ*sizeof(A[0]), cudaMemcpyDeviceToDevice); MALLOC_ERR_CHECK
#define RMEM(A, B, SZ) err = cudaMemcpy(B, A, SZ*sizeof(A[0]), cudaMemcpyDeviceToDevice); MALLOC_ERR_CHECK

// non-zero once the op has been initialized (tested by CHECK_INIT / REQ_INIT)
char op_initialized;

curandState_t* rand_states;

/////////////// game state
// [X]2 indicates backup variables used to restore session

char *board, *board2, board_cpu[BATCH_MAP_SZ];

// previous states to prevent ko
char *board_prev, *board_pprev;
char *board_prev2, *board_pprev2;

int16_t * n_captures, *n_captures2; // [N_PLAYERS, BATCH_SZ]

int16_t * ai_to_coord; // [BATCH_SZ], output of move_random_ai, input to move_unit

char * valid_mv_map_internal; // [BATCH_SZ, MAP_SZ], output of create_batch, input to move_unit
char * moved_internal; // [BATCH_SZ] used in move_random_ai, req. input to move_unit_launcher, results not used

// Declares `player_val` from the `moving_player` pointer in scope:
// player 0 -> 1, player 1 -> -1.
// 1 or -1:
#define GET_PLAYER_VAL DASSERT((*moving_player == 0) || (*moving_player == 1)); char player_val = ((*moving_player == 0) * 2 )- 1;

// Debug-only bounds check of a flat board coordinate.
#define CHK_VALID_MAP_COORD(COORD) DASSERT((COORD) >= 0 && (COORD) < MAP_SZ)

// Declares n_valid_mvs and valid_mv_inds[] and fills them from
// valid_mv_map_internal for the current game. Expects `gm`, `gm_offset` and
// `to_coord` in scope; when the game has no valid move it writes
// to_coord[gm] = -1 and RETURNS from the enclosing function.
// count valid mvs and store n_valid_mvs
#define COUNT_VALID \
	int n_valid_mvs = 0;\
	int16_t valid_mv_inds[MAP_SZ];\
	MAP_LOOP{\
		if(valid_mv_map_internal[gm_offset + loc]){\
			valid_mv_inds[n_valid_mvs] = loc;\
			n_valid_mvs ++;\
		}\
	}\
	if(!n_valid_mvs){\
		to_coord[gm] = -1;\
		return;\
	} // no valid mvs
imgs_shape.AddDim(N_INPUT_CHANNELS);\ 31 | \ 32 | valid_mv_map_shape.AddDim(BATCH_SZ);\ 33 | valid_mv_map_shape.AddDim(MAP_SZ_X);\ 34 | valid_mv_map_shape.AddDim(MAP_SZ_Y); 35 | 36 | REGISTER_OP("CreateBatch") 37 | .Input("moving_player: int8") // [1] 38 | .Output("imgs: float16") 39 | .Output("valid_mv_map: int8") 40 | 41 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 42 | CREATE_BATCH_SHAPES 43 | tensorflow::shape_inference::ShapeHandle imgs_shape_h, valid_mv_map_shape_h; 44 | 45 | c->MakeShapeFromTensorShape(imgs_shape, &imgs_shape_h); 46 | c->MakeShapeFromTensorShape(valid_mv_map_shape, &valid_mv_map_shape_h); 47 | 48 | c->set_output(0, imgs_shape_h); 49 | c->set_output(1, valid_mv_map_shape_h); 50 | 51 | return Status::OK(); 52 | }); 53 | 54 | #define RETURN_WINNER_SHAPES tensorflow::TensorShape winner_shape, score_shape, n_captures_shape;\ 55 | winner_shape.AddDim(BATCH_SZ);\ 56 | score_shape.AddDim(BATCH_SZ);\ 57 | n_captures_shape.AddDim(N_PLAYERS);\ 58 | n_captures_shape.AddDim(BATCH_SZ); 59 | 60 | REGISTER_OP("ReturnWinner") 61 | .Input("moving_player: int8") // [1] 62 | .Output("winner: int8") 63 | .Output("score: int16") 64 | .Output("n_captures: int16") 65 | 66 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 67 | RETURN_WINNER_SHAPES 68 | tensorflow::shape_inference::ShapeHandle winner_shape_h, score_shape_h, n_captures_shape_h; 69 | 70 | c->MakeShapeFromTensorShape(winner_shape, &winner_shape_h); 71 | c->set_output(0, winner_shape_h); 72 | 73 | c->MakeShapeFromTensorShape(score_shape, &score_shape_h); 74 | c->set_output(1, score_shape_h); 75 | 76 | c->MakeShapeFromTensorShape(n_captures_shape, &n_captures_shape_h); 77 | c->set_output(2, n_captures_shape_h); 78 | 79 | return Status::OK(); 80 | }); 81 | 82 | 83 | REGISTER_OP("InitState"); 84 | REGISTER_OP("EndTurn"); 85 | REGISTER_OP("SessionBackup"); 86 | REGISTER_OP("SessionRestore"); 87 | REGISTER_OP("MoveRandomAi") 88 | .Input("moving_player: int8"); // 
[1] 89 | 90 | REGISTER_OP("MoveUnit") 91 | .Input("to_coord: int16") 92 | .Input("moving_player: int8") // [1] 93 | .Output("moved: int8"); // [BATCH_SZ] 94 | 95 | void prob_to_coord_launcher(float * prob_map, int16_t * to_coord); 96 | void prob_to_coord_valid_mvs_launcher(float * prob_map, int16_t * to_coord); 97 | void max_prob_to_coord_valid_mvs_launcher(float * prob_map, int16_t * to_coord); 98 | 99 | void session_backup_launcher(); 100 | void session_restore_launcher(); 101 | void return_inputs_launcher(float* out); 102 | void init_state_launcher(); 103 | void move_random_ai_launcher(int8_t * moving_player); 104 | void create_batch_launcher(float * imgs, int8_t * moving_player, char * valid_mv_map); 105 | void move_unit_launcher(int16_t * to_coord, int8_t * moving_player, char *moved); 106 | void return_winner_launcher(int8_t * winner, int8_t *moving_player, int16_t * score, int16_t * n_captures_out); 107 | 108 | class session_backup : public OpKernel { 109 | public: 110 | explicit session_backup(OpKernelConstruction* context) : OpKernel(context) {} 111 | 112 | void Compute(OpKernelContext* context) override { 113 | session_backup_launcher(); 114 | } 115 | }; 116 | 117 | class session_restore : public OpKernel { 118 | public: 119 | explicit session_restore(OpKernelConstruction* context) : OpKernel(context) {} 120 | 121 | void Compute(OpKernelContext* context) override { 122 | session_restore_launcher(); 123 | } 124 | }; 125 | 126 | class prob_to_coord : public OpKernel { 127 | public: 128 | explicit prob_to_coord(OpKernelConstruction* context) : OpKernel(context) {} 129 | 130 | void Compute(OpKernelContext* context) override { 131 | /////////////////////////////////// inputs 132 | const Tensor& prob_map_tensor = context->input(0); 133 | 134 | auto prob_map = prob_map_tensor.flat(); 135 | 136 | // check dims 137 | TensorShape prob_map_shape = prob_map_tensor.shape(); 138 | ASSERT(prob_map_shape.dims() == 2, "number of dims not correct") 139 | 
ASSERT(prob_map_shape.dim_size(0) == BATCH_SZ, "incorrect input size") 140 | ASSERT(prob_map_shape.dim_size(1) == MAP_SZ, "incorrect input size") 141 | 142 | ////////////////////////////////////// outputs 143 | Tensor* to_coord_tensor = nullptr; 144 | 145 | TensorShape to_coord_shape; 146 | to_coord_shape.AddDim(BATCH_SZ); 147 | 148 | OP_REQUIRES_OK(context, context->allocate_output(0, to_coord_shape, &to_coord_tensor)); 149 | 150 | auto to_coord = to_coord_tensor->template flat(); 151 | 152 | /////////////////// 153 | prob_to_coord_launcher((float*)prob_map.data(), (int16_t*)to_coord.data()); 154 | } 155 | }; 156 | 157 | class prob_to_coord_valid_mvs : public OpKernel { 158 | public: 159 | explicit prob_to_coord_valid_mvs(OpKernelConstruction* context) : OpKernel(context) {} 160 | 161 | void Compute(OpKernelContext* context) override { 162 | /////////////////////////////////// inputs 163 | const Tensor& prob_map_tensor = context->input(0); 164 | auto prob_map = prob_map_tensor.flat(); 165 | 166 | // check dims 167 | TensorShape prob_map_shape = prob_map_tensor.shape(); 168 | ASSERT(prob_map_shape.dims() == 2, "number of dims not correct") 169 | ASSERT(prob_map_shape.dim_size(0) == BATCH_SZ, "incorrect input size") 170 | ASSERT(prob_map_shape.dim_size(1) == MAP_SZ, "incorrect input size") 171 | 172 | ////////////////////////////////////// outputs 173 | Tensor* to_coord_tensor = nullptr; 174 | 175 | TensorShape to_coord_shape; 176 | to_coord_shape.AddDim(BATCH_SZ); 177 | 178 | OP_REQUIRES_OK(context, context->allocate_output(0, to_coord_shape, &to_coord_tensor)); 179 | 180 | auto to_coord = to_coord_tensor->template flat(); 181 | 182 | /////////////////// 183 | prob_to_coord_valid_mvs_launcher((float*)prob_map.data(), (int16_t*)to_coord.data()); 184 | } 185 | }; 186 | 187 | class max_prob_to_coord_valid_mvs : public OpKernel { 188 | public: 189 | explicit max_prob_to_coord_valid_mvs(OpKernelConstruction* context) : OpKernel(context) {} 190 | 191 | void 
Compute(OpKernelContext* context) override { 192 | /////////////////////////////////// inputs 193 | const Tensor& prob_map_tensor = context->input(0); 194 | auto prob_map = prob_map_tensor.flat(); 195 | 196 | // check dims 197 | TensorShape prob_map_shape = prob_map_tensor.shape(); 198 | ASSERT(prob_map_shape.dims() == 2, "number of dims not correct") 199 | ASSERT(prob_map_shape.dim_size(0) == BATCH_SZ, "incorrect input size") 200 | ASSERT(prob_map_shape.dim_size(1) == MAP_SZ, "incorrect input size") 201 | 202 | ////////////////////////////////////// outputs 203 | Tensor* to_coord_tensor = nullptr; 204 | 205 | TensorShape to_coord_shape; 206 | to_coord_shape.AddDim(BATCH_SZ); 207 | 208 | OP_REQUIRES_OK(context, context->allocate_output(0, to_coord_shape, &to_coord_tensor)); 209 | 210 | auto to_coord = to_coord_tensor->template flat(); 211 | 212 | /////////////////// 213 | max_prob_to_coord_valid_mvs_launcher((float*)prob_map.data(), (int16_t*)to_coord.data()); 214 | } 215 | }; 216 | 217 | class return_winner : public OpKernel { 218 | public: 219 | explicit return_winner(OpKernelConstruction* context) : OpKernel(context) {} 220 | 221 | void Compute(OpKernelContext* context) override { 222 | /////////////////////////////////// inputs 223 | const Tensor& moving_player_tensor = context->input(0); 224 | auto moving_player = moving_player_tensor.flat(); 225 | 226 | // check dims 227 | TensorShape moving_player_shape = moving_player_tensor.shape(); 228 | ASSERT(moving_player_shape.dims() == 0, "number of dims not correct") 229 | 230 | ////////////////////////////////////// outputs 231 | Tensor* winner_tensor = nullptr, * score_tensor = nullptr, * n_captures_tensor = nullptr; 232 | RETURN_WINNER_SHAPES 233 | 234 | OP_REQUIRES_OK(context, context->allocate_output(0, winner_shape, &winner_tensor)); 235 | OP_REQUIRES_OK(context, context->allocate_output(1, score_shape, &score_tensor)); 236 | OP_REQUIRES_OK(context, context->allocate_output(2, n_captures_shape, 
&n_captures_tensor)); 237 | 238 | auto winner = winner_tensor->template flat(); 239 | auto score = score_tensor->template flat(); 240 | auto n_captures = n_captures_tensor->template flat(); 241 | 242 | /////////////////// 243 | return_winner_launcher((int8_t*)winner.data(), (int8_t*)moving_player.data(), (int16_t*)score.data(), (int16_t*)n_captures.data()); 244 | } 245 | }; 246 | 247 | class move_unit : public OpKernel { 248 | public: 249 | explicit move_unit(OpKernelConstruction* context) : OpKernel(context) {} 250 | 251 | void Compute(OpKernelContext* context) override { 252 | ///////////////////////////////////// inputs 253 | const Tensor& to_coord_tensor = context->input(0); 254 | const Tensor& moving_player_tensor = context->input(1); 255 | 256 | auto to_coord = to_coord_tensor.flat(); 257 | auto moving_player = moving_player_tensor.flat(); 258 | 259 | // check dims 260 | TensorShape to_map_shape = to_coord_tensor.shape(); 261 | ASSERT(to_map_shape.dims() == 1, "number of dims not correct") 262 | ASSERT(to_map_shape.dim_size(0) == BATCH_SZ, "incorrect input size") 263 | 264 | TensorShape moving_player_shape = moving_player_tensor.shape(); 265 | ASSERT(moving_player_shape.dims() == 0, "number of dims not correct") 266 | 267 | ///////////////////////// outputs 268 | Tensor* moved_tensor = nullptr; 269 | 270 | TensorShape moved_shape; 271 | moved_shape.AddDim(BATCH_SZ); 272 | 273 | OP_REQUIRES_OK(context, context->allocate_output(0, moved_shape, &moved_tensor)); 274 | 275 | auto moved = moved_tensor->template flat(); 276 | 277 | /////////////////// 278 | move_unit_launcher((int16_t*)to_coord.data(), (int8_t*)moving_player.data(), (char*)moved.data()); 279 | } 280 | }; 281 | 282 | class create_batch : public OpKernel { 283 | public: 284 | explicit create_batch(OpKernelConstruction* context) : OpKernel(context) {} 285 | 286 | void Compute(OpKernelContext* context) override { 287 | ///////////////////////////////////// inputs 288 | const Tensor& moving_player_tensor 
= context->input(0); 289 | auto moving_player = moving_player_tensor.flat(); 290 | 291 | // check dims 292 | TensorShape moving_player_shape = moving_player_tensor.shape(); 293 | ASSERT(moving_player_shape.dims() == 0, "number of dims not correct") 294 | 295 | 296 | ////////////////////////////////////// outputs 297 | CREATE_BATCH_SHAPES 298 | Tensor* imgs_tensor = nullptr, *valid_mv_map_tensor = nullptr; 299 | 300 | OP_REQUIRES_OK(context, context->allocate_output(0, imgs_shape, &imgs_tensor)); 301 | OP_REQUIRES_OK(context, context->allocate_output(1, valid_mv_map_shape, &valid_mv_map_tensor)); 302 | 303 | auto imgs = imgs_tensor->template flat(); 304 | auto valid_mv_map = valid_mv_map_tensor->template flat(); 305 | 306 | /////////////////// 307 | create_batch_launcher((float*)imgs.data(), (int8_t*)moving_player.data(), 308 | (char *)valid_mv_map.data()); 309 | } 310 | }; 311 | 312 | class init_state : public OpKernel { 313 | public: 314 | explicit init_state(OpKernelConstruction* context) : OpKernel(context) {} 315 | 316 | void Compute(OpKernelContext* context) override { 317 | init_state_launcher(); 318 | } 319 | }; 320 | 321 | class move_random_ai : public OpKernel { 322 | public: 323 | explicit move_random_ai(OpKernelConstruction* context) : OpKernel(context) {} 324 | 325 | void Compute(OpKernelContext* context) override { 326 | ///////////////////////////////////// inputs 327 | const Tensor& moving_player_tensor = context->input(0); 328 | 329 | auto moving_player = moving_player_tensor.flat(); 330 | 331 | // check dims 332 | TensorShape moving_player_shape = moving_player_tensor.shape(); 333 | ASSERT(moving_player_shape.dims() == 0, "number of dims not correct") 334 | 335 | move_random_ai_launcher((int8_t *)moving_player.data()); 336 | } 337 | }; 338 | 339 | REGISTER_KERNEL_BUILDER(Name("InitState").Device(DEVICE_GPU), init_state); 340 | REGISTER_KERNEL_BUILDER(Name("MoveRandomAi").Device(DEVICE_GPU), move_random_ai); 341 | 
REGISTER_KERNEL_BUILDER(Name("CreateBatch").Device(DEVICE_GPU), create_batch); 342 | REGISTER_KERNEL_BUILDER(Name("MoveUnit").Device(DEVICE_GPU), move_unit); 343 | REGISTER_KERNEL_BUILDER(Name("SessionRestore").Device(DEVICE_GPU), session_restore); 344 | REGISTER_KERNEL_BUILDER(Name("SessionBackup").Device(DEVICE_GPU), session_backup); 345 | REGISTER_KERNEL_BUILDER(Name("ReturnWinner").Device(DEVICE_GPU), return_winner); 346 | REGISTER_KERNEL_BUILDER(Name("ProbToCoord").Device(DEVICE_GPU), prob_to_coord); 347 | REGISTER_KERNEL_BUILDER(Name("ProbToCoordValidMvs").Device(DEVICE_GPU), prob_to_coord_valid_mvs); 348 | REGISTER_KERNEL_BUILDER(Name("MaxProbToCoordValidMvs").Device(DEVICE_GPU), max_prob_to_coord_valid_mvs); 349 | 350 | -------------------------------------------------------------------------------- /cuda_op_kernel.cu.cc: -------------------------------------------------------------------------------- 1 | #if GOOGLE_CUDA 2 | #define EIGEN_USE_GPU 3 | #include 4 | #include 5 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 6 | #include "includes.h" 7 | #include "cuda_includes.h" 8 | 9 | #include "kernels/return_state.cu" 10 | #include "kernels/verify_integrity.cu" 11 | #include "kernels/init_op.cu" // allocates memory 12 | 13 | #include "kernels/vars.cu.cc" 14 | #include "kernels/init_state.cu" // inits new set of games 15 | 16 | #include "kernels/move_unit.cu" 17 | #include "kernels/move_random_ai.cu" 18 | #include "kernels/create_batch.cu" 19 | #include "kernels/return_winner.cu" 20 | 21 | #include "kernels/session_backup.cu.cc" 22 | #include "kernels/prob_to_coord.cu" 23 | #include "kernels/prob_to_coord_valid_mvs.cu" 24 | #include "kernels/max_prob_to_coord_valid_mvs.cu" 25 | 26 | #endif 27 | 28 | -------------------------------------------------------------------------------- /global_vars.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import numpy as np 4 | 5 | 
# Seed numpy's and Python's RNGs from the wall clock (0.1 s resolution).
# The modulo keeps the value inside the range np.random.seed accepts.
# Builtin int replaces the `np.int` alias, which newer NumPy no longer has.
RAND_SEED = int(1e1*time.time()) % 4294967295
np.random.seed(RAND_SEED)
random.seed(RAND_SEED)

# board dimensions
n_rows, n_cols = 7,7
N_PLAYERS = 2

map_sz = (n_rows, n_cols)
map_szt = np.prod(map_sz)  # total number of board positions

n_input_channels = 3 # present and prior 2 game turns

########### training:
BATCH_SZ = 128  # number of games per batch
INPUTS_SHAPE = (BATCH_SZ, n_rows, n_cols, n_input_channels)
def move_ai(moving_player=1):
    """Ask every gnugo process for a move and return the chosen coordinates.

    Sends `genmove` for `colors[moving_player]` to each of the
    `gv.BATCH_SZ` gnugo processes in `f`, then parses the replies.

    Returns an int32 array of length `gv.BATCH_SZ` holding the flat board
    index (row*gv.n_cols + col) of each move. Entries stay -1 for games
    where gnugo passed, resigned, or returned a reply that could not be
    parsed.
    """
    ai_to_coords = -np.ones(gv.BATCH_SZ, dtype='int32')

    for gm in range(gv.BATCH_SZ):
        # drain pending output first; retry until a read on the pipe succeeds
        # (only I/O errors are retried, so Ctrl-C still interrupts the wait)
        while True:
            try:
                f[gm].stdout.read()
                break
            except IOError:
                pass

        f[gm].stdin.write('genmove %s\n' % colors[moving_player])

    for gm in range(gv.BATCH_SZ):
        ai_mv_orig = read_resp(gm)
        # responses are separated by blank lines; the last complete one is
        # the reply to genmove
        ai_mv = ai_mv_orig.split('\n\n')[-2]
        if ai_mv[:2] != '= ':
            print('failed gm %i resp %s' % (gm, ai_mv))
            continue
        if ai_mv.find('= PASS') != -1:
            continue
        if ai_mv.find('= resign') != -1:
            print('resign  %i' % gm)
            continue
        if len(ai_mv) <= 3:
            # reply too short to hold a coordinate: report it and skip the
            # game (the former `assert <string>` could never fire)
            print('gm %i resp %s, orig %s' % (gm, ai_mv, ai_mv_orig))
            continue
        # reply looks like '= C4': column letter followed by row number
        col = row_nm.find(ai_mv[2])
        assert col != -1, 'gm %i resp %s' % (gm, ai_mv)
        # row numbers in the reply count down as the board row index goes up
        row = gv.n_rows - int(ai_mv[3:])

        ai_to_coords[gm] = row*gv.n_cols + col
    return ai_to_coords
map_coord*N_INPUT_CHANNELS; 19 | if(board[gcoord] == player_val) 20 | imgs[icoord] = 1; 21 | else if(board[gcoord] == 0) 22 | imgs[icoord] = 0; 23 | else 24 | imgs[icoord] = -1; 25 | 26 | icoord ++; 27 | if(board_prev[gcoord] == player_val) 28 | imgs[icoord] = 1; 29 | else if(board_prev[gcoord] == 0) 30 | imgs[icoord] = 0; 31 | else 32 | imgs[icoord] = -1; 33 | 34 | icoord ++; 35 | if(board_pprev[gcoord] == player_val) 36 | imgs[icoord] = 1; 37 | else if(board_pprev[gcoord] == 0) 38 | imgs[icoord] = 0; 39 | else 40 | imgs[icoord] = -1; 41 | 42 | //////////// valid moves 43 | // adj search vars 44 | int16_t coord_stack[MAP_SZ]; 45 | int coord_stack_sz; 46 | 47 | __syncthreads(); 48 | if(map_coord != 0) return; 49 | 50 | #define ADD_MV { valid_mv_map[gcoord] = 1; valid_mv_map_internal[gcoord] = 1; } 51 | 52 | for(map_coord = 0; map_coord < MAP_SZ; map_coord++){ 53 | gcoord = game_offset + map_coord; 54 | 55 | valid_mv_map[gcoord] = 0; 56 | valid_mv_map_internal[gcoord] = 0; 57 | 58 | if(board[gcoord] != 0) continue; 59 | 60 | // add move 61 | if(LIBERTY(map_coord, player_val)){ 62 | ADD_MV 63 | continue; 64 | } 65 | 66 | //////////// if no liberty, check if pieces can be captured creating liberty for moving player 67 | 68 | // copy board 69 | char board_tmp[MAP_SZ]; // just sotre one game, don't waste space for games not eval'd in this worker 70 | for(int loc = 0; loc < MAP_SZ; loc++) 71 | board_tmp[loc] = board[game_offset + loc]; 72 | 73 | // if we did move here, would we capture? 
74 | char valid_mv = 0; 75 | board_tmp[map_coord] = player_val; // tmp move here 76 | ADJ_LOOP(map_coord) 77 | // remove pieces with no liberty 78 | if(board_tmp[coord_i] == (-player_val) && 79 | !LIBERTY_TMP(coord_i, -player_val)){ 80 | valid_mv = 1; 81 | board_tmp[coord_i] = 0; 82 | 83 | // remove adj pieces (to then check if final state matches prior state) 84 | for(int stack_i = 0; stack_i < coord_stack_sz; stack_i++){ 85 | int coord_j = coord_stack[stack_i]; 86 | DASSERT(board_tmp[coord_j] == (-player_val)) 87 | board_tmp[coord_j] = 0; 88 | } // stack 89 | 90 | } // opposing player / liberty check 91 | } // adj loop 92 | 93 | if(valid_mv == 0) 94 | continue; 95 | 96 | ////// does this replicate a prior state? 97 | char matching = 1, matching2 = 1; 98 | for(int loc = 0; matching && (loc < MAP_SZ); loc++){ 99 | matching = board_pprev[game_offset + loc] == board_tmp[loc]; 100 | } 101 | for(int loc = 0; matching2 && (loc < MAP_SZ); loc++){ 102 | matching2 = board_prev[game_offset + loc] == board_tmp[loc]; 103 | } 104 | 105 | if(matching == 0 && matching2 == 0) ADD_MV 106 | 107 | } // map loop 108 | } 109 | 110 | void create_batch_launcher(float * imgs, int8_t * moving_player, char * valid_mv_map){ 111 | REQ_INIT 112 | 113 | create_batch_kernel <<< BATCH_SZ, MAP_SZ >>> ((half*)imgs, board, board_prev, board_pprev, moving_player, valid_mv_map, valid_mv_map_internal); 114 | 115 | VERIFY_BUFFER_INTEGRITY 116 | } 117 | 118 | -------------------------------------------------------------------------------- /kernels/init_op.cu: -------------------------------------------------------------------------------- 1 | __global__ void init_rand_states(int32_t RAND_SEED, int32_t map_sz, curandState_t * rand_states){ 2 | int32_t offset = blockIdx.x*map_sz + threadIdx.x; 3 | curand_init(RAND_SEED + offset, 0, 1, &rand_states[offset]); 4 | } 5 | 6 | #define CMALLOC(VAR, SZ) {err = cudaMalloc((void**) &VAR, SZ*sizeof(VAR[0])); MALLOC_ERR_CHECK_R} 7 | #define MALLOC_CHAR(VAR, SZ) {VAR 
= (char*) malloc(SZ*sizeof(VAR[0])); ASSERT(VAR != 0, "malloc failed"); } 8 | #define MALLOC_INT32(VAR, SZ) {VAR = (int32_t*) malloc(SZ*sizeof(VAR[0])); ASSERT(VAR != 0, "malloc failed"); } 9 | #define MALLOC_UINT32(VAR, SZ) {VAR = (uint32_t*) malloc(SZ*sizeof(VAR[0])); ASSERT(VAR != 0, "malloc failed"); } 10 | 11 | void init_op_launcher(){ 12 | cudaError_t err; 13 | op_initialized = 1; 14 | 15 | ///////////////////////////////// gpu buffers 16 | // game state 17 | CMALLOC(board, BATCH_MAP_SZ); 18 | CMALLOC(board2, BATCH_MAP_SZ); 19 | 20 | CMALLOC(board_prev, BATCH_MAP_SZ); 21 | CMALLOC(board_prev2, BATCH_MAP_SZ); 22 | 23 | CMALLOC(board_pprev, BATCH_MAP_SZ); 24 | CMALLOC(board_pprev2, BATCH_MAP_SZ); 25 | 26 | CMALLOC(n_captures, N_PLAYERS*BATCH_SZ); 27 | CMALLOC(n_captures2, N_PLAYERS*BATCH_SZ); 28 | 29 | CMALLOC(ai_to_coord, BATCH_SZ); // input to move_unit, output from move_random_ai 30 | 31 | CMALLOC(valid_mv_map_internal, BATCH_MAP_SZ) // input to move_unit, output from create_batch 32 | 33 | CMALLOC(moved_internal, BATCH_SZ) // [BATCH_SZ] used in move_random_ai, req. 
input to move_unit_launcher, results not used 34 | 35 | ////// random seed 36 | int32_t RAND_SEED = time(NULL); 37 | err = cudaMalloc((void**) &rand_states, BATCH_MAP_SZ*sizeof(curandState_t)); 38 | init_rand_states <<< BATCH_SZ, MAP_SZ >>> (RAND_SEED, MAP_SZ, rand_states); 39 | 40 | } 41 | -------------------------------------------------------------------------------- /kernels/init_state.cu: -------------------------------------------------------------------------------- 1 | void init_state_launcher() { 2 | CHECK_INIT 3 | 4 | cudaError_t err = cudaMemset(board, 0, sizeof(board[0])*BATCH_MAP_SZ); CHECK_CUDA_ERR 5 | err = cudaMemset(board_prev, 0, sizeof(board[0])*BATCH_MAP_SZ); CHECK_CUDA_ERR 6 | err = cudaMemset(board_pprev, 0, sizeof(board[0])*BATCH_MAP_SZ); CHECK_CUDA_ERR 7 | 8 | err = cudaMemset(n_captures, 0, sizeof(n_captures[0])*N_PLAYERS*BATCH_SZ); CHECK_CUDA_ERR 9 | } 10 | 11 | -------------------------------------------------------------------------------- /kernels/max_prob_to_coord_valid_mvs.cu: -------------------------------------------------------------------------------- 1 | __global__ void max_prob_to_coord_valid_mvs_kernel(half * prob_map, int16_t * to_coord, 2 | char * board, char * valid_mv_map_internal){ 3 | int gm = blockIdx.x; 4 | int gm_offset = gm*MAP_SZ; 5 | half * prob_map_cur = &prob_map[gm_offset]; 6 | 7 | COUNT_VALID 8 | 9 | // determine max prob 10 | float max_prob = -999; 11 | int16_t max_map_loc = -1; 12 | for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move 13 | int map_loc = valid_mv_inds[mv_ind]; 14 | CHK_VALID_MAP_COORD(map_loc) 15 | DASSERT(board[gm*MAP_SZ + map_loc] == 0) 16 | if((float)prob_map_cur[map_loc] <= max_prob) 17 | continue; 18 | max_map_loc = map_loc; 19 | max_prob = prob_map_cur[map_loc]; 20 | } 21 | 22 | to_coord[gm] = max_map_loc; 23 | } 24 | 25 | void max_prob_to_coord_valid_mvs_launcher(float * prob_map, int16_t * to_coord){ 26 | cudaError_t err; 27 | REQ_INIT 28 | 29 | 
max_prob_to_coord_valid_mvs_kernel <<< BATCH_SZ, 1 >>> ((half*)prob_map, to_coord, board, 30 | valid_mv_map_internal); CHECK_CUDA_ERR 31 | 32 | VERIFY_BUFFER_INTEGRITY 33 | } 34 | 35 | 36 | -------------------------------------------------------------------------------- /kernels/move_random_ai.cu: -------------------------------------------------------------------------------- 1 | __global__ void move_random_ai_kernel(int16_t * to_coord, char * board, curandState_t* rand_states, char * valid_mv_map_internal){ 2 | 3 | int gm = blockIdx.x; 4 | int gm_offset = gm*MAP_SZ; 5 | 6 | COUNT_VALID 7 | 8 | // select random move 9 | int rand_ind = (curand(&rand_states[gm]) % (n_valid_mvs-1)) + 1; 10 | 11 | to_coord[gm] = valid_mv_inds[rand_ind]; 12 | 13 | DASSERT(to_coord[gm] >= 0 && to_coord[gm] < MAP_SZ && board[gm_offset + to_coord[gm]] == 0) 14 | 15 | } 16 | 17 | void move_random_ai_launcher(int8_t * moving_player){ 18 | cudaError_t err; 19 | REQ_INIT 20 | 21 | move_random_ai_kernel <<< BATCH_SZ, 1 >>> (ai_to_coord, board, rand_states, valid_mv_map_internal); CHECK_CUDA_ERR 22 | 23 | move_unit_launcher(ai_to_coord, moving_player, moved_internal); 24 | VERIFY_BUFFER_INTEGRITY 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /kernels/move_unit.cu: -------------------------------------------------------------------------------- 1 | #define N_ADJ 4 2 | 3 | #define ADJ_LOOP(COORD) \ 4 | int coord_x = COORD / MAP_SZ_Y;\ 5 | int coord_y = COORD % MAP_SZ_Y;\ 6 | \ 7 | int X_adj[N_ADJ] = {0, -1, 1, 0};\ 8 | int Y_adj[N_ADJ] = {-1, 0, 0, 1};\ 9 | for(int adj = 0; adj < N_ADJ; adj++){\ 10 | int coord_px = coord_x + X_adj[adj];\ 11 | int coord_py = coord_y + Y_adj[adj];\ 12 | if(coord_py < 0 || coord_py >= MAP_SZ_Y ||\ 13 | coord_px < 0 || coord_px >= MAP_SZ_X)\ 14 | continue;\ 15 | int coord_i = coord_px*MAP_SZ_Y + coord_py;\ 16 | 17 | 18 | __device__ inline int add_adj_to_stack(int16_t coord, int16_t * coord_stack, int 
coord_stack_sz, 19 | char * checked, char op_player_val, int game_offset, char * board){ 20 | DASSERT(coord >= 0 && coord < MAP_SZ); 21 | 22 | ADJ_LOOP(coord) 23 | if(checked[coord_i]) // already checked 24 | continue; 25 | 26 | if(board[game_offset + coord_i] == 0) return -1; 27 | 28 | // add to stack 29 | if(board[game_offset + coord_i] == op_player_val){ 30 | checked[coord_i] = 1; 31 | coord_stack[coord_stack_sz] = coord_i; 32 | coord_stack_sz ++; 33 | DASSERT(coord_stack_sz < MAP_SZ) 34 | } 35 | } // adj 36 | 37 | return coord_stack_sz; 38 | } 39 | 40 | #define ADD_ADJ_TO_STACK(COORD, PLAYER_VAL) *coord_stack_sz = add_adj_to_stack(COORD, coord_stack, \ 41 | *coord_stack_sz, checked, PLAYER_VAL, game_offset, board); 42 | 43 | #define LIBERTY(COORD, PLAYER_VAL) return_liberty(COORD, PLAYER_VAL, game_offset, board, coord_stack, &coord_stack_sz) 44 | __device__ inline char return_liberty(int16_t coord, char player_val, int game_offset, char * board, 45 | int16_t * coord_stack, int * coord_stack_sz){ 46 | char checked[MAP_SZ]; 47 | 48 | //////////// check if there exists a liberty for the placed stone 49 | *coord_stack_sz = 0; 50 | for(int i = 0; i < MAP_SZ; i++) checked[i] = 0; checked[coord] = 1; 51 | 52 | ADD_ADJ_TO_STACK(coord, player_val) 53 | 54 | for(int stack_i = 0; stack_i < *coord_stack_sz; stack_i++){ 55 | int16_t coord_j = coord_stack[stack_i]; 56 | 57 | DASSERT(coord_j >= 0 && coord_j < MAP_SZ) 58 | DASSERT(board[game_offset + coord_j] == player_val) 59 | 60 | ADD_ADJ_TO_STACK(coord_j, player_val) 61 | 62 | } // stack 63 | 64 | return *coord_stack_sz == -1; 65 | } 66 | 67 | __global__ void move_unit_kernel(int16_t *to_coord, int8_t *moving_player, char * board, int16_t * n_captures, char * moved, char * valid_mv_map_internal){ 68 | int gm = blockIdx.x; 69 | int game_offset = gm * MAP_SZ; 70 | 71 | moved[gm] = 0; 72 | 73 | if(to_coord[gm] < 0 || to_coord[gm] >= MAP_SZ) return; 74 | 75 | DASSERT(*moving_player == 0 || *moving_player == 1); 76 | 77 | 
GET_PLAYER_VAL 78 | 79 | int16_t coord = to_coord[gm]; 80 | 81 | // position not empty. shouldn't happen? (only when nn is making moves directly frm outputs) 82 | if(board[game_offset + coord] != 0) return; 83 | 84 | ///////////////// check if we have listed this is a valid mv 85 | if(!valid_mv_map_internal[game_offset + coord]) return; // invalid move 86 | 87 | /////////////////////////// 88 | 89 | board[game_offset + coord] = player_val; 90 | 91 | // adj search vars 92 | int16_t coord_stack[MAP_SZ]; 93 | int coord_stack_sz; 94 | 95 | ///////////// check if we should remove stones 96 | char removed_stones = 0; 97 | 98 | ADJ_LOOP(coord) 99 | if(board[game_offset + coord_i] == (-player_val) && 100 | !LIBERTY(coord_i, -player_val)){ 101 | 102 | removed_stones = 1; 103 | DASSERT(board[game_offset + coord_i] == (-player_val)) 104 | board[game_offset + coord_i] = 0; 105 | n_captures[*moving_player*BATCH_SZ + gm] ++; 106 | 107 | for(int stack_i = 0; stack_i < coord_stack_sz; stack_i++){ 108 | int coord_j = coord_stack[stack_i]; 109 | DASSERT(board[game_offset + coord_j] == (-player_val)) 110 | board[game_offset + coord_j] = 0; 111 | n_captures[*moving_player*BATCH_SZ + gm] ++; 112 | 113 | } // stack 114 | 115 | } // opposing player / liberty check 116 | } // adj 117 | 118 | ///////////////// if we've not removed stones, make sure there's a liberty for the placed stone 119 | if(!removed_stones && !LIBERTY(coord, player_val)) 120 | board[game_offset + coord] = 0; 121 | 122 | // surrounded & could not capture 123 | if(board[game_offset + coord] == 0) return; 124 | 125 | moved[gm] = 1; 126 | } 127 | 128 | void move_unit_launcher(int16_t * to_coord, int8_t * moving_player, char * moved){ 129 | REQ_INIT 130 | cudaError_t err; 131 | 132 | BMEM(board_pprev, board_prev, BATCH_MAP_SZ) 133 | BMEM(board_prev, board, BATCH_MAP_SZ) 134 | move_unit_kernel <<< BATCH_SZ, 1 >>> (to_coord, moving_player, board, n_captures, moved, valid_mv_map_internal); 135 | 136 | CHECK_CUDA_ERR 137 | 
VERIFY_BUFFER_INTEGRITY 138 | } 139 | 140 | -------------------------------------------------------------------------------- /kernels/prob_to_coord.cu: -------------------------------------------------------------------------------- 1 | #define RAND_RES 100000 2 | #define PROB ((float)prob_map[MO + loc] / probs_sum_orig) 3 | 4 | __global__ void prob_to_coord_kernel(half * prob_map, int16_t * to_coord, curandState_t* rand_states){ 5 | int gm = blockIdx.x; 6 | int MO = gm*MAP_SZ; 7 | float rand_val = (float)(curand(&rand_states[gm]) % RAND_RES); 8 | rand_val /= (float)RAND_RES; 9 | 10 | float probs_sum_orig = 0; 11 | MAP_LOOP 12 | probs_sum_orig += (float)prob_map[MO + loc]; 13 | assert(probs_sum_orig >= 0); 14 | 15 | float probs_sum = 0; 16 | MAP_LOOP{ 17 | if(PROB < 0 || PROB > 1) 18 | printf("PROB %f\n", PROB); 19 | //DASSERT(PROB >= 0 && PROB <= 1) 20 | 21 | if((rand_val >= probs_sum) && (rand_val < (probs_sum + PROB))){ 22 | to_coord[gm] = loc; 23 | return; 24 | } 25 | probs_sum += PROB; 26 | } 27 | 28 | to_coord[gm] = -1; 29 | 30 | DASSERT(probs_sum <= 1.01) 31 | DASSERT(probs_sum >= .999) 32 | } 33 | 34 | void prob_to_coord_launcher(float * prob_map, int16_t * to_coord){ 35 | REQ_INIT 36 | cudaError_t err; 37 | 38 | prob_to_coord_kernel <<< BATCH_SZ, 1 >>> ((half*)prob_map, to_coord, rand_states); 39 | 40 | CHECK_CUDA_ERR 41 | VERIFY_BUFFER_INTEGRITY 42 | } 43 | 44 | -------------------------------------------------------------------------------- /kernels/prob_to_coord_valid_mvs.cu: -------------------------------------------------------------------------------- 1 | __global__ void prob_to_coord_valid_mvs_kernel(half * prob_map, int16_t * to_coord, 2 | char * board, curandState_t* rand_states, char * valid_mv_map_internal){ 3 | int gm = blockIdx.x; 4 | int gm_offset = gm*MAP_SZ; 5 | half * prob_map_cur = &prob_map[gm_offset]; 6 | 7 | COUNT_VALID 8 | 9 | float rand_val = (float)(curand(&rand_states[gm]) % RAND_RES); 10 | rand_val /= (float)RAND_RES; 11 | 12 | 
// compute probs sum over valid mvs 13 | float probs_sum_orig = 0; 14 | for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move 15 | int map_loc = valid_mv_inds[mv_ind]; 16 | CHK_VALID_MAP_COORD(map_loc) 17 | DASSERT(board[gm*MAP_SZ + map_loc] == 0) 18 | probs_sum_orig += (float)prob_map_cur[map_loc]; 19 | } 20 | if(probs_sum_orig == 0) probs_sum_orig = 1; 21 | //assert(probs_sum_orig >= 0); 22 | 23 | float probs_sum = 0; 24 | for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move 25 | int16_t map_loc = valid_mv_inds[mv_ind]; 26 | float p = (float)prob_map_cur[map_loc] / probs_sum_orig; 27 | //if(!(p >= 0 && p <= 1)) 28 | // printf("prob err %f\n", p); 29 | //DASSERT(p >= 0 && p <= 1) 30 | 31 | // randomly selected or we're at the last move 32 | if(((rand_val >= probs_sum) && (rand_val < (probs_sum + p))) || 33 | (mv_ind == (n_valid_mvs - 1))){ 34 | to_coord[gm] = map_loc; 35 | return; 36 | } 37 | probs_sum += p; 38 | } 39 | 40 | to_coord[gm] = -1; 41 | //assert(0); 42 | } 43 | 44 | void prob_to_coord_valid_mvs_launcher(float * prob_map, int16_t * to_coord){ 45 | cudaError_t err; 46 | REQ_INIT 47 | 48 | prob_to_coord_valid_mvs_kernel <<< BATCH_SZ, 1 >>> ((half*)prob_map, to_coord, board, rand_states, valid_mv_map_internal); CHECK_CUDA_ERR 49 | 50 | VERIFY_BUFFER_INTEGRITY 51 | } 52 | 53 | 54 | -------------------------------------------------------------------------------- /kernels/return_state.cu: -------------------------------------------------------------------------------- 1 | char return_device_buffers(){ 2 | cudaError_t err; 3 | 4 | err = cudaMemcpy(board_cpu, board, BATCH_MAP_SZ*sizeof(board[0]), cudaMemcpyDeviceToHost); MALLOC_ERR_CHECK 5 | 6 | return 1; 7 | } 8 | 9 | -------------------------------------------------------------------------------- /kernels/return_winner.cu: -------------------------------------------------------------------------------- 1 | __device__ inline char add_blank_adj_to_stack(int16_t coord, int16_t 
* coord_stack, int *coord_stack_sz, 2 | char * checked, int game_offset, char * board, int * owner){ 3 | DASSERT(coord >= 0 && coord < MAP_SZ); 4 | 5 | ADJ_LOOP(coord) 6 | if(checked[coord_i]) // already checked 7 | continue; 8 | 9 | // touching non-owner, therefore blank space is not owned by owner 10 | if(board[game_offset + coord_i] != 0){ 11 | if(*owner != 0){ 12 | if(*owner != board[game_offset + coord_i]) 13 | return 0; 14 | }else 15 | // set owner 16 | *owner = board[game_offset + coord_i]; 17 | 18 | }else{ // space is empty; add to stack 19 | 20 | checked[coord_i] = 1; 21 | coord_stack[*coord_stack_sz] = coord_i; 22 | *coord_stack_sz = *coord_stack_sz + 1; 23 | 24 | DASSERT(*coord_stack_sz < MAP_SZ) 25 | } 26 | } // adj 27 | 28 | return 1; // blank potentially owned by single player 29 | } 30 | 31 | #define ADD_BLANK_ADJ_TO_STACK(COORD) add_blank_adj_to_stack(COORD, coord_stack, \ 32 | &coord_stack_sz, checked, game_offset, board, &owner); 33 | 34 | 35 | #define SCORE_START (MAP_SZ*2) 36 | #define LARGE_VAL 99999 37 | __global__ void return_winner_kernel(int8_t * winner, char * board, int8_t * moving_player, int16_t * score){ 38 | int32_t game = blockIdx.x; 39 | int16_t coord = threadIdx.x; 40 | int game_offset = game*MAP_SZ; 41 | int gcoord = game_offset + coord; 42 | 43 | GET_PLAYER_VAL 44 | 45 | __shared__ unsigned score_tmp; 46 | if(coord == 0) score_tmp = SCORE_START; 47 | __syncthreads(); 48 | 49 | if(board[gcoord] == player_val) // + 1 50 | atomicInc(&score_tmp, LARGE_VAL); 51 | else if(board[gcoord] == (-player_val)) 52 | atomicDec(&score_tmp, LARGE_VAL); // -1 53 | else{ 54 | // determine ownership of blank 55 | if(board[gcoord] != 0) 56 | printf("gcoord %i playerval %i board %i\n", gcoord, player_val, board[gcoord]); 57 | DASSERT(board[gcoord] == 0) 58 | 59 | int owner = 0; 60 | 61 | // adj search vars 62 | char checked[MAP_SZ]; 63 | int16_t coord_stack[MAP_SZ]; 64 | int coord_stack_sz = 0; 65 | for(int i = 0; i < MAP_SZ; i++) checked[i] = 0; 66 | 
checked[coord] = 1; 67 | 68 | int space_owned = ADD_BLANK_ADJ_TO_STACK(coord); 69 | 70 | for(int stack_i = 0; space_owned && (stack_i < coord_stack_sz); stack_i++){ 71 | int coord_j = coord_stack[stack_i]; 72 | 73 | DASSERT(coord_j >= 0 && coord_j < MAP_SZ) 74 | DASSERT(board[game_offset + coord_j] == 0) 75 | 76 | space_owned = ADD_BLANK_ADJ_TO_STACK(coord_j); 77 | } 78 | 79 | // add score to winner 80 | if(space_owned && owner != 0){ 81 | if(owner == player_val) 82 | atomicInc(&score_tmp, LARGE_VAL); 83 | else 84 | atomicDec(&score_tmp, LARGE_VAL); 85 | } 86 | 87 | } // empty space 88 | 89 | __syncthreads(); 90 | if(coord != 0) 91 | return; 92 | 93 | score[game] = (int16_t)(score_tmp) - (int16_t)(SCORE_START); 94 | if(score_tmp > SCORE_START) 95 | winner[game] = 1; 96 | else if(score_tmp < SCORE_START) 97 | winner[game] = -1; 98 | else 99 | winner[game] = 0; 100 | 101 | } 102 | 103 | void return_winner_launcher(int8_t * winner, int8_t * moving_player, int16_t * score, int16_t * n_captures_out){ 104 | REQ_INIT 105 | 106 | cudaError_t err; 107 | BMEM(n_captures_out, n_captures, N_PLAYERS*BATCH_SZ) 108 | 109 | return_winner_kernel <<< BATCH_SZ, MAP_SZ >>> (winner, board, moving_player, score); 110 | VERIFY_BUFFER_INTEGRITY 111 | } 112 | -------------------------------------------------------------------------------- /kernels/session_backup.cu.cc: -------------------------------------------------------------------------------- 1 | void session_backup_launcher(){ 2 | REQ_INIT 3 | cudaError_t err; 4 | 5 | BMEM(board2, board, BATCH_MAP_SZ) 6 | BMEM(board_prev2, board_prev, BATCH_MAP_SZ) 7 | BMEM(board_pprev2, board_pprev, BATCH_MAP_SZ) 8 | 9 | BMEM(n_captures2, n_captures, BATCH_SZ) 10 | } 11 | 12 | void session_restore_launcher(){ 13 | REQ_INIT 14 | cudaError_t err; 15 | 16 | RMEM(board2, board, BATCH_MAP_SZ) 17 | RMEM(board_prev2, board_prev, BATCH_MAP_SZ) 18 | RMEM(board_pprev2, board_pprev, BATCH_MAP_SZ) 19 | 20 | RMEM(n_captures2, n_captures, BATCH_SZ) 21 | } 22 | 
--------------------------------------------------------------------------------
/kernels/vars.cu.cc:
--------------------------------------------------------------------------------
// Copy SZ elements of type `dt` between the game buffer MAP and `outputs`:
// MAP -> outputs when op == RETURN_VARS, outputs -> MAP otherwise.
// Expects `op`, `outputs` and `err` to be in scope at the point of use.
#define CP_DT(MAP, SZ, dt) { if(op == RETURN_VARS)\
        err = cudaMemcpy(outputs, MAP, SZ*sizeof(dt), cudaMemcpyDeviceToDevice);\
    else\
        err = cudaMemcpy(MAP, outputs, SZ*sizeof(dt), cudaMemcpyDeviceToDevice);\
    MALLOC_ERR_CHECK}

// whole-batch variants: explicit element type, or element type taken from MAP itself
#define CP_MAP_DT(MAP, dt) CP_DT(MAP, BATCH_MAP_SZ, dt)
#define CP_MAP(MAP) CP_DT(MAP, BATCH_MAP_SZ, MAP[0])


// Return (op == RETURN_VARS) or set (any other op) the game variable selected by
// var_idx; terminates through PANIC on an unknown index.
void vars_launcher(int var_idx, void * outputs, char op){
    REQ_INIT
    cudaError_t err;

    switch(var_idx){
        case BOARD_IDX:
            CP_MAP(board)
            break;
        case VALID_MV_MAP_INTERNAL_IDX:
            CP_MAP(valid_mv_map_internal)
            break;
        default:
            PANIC("unknown var_idx, return_vars_launcher");
    }

}

--------------------------------------------------------------------------------
/kernels/verify_integrity.cu:
--------------------------------------------------------------------------------
// host-side assertion: report file/line and terminate when COND is false
#define ASSERT_S(COND) {if(!(COND)){ printf("assertion failure %s:%i\n", __FILE__, __LINE__); exit(1);}}
// VERIFY_BUFFER_INTEGRITY expands to nothing unless CUDA_DEBUG is defined
#ifdef CUDA_DEBUG
    #define VERIFY_BUFFER_INTEGRITY {if(verify_buffer_integrity() != 1){ printf("assertion failure %s:%i\n", __FILE__, __LINE__); exit(1); }}
    //#define VERIFY_BUFFER_INTEGRITY {printf("verifying %s\n", __FILE__); if(verify_buffer_integrity() != 1){ printf("assertion failure %s:%i\n", __FILE__, __LINE__); exit(1); }}
#else
    #define VERIFY_BUFFER_INTEGRITY

#endif

// Wait for the device, copy the boards to the host and check that every cell
// holds 0, 1 or -1. Returns 1 on success, 0 when the copy back fails; a bad
// cell terminates the process through ASSERT_S.
char verify_buffer_integrity(){
    cudaError_t err = cudaDeviceSynchronize(); CHECK_CUDA_ERR

    if(return_device_buffers() != 1){
        printf("err returnning buffers %s:%i\n", __FILE__, __LINE__);
        return 0;
    }

    ////////////////// map tests
    // the boards of all games are stored back to back, so one flat pass visits
    // the same cells, in the same order, as a game / x / y nest
    for(int cell = 0; cell < BATCH_MAP_SZ; cell++){
        ASSERT_S((board_cpu[cell] == 0) || (board_cpu[cell] == 1) ||
            (board_cpu[cell] == -1));
    }

    ////////// todo test stones are not surrounded
    return 1;
}

--------------------------------------------------------------------------------
/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.data-00000-of-00001
--------------------------------------------------------------------------------
/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.index
--------------------------------------------------------------------------------
/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy.meta -------------------------------------------------------------------------------- /net_vs_gnugo.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import pygame 4 | import numpy as np 5 | from numpy import sqrt 6 | from pygame.locals import * 7 | import time 8 | import global_vars as gv 9 | import tensorflow as tf 10 | import architectures.tree_tf_op_multi as arch 11 | import gnu_go_test as gt 12 | 13 | ########################################################## configuration: 14 | save_nm = 'models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy' 15 | 16 | # load the following variables from the model .npy file: 17 | save_vars = ['LSQ_LAMBDA', 'LSQ_REG_LAMBDA', 'POL_CROSS_ENTROP_LAMBDA', 'VAL_LAMBDA', 'VALR_LAMBDA', 'L2_LAMBDA', 18 | 'FILTER_SZS', 'STRIDES', 'N_FILTERS', 'N_FC1', 'EPS', 'MOMENTUM', 'SAVE_FREQ', 'N_SIM', 19 | 'N_TURNS', 'CPUCT'] 20 | 21 | save_d = np.load(save_nm, allow_pickle=True).item() 22 | for key in save_vars: 23 | if key == 'save_nm': 24 | continue 25 | exec('%s = save_d["%s"]' % (key,key)) 26 | 27 | ########## over-write number of simulations previously used: 28 | N_SIM = 2000 #500 29 | 30 | net = 'eval32' 31 | #net = 'eval' 32 | #net = 'main' 33 | 34 | run_one_pass_only = True # run only the network (no tree search) 35 | #run_one_pass_only = False # make moves from the tree search 36 | 37 | if run_one_pass_only == False: 38 | import py_util.py_util as pu 
39 | 40 | TURN_MIN = 5 # if we are near the max turns the network was trained on (N_TURNS), how much farther do we simulate? 41 | NET_PLAYER = 0 # 0: the network plays first, 1: GNU Go plays first 42 | 43 | ############## load model, init variables 44 | DEVICE = '/gpu:0' 45 | arch.init_model(DEVICE, N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM, 46 | LSQ_LAMBDA, LSQ_REG_LAMBDA, POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, VALR_LAMBDA, L2_LAMBDA, training=False) 47 | 48 | arch.saver.restore(arch.sess, save_nm) 49 | arch.sess.run(arch.init_state) 50 | 51 | visit_count_map = np.zeros((gv.n_rows, gv.n_cols), dtype='int32') 52 | 53 | def ret_d(player): # return dictionary for input into tensor flow 54 | return {arch.moving_player: player} 55 | 56 | def run_sim(turn, starting_player): # simulate game forward 57 | t_start = time.time() 58 | arch.sess.run(arch.session_backup) 59 | pu.session_backup() 60 | 61 | for sim in range(N_SIM): 62 | # backup then make next move 63 | # (this loop, iterates over one full game-play from present turn) 64 | for turn_sim in range(turn, np.max((N_TURNS+1, turn+TURN_MIN))): 65 | for player in [0,1]: 66 | if turn_sim == turn and starting_player == 1 and player == 0: # skip player 0, has already moved 67 | continue 68 | 69 | # get valid moves, network policy and value estimates: 70 | valid_mv_map, pol, val = arch.sess.run([arch.valid_mv_map, arch.pol[net], arch.val[net]], feed_dict=ret_d(player)) 71 | 72 | # backup visit Q values 73 | if turn_sim != turn: 74 | pu.backup_visit(player, np.array(val, dtype='single')) 75 | 76 | pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree 77 | to_coords = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[0] # choose moves based on policy and Q values (latter of which already stored in tree) 78 | pu.register_mv(player, np.array(to_coords, dtype='int32')) # register move in tree 79 | 80 | arch.sess.run(arch.move_frm_inputs, feed_dict={arch.moving_player: player, 
arch.to_coords_input: to_coords}) # move network (update GPU vars) 81 | 82 | # backup terminal state 83 | winner = np.array(arch.sess.run(arch.winner, feed_dict=ret_d(0)), dtype='single') 84 | pu.backup_visit(0, winner) 85 | pu.backup_visit(1, -winner) 86 | 87 | # return move back to previous node in tree 88 | arch.sess.run(arch.session_restore) 89 | pu.session_restore() 90 | 91 | # print progress 92 | if sim % 20 == 0: 93 | print 'simulation: ', sim, ' (%i sec)' % (time.time() - t_start) 94 | 95 | 96 | 97 | ################################# 98 | t_start = time.time() 99 | board = np.zeros((N_TURNS, 2, gv.BATCH_SZ, gv.n_rows, gv.n_cols, gv.n_input_channels), dtype='float16') 100 | winner = np.zeros((N_TURNS, gv.BATCH_SZ), dtype='int') 101 | scores = np.zeros((N_TURNS, gv.BATCH_SZ), dtype='int') 102 | 103 | arch.sess.run(arch.init_state) 104 | if run_one_pass_only == False: 105 | pu.init_tree() 106 | 107 | gt.init_board(arch.sess.run(arch.gm_vars['board'])) 108 | gt.move_nn(np.ones(gv.BATCH_SZ, dtype='int')*-1) # when NET_PLAYER=1, for some reason GnuGo doesn't respond unless we pass the first move 109 | 110 | turn_start_t = time.time() 111 | for turn in range(N_TURNS): 112 | for player in [0,1]: 113 | # network's turn 114 | if player == NET_PLAYER: 115 | 116 | #### make most probable mv, do not use tree search 117 | if run_one_pass_only: 118 | # 'eval32' movement ops were not defined, so get policy, from network, and then use the ops in 'eval' (where it was defined) 119 | d = ret_d(player) 120 | imgs = arch.sess.run(arch.imgs, feed_dict=d) 121 | d[arch.imgs32] = np.asarray(imgs, dtype='float') 122 | pol = arch.sess.run(arch.pol[net], feed_dict=d) 123 | d[arch.pol['eval']] = pol 124 | 125 | board[turn, player] = imgs 126 | 127 | if turn == 0: # choose in proportion to probability 128 | to_coords = arch.sess.run([arch.nn_prob_to_coords_valid_mvs['eval'], arch.nn_prob_move_unit_valid_mvs['eval']], feed_dict=d)[0] 129 | else: 130 | to_coords = 
arch.sess.run([arch.nn_max_prob_to_coords_valid_mvs['eval'], arch.nn_max_prob_move_unit_valid_mvs['eval']], feed_dict=d)[0] 131 | 132 | ##### use tree search 133 | else: 134 | run_sim(turn, player) 135 | 136 | board[turn, player], valid_mv_map, pol = arch.sess.run([arch.imgs, arch.valid_mv_map, arch.pol[net]], feed_dict = ret_d(player)) # generate batch and valid moves 137 | 138 | ######### 139 | pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree 140 | visit_count_map = pu.choose_moves(player, np.array(pol, dtype='single'), CPUCT)[-1] # get number of times each node was visited 141 | 142 | if turn == 0: 143 | to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit], feed_dict={arch.moving_player: player, 144 | arch.visit_count_map: visit_count_map})[0] # make move in proportion to visit counts 145 | else: 146 | to_coords = arch.sess.run([arch.nn_max_prob_to_coords_valid_mvs[net], arch.nn_max_prob_move_unit_valid_mvs[net]], feed_dict={arch.moving_player: player, 147 | arch.pol[net]: visit_count_map})[0] 148 | 149 | gt.move_nn(to_coords) # tell gnugo where the network moved 150 | 151 | # gnugo's turn 152 | else: 153 | # mv gnugo 154 | board[turn, player], valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map], feed_dict = ret_d(player)) # generate batch and valid moves 155 | 156 | # register valid moves in tree: 157 | if run_one_pass_only == False: 158 | pu.add_valid_mvs(player, valid_mv_map) 159 | 160 | to_coords = gt.move_ai() # get move from gnu go 161 | 162 | # update gpu game state w/ move: 163 | arch.sess.run(arch.nn_max_move_unit['eval'], feed_dict={arch.moving_player: player, arch.nn_max_to_coords['eval']: to_coords}) 164 | 165 | print turn, player 166 | 167 | # register move in tree: 168 | if run_one_pass_only == False: 169 | pu.register_mv(player, np.array(to_coords, dtype='int32')) 170 | 171 | winner[turn], scores[turn] = arch.sess.run([arch.winner, arch.score], feed_dict={arch.moving_player: NET_PLAYER}) 172 
| 173 | # prune tree 174 | if run_one_pass_only == False and turn != (N_TURNS-1): 175 | pu.prune_tree(0) # 0: prune all games in batch, 1: prune only first game 176 | 177 | if (turn+1) % 2 == 0: 178 | print 'eval finished turn %i (%i sec)' % (turn, time.time() - turn_start_t) 179 | 180 | 181 | ####### printing 182 | res, score = arch.sess.run([arch.winner, arch.score], feed_dict={arch.moving_player: NET_PLAYER}) 183 | if run_one_pass_only: 184 | match_str = 'network run-once (per turn) mode' 185 | else: 186 | match_str = 'using self-play w/ {} playout batches / turn', N_SIM 187 | 188 | print 'wins', (res == 1).sum(), (res == 1).sum() / 128., 'ties', (res == 0).sum(), 'opp wins', (res == -1).sum(), match_str 189 | 190 | 191 | ######### save results to npy file 192 | fname = '/tmp/' 193 | if run_one_pass_only: 194 | fname += 'test_one_pass_vs_gnu.npy' 195 | else: 196 | fname += 'test_%i_N_SIM_vs_gnu.npy' % N_SIM 197 | print N_SIM 198 | 199 | np.save(fname, {'run_one_pass_only': run_one_pass_only, 'N_SIM': N_SIM, 'board': board, 200 | 'res': res, 'score': score, 'winner': winner, 'scores': scores}) 201 | 202 | -------------------------------------------------------------------------------- /notebooks/go_black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/notebooks/go_black.png -------------------------------------------------------------------------------- /notebooks/go_blank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/notebooks/go_blank.png -------------------------------------------------------------------------------- /notebooks/go_pieces.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/notebooks/go_pieces.png -------------------------------------------------------------------------------- /notebooks/go_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/notebooks/go_white.png -------------------------------------------------------------------------------- /play_network_gui.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import pygame 4 | import numpy as np 5 | from numpy import sqrt 6 | from pygame.locals import * 7 | import time 8 | from datetime import datetime 9 | import global_vars as gv 10 | import tensorflow as tf 11 | import architectures.tree_tf_op_multi as arch 12 | 13 | ########################################################## configuration: 14 | save_nm = 'models/go_0.2000EPS_7GMSZ_800N_SIM_32N_TURNS_128N_FILTERS_5N_LAYERS_35N_BATCH_SETS_TOTAL_35_N_BATCH_SETS_MIN_5N_REP_TRAIN.npy' 15 | 16 | net = 'eval32' 17 | #net = 'eval' 18 | #net = 'main' 19 | 20 | run_one_pass_only = True # run only the network (no tree search) 21 | #run_one_pass_only = False # make moves from the tree search 22 | 23 | show_txt = False # don't show statistics of each move (Q and P values, visit counts) -- toggle w/ right click after network makes move 24 | 25 | # load the following variables from the model .npy file: 26 | save_vars = ['LSQ_LAMBDA', 'LSQ_REG_LAMBDA', 'POL_CROSS_ENTROP_LAMBDA', 'VAL_LAMBDA', 'VALR_LAMBDA', 'L2_LAMBDA', 27 | 'FILTER_SZS', 'STRIDES', 'N_FILTERS', 'N_FC1', 'EPS', 'MOMENTUM', 'SAVE_FREQ', 'N_SIM', 'N_TURNS', 'CPUCT'] 28 | save_d = np.load(save_nm, allow_pickle=True).item() 29 | for key in save_vars: 30 | if key == 'save_nm': 31 | continue 32 | exec('%s = save_d["%s"]' % (key,key)) 33 | 34 | if 
run_one_pass_only == False: 35 | import py_util.py_util as pu 36 | 37 | ########## over-write number of simulations previously used: 38 | # (stop self-play when both of these (the next two) conditions is met) 39 | SIM_MIN = 2000 40 | TIME_MIN = 1 # time spent running self-play exceeds this (minutes) 41 | 42 | ### 43 | TURN_MIN = 5 # if we are near the max turns the network was trained on (N_TURNS), how much farther do we simulate? 44 | CPUCT = 1 45 | NET_PLAYER = 0 # 0: the network plays first, 1: you play first 46 | 47 | def human_player(): 48 | global NET_PLAYER 49 | assert NET_PLAYER == 1 or NET_PLAYER == 0 50 | return 1 - NET_PLAYER 51 | 52 | ############################################################################### 53 | save_screenshot_flag = True 54 | 55 | img_sdir = 'go_games_imgs/' 56 | img_sdir += datetime.now().strftime('%Y_%m_%d_%H_%M_%S') 57 | os.system('mkdir ' + img_sdir) 58 | os.system("echo %s > %s/model_location.txt" % (save_nm, img_sdir)) 59 | 60 | ############## load model, init variables 61 | DEVICE = '/gpu:0' 62 | arch.init_model(DEVICE, N_FILTERS, FILTER_SZS, STRIDES, N_FC1, EPS, MOMENTUM, 63 | LSQ_LAMBDA, LSQ_REG_LAMBDA, POL_CROSS_ENTROP_LAMBDA, VAL_LAMBDA, VALR_LAMBDA, L2_LAMBDA, training=False) 64 | 65 | arch.saver.restore(arch.sess, save_nm) 66 | arch.sess.run(arch.init_state) 67 | if run_one_pass_only == False: 68 | pu.init_tree() 69 | 70 | ##### stats to print if show_txt = True 71 | Q_map = np.zeros((gv.n_rows, gv.n_cols), dtype='single') 72 | Q_map_next = np.zeros_like(Q_map) # Q values for the move after the current (assuming you make the move the network predicts you will) 73 | P_map = np.zeros_like(Q_map) 74 | P_map_next = np.zeros_like(Q_map) 75 | visit_count_map = np.zeros((gv.n_rows, gv.n_cols), dtype='int32') 76 | visit_count_map_next = np.zeros_like(visit_count_map) 77 | 78 | t_init = time.time() 79 | 80 | def ret_d(player): # return dictionary for input into tensor flow 81 | return {arch.moving_player: player} 82 | 83 | 
def ret_stats(player): # return Q map, P map, and visit count maps 84 | pol = np.zeros((gv.BATCH_SZ, gv.map_szt), dtype='float32') 85 | pol[:,0] = 1 86 | Q_map, P_map, visit_count_map = pu.choose_moves(player, pol, CPUCT)[1:] 87 | 88 | Q_map = Q_map.reshape((gv.BATCH_SZ, gv.n_rows, gv.n_cols))[0] 89 | P_map = P_map.reshape((gv.BATCH_SZ, gv.n_rows, gv.n_cols))[0] 90 | visit_count_map = visit_count_map.reshape((gv.BATCH_SZ, gv.n_rows, gv.n_cols))[0] 91 | 92 | return Q_map, P_map, visit_count_map 93 | 94 | 95 | # move neural network 96 | def nn_mv(): 97 | global Q_map, P_map, visit_count_map, valid_mv_map, pol 98 | global Q_map_next, P_map_next, visit_count_map_next, to_coords 99 | 100 | t_start = time.time() 101 | arch.sess.run(arch.session_backup) 102 | 103 | #### make most probable mv, do not use tree search 104 | if run_one_pass_only: 105 | # 'eval32' movement ops were not defined, so get policy, from network, and then use the ops in 'eval' (where it was defined) 106 | d = ret_d(NET_PLAYER) 107 | imgs = arch.sess.run(arch.imgs, feed_dict=d) 108 | d[arch.imgs32] = np.asarray(imgs, dtype='float') 109 | pol = arch.sess.run(arch.pol[net], feed_dict=d) 110 | d = ret_d(NET_PLAYER) 111 | d[arch.pol['eval']] = pol 112 | 113 | if turn == 0: 114 | arch.sess.run(arch.nn_prob_move_unit_valid_mvs['eval'], feed_dict=d) 115 | else: 116 | arch.sess.run(arch.nn_max_prob_move_unit_valid_mvs['eval'], feed_dict=d) 117 | 118 | #Q_map, P_map, visit_count_map = ret_stats(0) 119 | 120 | ##### use tree search 121 | else: 122 | #pu.init_tree() 123 | pu.session_backup() 124 | 125 | sim = 0 126 | # each loop is one simulation 127 | while True: 128 | if ((time.time() - t_start) > TIME_MIN) and (sim >= SIM_MIN): 129 | break 130 | 131 | # backup then make next move 132 | # (this loop, iterates over one full game-play from present turn) 133 | for turn_sim in range(turn, np.max((N_TURNS+1, turn+TURN_MIN))): 134 | for player in [0,1]: 135 | if turn_sim == turn and human_player() == 0 and player == 
0: # skip player 0 (human), has already moved 136 | continue 137 | 138 | # get valid moves, network policy and value estimates: 139 | valid_mv_map, pol, val = arch.sess.run([arch.valid_mv_map, arch.pol[net], arch.val[net]], feed_dict=ret_d(player)) 140 | 141 | # backup visit Q values 142 | if turn_sim != turn: 143 | pu.backup_visit(player, np.array(val, dtype='single')) 144 | 145 | pu.add_valid_mvs(player, valid_mv_map) # register valid moves in tree 146 | to_coords = pu.choose_moves(player, np.array(pol, dtype='float32'), CPUCT)[0] # choose moves based on policy and Q values (latter of which already stored in tree) 147 | 148 | pu.register_mv(player, np.array(to_coords, dtype='int32')) # register move in tree 149 | arch.sess.run(arch.move_frm_inputs, feed_dict={arch.moving_player: player, arch.to_coords_input: to_coords}) # move network (update GPU vars) 150 | 151 | # backup terminal state 152 | winner = np.array(arch.sess.run(arch.winner, feed_dict=ret_d(0)), dtype='single') 153 | pu.backup_visit(0, winner) 154 | pu.backup_visit(1, -winner) 155 | 156 | # return move to previous node in tree 157 | arch.sess.run(arch.session_restore) # reset gpu game state 158 | pu.session_restore() # reset cpu tree state 159 | 160 | ###################### 161 | # print stats from tree 162 | if sim % 20 == 0: 163 | # get valid moves, network policy and value estimates: 164 | valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map], feed_dict=ret_d(NET_PLAYER))[1] 165 | pu.add_valid_mvs(NET_PLAYER, valid_mv_map) # register valid moves in tree 166 | 167 | visit_count_map_128 = pu.choose_moves(NET_PLAYER, np.array(pol, dtype='float32'), CPUCT)[-1] # to feed back into tf (entries for all 128 games, not just 1) 168 | Q_map, P_map, visit_count_map = ret_stats(NET_PLAYER) # stats we will show on screen 169 | 170 | # move network where it is estimates is its best move 171 | to_coords = arch.sess.run([arch.nn_max_prob_to_coords_valid_mvs[net], arch.nn_max_prob_move_unit_valid_mvs[net]], 
feed_dict={arch.moving_player: NET_PLAYER, 172 | arch.pol[net]: visit_count_map_128})[0] 173 | 174 | pu.register_mv(NET_PLAYER, np.asarray(to_coords, dtype='int32')) # register move in tree 175 | arch.sess.run(arch.move_frm_inputs, feed_dict={arch.moving_player: NET_PLAYER, arch.to_coords_input: to_coords}) # move network (update GPU vars) 176 | 177 | # get network tree estimates as to where it thinks you will move after it moves 178 | valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map], feed_dict=ret_d(human_player()))[1] 179 | pu.add_valid_mvs(human_player(), valid_mv_map) # register valid moves in tree 180 | 181 | Q_map_next, P_map_next, visit_count_map_next = ret_stats(human_player()) 182 | 183 | arch.sess.run(arch.session_restore) # restore prior tf game state 184 | pu.session_restore() # restore prior tree 185 | 186 | draw(True) 187 | pygame.display.set_caption('%i %2.1f' % (sim, time.time() - t_start)) 188 | 189 | print 'simulation: ', sim, ' (%i sec)' % (time.time() - t_start) 190 | 191 | sim += 1 192 | 193 | ### make move 194 | 195 | # first get valid moves and current policy at board position 196 | valid_mv_map, pol = arch.sess.run([arch.imgs, arch.valid_mv_map, arch.pol[net]], feed_dict = ret_d(NET_PLAYER))[1:] 197 | pu.add_valid_mvs(NET_PLAYER, valid_mv_map) # set in tree 198 | 199 | visit_count_map_128 = pu.choose_moves(NET_PLAYER, np.array(pol, dtype='float32'), CPUCT)[-1] # to feed back into tf (entries for all 128 games, not just 1) 200 | Q_map, P_map, visit_count_map = ret_stats(NET_PLAYER) 201 | 202 | # makes moves as if this were still part of the self-play (max visit count) 203 | #to_coords = arch.sess.run([arch.tree_det_visit_coord, arch.tree_det_move_unit], feed_dict={arch.moving_player: 0, 204 | # arch.visit_count_map: visit_count_map})[0] 205 | 206 | # move to max visited node: 207 | #if turn != 0: 208 | to_coords = arch.sess.run([arch.nn_max_prob_to_coords_valid_mvs[net], arch.nn_max_prob_move_unit_valid_mvs[net]], 
feed_dict={arch.moving_player: NET_PLAYER, 209 | arch.pol[net]: visit_count_map_128})[0] 210 | 211 | # randomly move proportionatly to vist counts 212 | #else: 213 | # to_coords = arch.sess.run([arch.tree_prob_visit_coord, arch.tree_prob_move_unit], feed_dict={arch.moving_player: 0, 214 | # arch.visit_count_map: visit_count_map})[0] # make move in proportion to visit counts 215 | 216 | pu.register_mv(NET_PLAYER, np.array(to_coords, dtype='int32')) 217 | 218 | print 'pruning...' 219 | pu.prune_tree(1) # 0: prune all games in batch, 1: prune only first game 220 | print time.time() - t_start 221 | 222 | print 'finished' 223 | return arch.sess.run(arch.gm_vars['board'])[0] 224 | 225 | def save_screenshot(player): 226 | if save_screenshot_flag == False: 227 | return 228 | 229 | fname = "%s/%i_%i_%s_net_%s_one_pass_%i_ai_player_%i" % (img_sdir, t_init, turn, player, net, run_one_pass_only, NET_PLAYER) 230 | if run_one_pass_only == False: 231 | fname += '_%isims' % SIM_MIN 232 | 233 | pygame.image.save(windowSurface, fname + '.png') 234 | 235 | ##################### display 236 | psz = 50 # size to display pieces 237 | pszh = psz/2. 
238 | n_txt_rows = 4 239 | window_sz = (psz*gv.n_rows, psz*gv.n_cols) 240 | 241 | BLACK = (0,)*3 242 | LINE_WIDTH = 2 243 | turn = 0 244 | 245 | windowSurface = pygame.display.set_mode(window_sz, 0, 32) 246 | pygame.display.set_caption('Go GUI') 247 | 248 | pygame.init() 249 | basicFont = pygame.font.SysFont(None, 15) # < font size 250 | 251 | whitep = pygame.image.load('notebooks/go_white.png') 252 | blackp = pygame.image.load('notebooks/go_black.png') 253 | blank = pygame.image.load('notebooks/go_blank.png') 254 | 255 | whitep = pygame.transform.scale(whitep, (psz, psz)) 256 | blackp = pygame.transform.scale(blackp, (psz, psz)) 257 | blank = pygame.transform.scale(blank, window_sz) 258 | 259 | 260 | centers = np.arange(gv.n_rows)*psz + pszh 261 | to_coords_manual = -np.ones(gv.BATCH_SZ, dtype='int32') 262 | 263 | board = np.zeros((gv.n_rows, gv.n_cols), dtype='int8') 264 | 265 | # draw text over partially transparent background 266 | # tcoord is the coordinate, tsz is the size, bgc is the color 267 | def draw_txt(txt, tcoord, tsz, bgc): 268 | txtBgSurface = pygame.Surface(tsz) 269 | txtBgSurface.set_alpha(128) 270 | txtBgSurface.fill(bgc) 271 | windowSurface.blit(txtBgSurface, tcoord) 272 | 273 | fc = [255,255,255] 274 | text = basicFont.render(txt, True, fc) 275 | windowSurface.blit(text, tcoord) 276 | 277 | 278 | 279 | # draw board and optionally text 280 | def draw(update=False): 281 | windowSurface.blit(blank, (0,0)) 282 | 283 | # draw lines 284 | for i in range(gv.n_rows): 285 | pygame.draw.line(windowSurface, BLACK, (0, i*psz + pszh), (window_sz[0], i*psz + pszh), LINE_WIDTH) 286 | pygame.draw.line(windowSurface, BLACK, (i*psz + pszh, 0), (i*psz + pszh, window_sz[1]), LINE_WIDTH) 287 | 288 | # loop over all positions on game board 289 | for i in range(gv.n_rows): 290 | for j in range(gv.n_cols): 291 | coord = np.asarray((i*psz, j*psz)) 292 | # show pieces 293 | if board[i,j] == 1: 294 | windowSurface.blit(blackp, coord) 295 | elif board[i,j] == -1: 296 | 
windowSurface.blit(whitep, coord) 297 | 298 | ############## 299 | # print tree statistics (for the network's own movement) 300 | if P_map[i,j] != 0 and show_txt: 301 | visit_total = visit_count_map.sum() 302 | rc = np.int(np.min((255, 3*255.*visit_count_map.reshape(gv.map_sz)[i,j] / np.single(visit_total)))) 303 | bgc = [rc, 0, 0] 304 | 305 | # Show Q and P at each location on map 306 | txt = '%1.2f %1.2f' % (Q_map.reshape(gv.map_sz)[i,j], P_map.reshape(gv.map_sz)[i,j]) 307 | tsz = np.asarray(basicFont.size(txt), dtype='single') 308 | tcoord = coord + pszh - np.asarray([tsz[0]/2., n_txt_rows*tsz[1]/2]) 309 | draw_txt(txt, tcoord, tsz, bgc) 310 | tsz1 = copy.deepcopy(tsz) 311 | 312 | # Show Q + P, and visit_count_map 313 | txt = '%1.2f %i' % (Q_map.reshape(gv.map_sz)[i,j]+P_map.reshape(gv.map_sz)[i,j], visit_count_map.reshape(gv.map_sz)[i,j]) 314 | tsz = np.asarray(basicFont.size(txt), dtype='single') 315 | tcoord = coord + pszh - np.asarray([tsz[0]/2., n_txt_rows*tsz[1]/2]) 316 | tcoord[1] += tsz1[1] 317 | draw_txt(txt, tcoord, tsz, bgc) 318 | tsz2 = copy.deepcopy(tsz) 319 | else: 320 | tsz1 = tsz2 = [0,0] 321 | 322 | ############### 323 | # print tree statistics (where the network estimates *you* will play) 324 | if P_map_next[i,j] and show_txt: 325 | visit_total = visit_count_map_next.sum() 326 | rc = np.int(np.min((255, 3*255.*visit_count_map_next.reshape(gv.map_sz)[i,j] / np.single(visit_total)))) 327 | bgc = [0, rc, 0] 328 | fc = [255,255,255] 329 | 330 | # Show Q and P at each location on map 331 | txt = '%1.2f %1.2f' % (Q_map_next.reshape(gv.map_sz)[i,j], P_map_next.reshape(gv.map_sz)[i,j]) 332 | tsz = np.asarray(basicFont.size(txt), dtype='single') 333 | tcoord = coord + pszh - np.asarray([tsz[0]/2., n_txt_rows*tsz[1]/2]) 334 | tcoord[1] += tsz1[1] + tsz2[1] 335 | draw_txt(txt, tcoord, tsz, bgc) 336 | tsz3 = copy.deepcopy(tsz) 337 | 338 | # Show Q + P, and visit_count_map 339 | txt = '%1.2f %i' % 
(Q_map_next.reshape(gv.map_sz)[i,j]+P_map_next.reshape(gv.map_sz)[i,j], visit_count_map_next.reshape(gv.map_sz)[i,j]) 340 | tsz = np.asarray(basicFont.size(txt), dtype='single') 341 | tcoord = coord + pszh - np.asarray([tsz[0]/2., n_txt_rows*tsz[1]/2]) 342 | tcoord[1] += tsz1[1] + tsz2[1] + tsz3[1] 343 | draw_txt(txt, tcoord, tsz, bgc) 344 | 345 | 346 | if update: 347 | pygame.display.update() 348 | 349 | draw(update=True) 350 | 351 | if NET_PLAYER == 0: # network makes first move 352 | board = nn_mv() 353 | draw(update=True) 354 | save_screenshot('b') 355 | 356 | #pygame.mixer.music.load('/home/tapa/gtr-nylon22.mp3') 357 | 358 | while True: 359 | event = pygame.event.wait() 360 | 361 | # move player, then move network 362 | if event.type == MOUSEBUTTONUP: 363 | 364 | # if right button pressed, toggle showing tree stats 365 | if event.button == 3: 366 | show_txt = not show_txt 367 | draw(update=True) 368 | continue 369 | 370 | # get player move from cursor 371 | mouse_pos = np.asarray(event.pos) 372 | x = np.argmin((mouse_pos[0] - centers)**2) 373 | y = np.argmin((mouse_pos[1] - centers)**2) 374 | 375 | to_coords_manual[0] = x*gv.n_cols + y 376 | 377 | board_prev = arch.sess.run(arch.gm_vars['board'])[0] 378 | 379 | imgs, valid_mv_map = arch.sess.run([arch.imgs, arch.valid_mv_map], feed_dict={arch.moving_player: human_player()}) 380 | 381 | # make move for player 382 | arch.sess.run(arch.nn_max_move_unit['eval'], feed_dict={arch.moving_player: human_player(), arch.nn_max_to_coords['eval']: to_coords_manual}) 383 | 384 | # valid? 
385 | board = arch.sess.run(arch.gm_vars['board'])[0] 386 | if board_prev.sum() == board.sum(): # invalid move 387 | print 'invalid mv' 388 | continue 389 | 390 | # register in tree if not in one-pass-only mode 391 | if run_one_pass_only == False: 392 | pu.add_valid_mvs(human_player(), valid_mv_map) # register valid moves in tree 393 | pu.register_mv(human_player(), to_coords_manual) 394 | 395 | win_tmp, score_tmp = arch.sess.run([arch.winner, arch.score], feed_dict={arch.moving_player: human_player()}) 396 | print 'you: turn %i, winner %i, score %i' % (turn, win_tmp[0], score_tmp[0]) 397 | 398 | draw(update=True) 399 | save_screenshot('w') 400 | 401 | # network makes move 402 | board = nn_mv() 403 | draw(update=True) 404 | turn += 1 405 | save_screenshot('b') 406 | 407 | if run_one_pass_only == False: 408 | pygame.mixer.music.play() 409 | 410 | win_tmp, score_tmp = arch.sess.run([arch.winner, arch.score], feed_dict={arch.moving_player: NET_PLAYER}) 411 | print 'network: turn %i, winner %i, score %i' % (turn, win_tmp[0], score_tmp[0]) 412 | 413 | if event.type == QUIT: 414 | pygame.display.quit() 415 | break 416 | 417 | -------------------------------------------------------------------------------- /py_util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cody2007/alpha_go_zero_implementation/50cb9e54ace2fedd96ca1861ce5260c33d354074/py_util/__init__.py -------------------------------------------------------------------------------- /py_util/_py_util.c: -------------------------------------------------------------------------------- 1 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 2 | #include "includes.h" 3 | 4 | #include "rotate_reflect_imgs.c" 5 | #include "init_tree.c" 6 | #include "add_valid_mvs.c" 7 | #include "register_mv.c" 8 | #include "backup_visit.c" 9 | #include "prune_tree.c" 10 | #include "choose_moves.c" 11 | #include "session_backup.c" 12 | #include "return_tree.c" 

// Method table for the `_py_util` extension module: maps each Python-visible
// name to its C implementation (the implementations are #include'd above as
// .c files). All entries take positional arguments only (METH_VARARGS).
static PyMethodDef py_util[] = {
	{"rotate_reflect_imgs", rotate_reflect_imgs, METH_VARARGS},
	{"init_tree", init_tree, METH_VARARGS},
	{"add_valid_mvs", add_valid_mvs, METH_VARARGS},
	{"register_mv", register_mv, METH_VARARGS},
	{"backup_visit", backup_visit, METH_VARARGS},
	{"prune_tree", prune_tree, METH_VARARGS},
	{"choose_moves", choose_moves, METH_VARARGS},
	{"session_backup", session_backup, METH_VARARGS},
	{"session_restore", session_restore, METH_VARARGS},
	{"return_tree", return_tree, METH_VARARGS},

	{NULL, NULL} // sentinel terminating the table
};

// Python 2 module initialisation entry point (uses Py_InitModule).
// Seeds the C random number generator from the wall clock, registers the
// method table under the module name "_py_util" and initialises the NumPy
// C-API (import_array).
#if defined(_WIN32) || defined(_WIN64)
extern "C" void _declspec(dllexport) init_py_util(){
#else
extern void init_py_util(){
#endif
	srand(time(NULL));

	(void) Py_InitModule("_py_util", py_util);
	import_array();

}


/* ---- py_util/add_valid_mvs.c ---- */

// add_valid_mvs(moving_player, valid_mv_map) -> None
//
// For every game in the batch, create the list of child-move entries for the
// game's current tree node (tree_start[gm]), unless that node already has one.
//
//   moving_player : 0 or 1
//   valid_mv_map  : int8 NumPy array of shape [BATCH_SZ, MAP_SZ_X, MAP_SZ_Y];
//                   a non-zero entry marks a legal move at that coordinate
//
// The first entry of every list is the pass move (coordinate -1); one entry
// per legal coordinate follows in map order. New entries start with no child
// tree node (-1), zero Q total, zero visit count and an unset prior (-1).
static PyObject *add_valid_mvs(PyObject *self, PyObject *args){
	PyArrayObject *valid_mv_map_np;
	int moving_player;
	char * valid_mv_map;

	if(!PyArg_ParseTuple(args, "iO!", &moving_player, &PyArray_Type, &valid_mv_map_np)) return NULL;

	/////////////////////// check inputs
	// NOTE(review): ASSERT is defined outside this view; presumably it reports
	// the message and aborts the call -- confirm its exact behaviour.
	ASSERT(moving_player == 0 || moving_player == 1, "moving player incorrect")
	ASSERT(valid_mv_map_np != NULL, "absent inputs")
	ASSERT(PyArray_TYPE(valid_mv_map_np) == NPY_INT8, "data type incorrect")
	ASSERT(PyArray_NDIM(valid_mv_map_np) == 3, "dims incorrect")
	// only the innermost stride is checked here
	ASSERT(PyArray_STRIDE(valid_mv_map_np, 2) == sizeof(valid_mv_map[0]), "data not contigious or C-order")

	npy_intp * dims_in = PyArray_DIMS(valid_mv_map_np);

	ASSERT(dims_in[0] == BATCH_SZ, "batch sz incorrect")
	ASSERT(dims_in[1] == MAP_SZ_X, "map sz incorrect")
	ASSERT(dims_in[2] == MAP_SZ_Y, "map sz incorrect")

	valid_mv_map = (char *) PyArray_DATA(valid_mv_map_np);

	////////////////////////////
	for(int gm = 0; gm < BATCH_SZ; gm++){
		int TO; // tree offset of the current node; assigned by TO_FRM_T_IND
		int game_offset = gm*MAP_SZ; // start of this game's map in valid_mv_map

		#ifdef CUDA_DEBUG
			if(tree_sz[gm] >= TREE_BUFFER_SZ)
				printf("tree_sz[%i] = %i tree_start %i\n", gm, tree_sz[gm], tree_start[gm]);
			if(tree_start[gm] < 0 || tree_start[gm] >= tree_sz[gm])
				printf("tree_sz[%i] = %i tree_start %i\n", gm, tree_sz[gm], tree_start[gm]);
		#endif

		// TO_FRM_T_IND is defined outside this view; presumably it derives TO
		// from gm and t_ind (it relies on those exact local names).
		int t_ind = tree_start[gm]; TO_FRM_T_IND

		// already created valid moves leaves:
		if(tree_list_start[TO] != -1){
			DASSERT(tree_player[TO] == moving_player);
			#ifdef CUDA_DEBUG
				// debug-only consistency check: the stored list size must equal
				// the number of legal coordinates plus one (count starts at 1,
				// matching the pass entry that ADD_MV(-1) always adds)
				int n_valid_mvs_chk = 1;
				for(int map_coord = 0; map_coord < MAP_SZ; map_coord++){
					int gcoord = game_offset + map_coord;
					if(valid_mv_map[gcoord]) n_valid_mvs_chk ++;
				}
				if(n_valid_mvs_chk != tree_list_sz[TO]){
					printf("skipping %i moving_player %i n_valid_mvs_chk %i tree_list_sz %i\n", gm, moving_player,
							n_valid_mvs_chk, tree_list_sz[TO]);
					DASSERT(0)
				}
			#endif
			continue;
		}

		// first visit of this node: record the mover and open an empty list at
		// the current end of this game's move buffer
		tree_player[TO] = moving_player;
		tree_list_start[TO] = list_sz[gm];
		tree_list_sz[TO] = 0;

		DASSERT(list_sz[gm] < MV_BUFFER_SZ);

		// LOE: offset of the next free list entry of game gm.
		// ADD_MV(COORD): append one entry for map coordinate COORD and grow both
		// the node's list size and the game's list size.
		// NOTE(review): the capacity assert runs after the entry has been
		// written and after list_sz was incremented.
		#define LOE (gm*MV_BUFFER_SZ + list_sz[gm])
		#define ADD_MV(COORD) { list_valid_mv_inds[LOE] = COORD;\
				list_valid_tree_inds[LOE] = -1;\
				list_q_total[LOE] = 0;\
				list_visit_count[LOE] = 0;\
				list_prob[LOE] = -1;\
				tree_list_sz[TO] ++;\
				list_sz[gm] ++;\
				assert(list_sz[gm] < MV_BUFFER_SZ); }

		ADD_MV(-1) // pass move entry

		for(int map_coord = 0; map_coord < MAP_SZ; map_coord++){
			int gcoord = game_offset + map_coord;
			if(!valid_mv_map[gcoord]) continue;

			ADD_MV(map_coord)
		} // map loop
	} // gm

	Py_RETURN_NONE;
}

/* ---- py_util/backup_visit.c ---- */

// backup_visit(moving_player, q) -> None
//
// For every game in the batch, walk from the game's current tree node
// (tree_start[gm]) up to the root. At each ancestor, locate the child-list
// entry that leads to the node just left; if that ancestor belongs to
// `moving_player`, increment the entry's visit count and add q[gm] to its
// Q total.
//
//   moving_player : 0 or 1
//   q             : float32 NumPy array of shape [BATCH_SZ], one value per game
static PyObject *backup_visit(PyObject *self, PyObject *args){
	PyArrayObject * q_np;
	float * q;
	int moving_player;

	if(!PyArg_ParseTuple(args, "iO!", &moving_player, &PyArray_Type, &q_np)) return NULL;

	/////////////////// check inputs
	// NOTE(review): ASSERT is defined outside this view -- confirm what it does
	// on failure.
	ASSERT(q_np != NULL, "absent inputs")
	ASSERT(PyArray_TYPE(q_np) == NPY_FLOAT32, "data type incorrect")
	ASSERT(PyArray_NDIM(q_np) == 1, "dims must be 1")
	ASSERT(PyArray_STRIDE(q_np, 0) == sizeof(q[0]), "data not contigious or C-order")
	ASSERT(moving_player == 0 || moving_player == 1, "moving_player incorrect")

	npy_intp * dims_in = PyArray_DIMS(q_np);

	ASSERT(dims_in[0] == BATCH_SZ, "batch sz incorrect")

	q = (float *) PyArray_DATA(q_np);

	/////////////////////////

	for(int gm = 0; gm < BATCH_SZ; gm++){

		// tree ind
		// TO / LO are assigned by the TO_FRM_T_IND / LO_FRM_L_IND macros, which
		// are defined outside this view and rely on the local names gm, t_ind
		// and l_ind -- presumably tree and list offsets for this game.
		int TO, LO;
		int t_ind = tree_start[gm]; TO_FRM_T_IND

		while(1){
			int t_ind_prev = t_ind; // node we are leaving (child of the next one)
			if(tree_parent[TO] == -1) // tree root
				break;

			// inds
			// step to the parent and fetch its child list
			t_ind = tree_parent[TO]; TO_FRM_T_IND
			int l_ind = tree_list_start[TO]; LO_FRM_L_IND
			int n_valid_mvs = tree_list_sz[TO]; CHK_N_VALID_MVS

			// find list index for previous tree ind
			char found = 0;
			int LOC;
			for(int mv_ind = 0; mv_ind < n_valid_mvs; mv_ind++){
				LOC = LO + mv_ind;
				if(list_valid_tree_inds[LOC] != t_ind_prev) continue;

				found = 1;
				break;
			}
			// NOTE(review): if assert() is compiled out (NDEBUG) and no entry
			// matched, LOC below refers to the last list entry, or is
			// uninitialised when n_valid_mvs is 0.
			assert(found != 0);

			// only nodes where moving_player made the move are credited
			if(tree_player[TO] == moving_player){
				list_visit_count[LOC] ++;
				DASSERT((powf(2, 8*sizeof(list_visit_count[0])) - 3) > (float)list_visit_count[LOC]) // overflow check
				list_q_total[LOC] += q[gm];
			}

		}
	} // gm

	Py_RETURN_NONE;
}

-------------------------------------------------------------------------------- /py_util/build.sh: -------------------------------------------------------------------------------- 1 | gcc _py_util.c -fPIC -O3 -I/usr/include/python2.7 -I/usr/include/numpy -lpython2.7 -shared -o _py_util.so -Wall 2 | 3 | -------------------------------------------------------------------------------- /py_util/build_centos.sh: -------------------------------------------------------------------------------- 1 | gcc _py_util.c -fPIC -O3 -I/usr/include/python2.7 -I/usr/include/numpy -I/usr/lib64/python2.7/site-packages/numpy/core/include/numpy -lpython2.7 -shared -o _py_util.so -Wall 2 | 3 | -------------------------------------------------------------------------------- /py_util/choose_moves.c: -------------------------------------------------------------------------------- 1 | // choose maps based on tree search 2 | 3 | /* .Input("moving_player: int32") // [1] 4 | .Input("pol: float") // map, network's estimted probs 5 | .Input("CPUCT: float") // [1] 6 | 7 | .Output("to_coords: int32") // [BATCH_SZ] 8 | .Output("Q_map: float") // map 9 | .Output("P_map: float") // map 10 | .Output("visit_count_map: float") // map 11 | */ 12 | static PyObject *choose_moves(PyObject *self, PyObject *args){ 13 | PyArrayObject *pol_np; 14 | float * pol, CPUCT; 15 | int moving_player; 16 | 17 | if(!PyArg_ParseTuple(args, "iO!f", &moving_player, &PyArray_Type, &pol_np, &CPUCT)) return NULL; 18 | 19 | /////////////////////// check inputs 20 | ASSERT(pol_np != NULL, "absent inputs") 21 | ASSERT(PyArray_TYPE(pol_np) == NPY_FLOAT32, "data type incorrect") 22 | ASSERT(PyArray_NDIM(pol_np) == 2, "dims must be 2") 23 | ASSERT(PyArray_STRIDE(pol_np, 1) == sizeof(pol[0]), "data not contigious or C-order") 24 | 25 | npy_intp * dims_in = PyArray_DIMS(pol_np); 26 | 27 | ASSERT(dims_in[0] == BATCH_SZ, "batch sz incorrect") 28 | ASSERT(dims_in[1] == (MAP_SZ_X*MAP_SZ_Y), "map sz incorrect") 29 | 30 | pol = (float*) 
PyArray_DATA((PyArrayObject*) pol_np); 31 | 32 | ///// output 33 | npy_intp dims[4]; 34 | dims[0] = BATCH_SZ; 35 | dims[1] = MAP_SZ_X; 36 | dims[2] = MAP_SZ_Y; 37 | 38 | PyObject * to_coords_np = PyArray_SimpleNew(1, dims, NPY_INT32); 39 | PyObject * Q_map_np = PyArray_SimpleNew(3, dims, NPY_FLOAT32); 40 | PyObject * P_map_np = PyArray_SimpleNew(3, dims, NPY_FLOAT32); 41 | 42 | dims[1] = MAP_SZ_X*MAP_SZ_Y; 43 | PyObject * visit_count_map_np = PyArray_SimpleNew(2, dims, NPY_FLOAT32); 44 | 45 | int * to_coords = (int *) PyArray_DATA((PyArrayObject*) to_coords_np); 46 | float * Q_map = (float *) PyArray_DATA((PyArrayObject*) Q_map_np); 47 | float * P_map = (float *) PyArray_DATA((PyArrayObject*) P_map_np); 48 | float * visit_count_map = (float *) PyArray_DATA((PyArrayObject*) visit_count_map_np); 49 | 50 | ////////////////////////////////////// 51 | for(int gm = 0; gm < BATCH_SZ; gm++){ 52 | 53 | ////// init 54 | MAP_LOOP{ 55 | int MO = gm*MAP_SZ + loc; 56 | P_map[MO] = 0; 57 | Q_map[MO] = 0; 58 | visit_count_map[MO] = 0; 59 | } 60 | 61 | CUR_TREE_INDS 62 | 63 | // pass move only valid move 64 | if(n_valid_mvs == 1){ 65 | to_coords[gm] = -1; 66 | continue; 67 | } 68 | 69 | #define LOC_AND_MO int LOC = LO + mv_ind;\ 70 | int map_loc = list_valid_mv_inds[LOC];\ 71 | DASSERT(map_loc >= 0 && map_loc < MAP_SZ);\ 72 | int MO = gm*MAP_SZ + map_loc; 73 | 74 | /////////// sum all valid probs 75 | float prob_sum = 0; 76 | for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move 77 | LOC_AND_MO 78 | prob_sum += pol[MO]; 79 | } 80 | 81 | //////////// set prob value, compute tmp sums of Q & P 82 | int visit_sum = 0; // across mvs 83 | 84 | for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move 85 | LOC_AND_MO 86 | 87 | // init move prob 88 | if(list_prob[LOC] == -1) 89 | list_prob[LOC] = pol[MO] / prob_sum; 90 | 91 | int visit_count_tmp = list_visit_count[LOC]; 92 | if(visit_count_tmp == 0) visit_count_tmp = 1; 93 | 94 | // set maps 95 | Q_map[MO] = 
list_q_total[LOC] / visit_count_tmp; 96 | P_map[MO] = (CPUCT * list_prob[LOC]) / (1. + list_visit_count[LOC]); 97 | 98 | visit_sum += list_visit_count[LOC]; 99 | 100 | visit_count_map[MO] = list_visit_count[LOC]; 101 | } 102 | 103 | // compute U for each action, select max action 104 | float U_max = 0; 105 | int mv_ind_max = -1; 106 | float visit_sum_sqrt = sqrtf(visit_sum); 107 | for(int mv_ind = 1; mv_ind < n_valid_mvs; mv_ind++){ // skip pass move 108 | LOC_AND_MO 109 | P_map[MO] *= visit_sum_sqrt; 110 | 111 | float U_tmp = Q_map[MO] + P_map[MO]; 112 | if((U_max < U_tmp) || (mv_ind_max == -1)){ 113 | mv_ind_max = mv_ind; 114 | U_max = U_tmp; 115 | } 116 | } 117 | 118 | // set to_coords 119 | int LOC = LO + mv_ind_max; 120 | int map_loc = list_valid_mv_inds[LOC]; 121 | DASSERT(map_loc >= 0 && map_loc < MAP_SZ); 122 | to_coords[gm] = map_loc; 123 | 124 | } // gm 125 | 126 | /////////// return 127 | PyObject * ret = PyList_New(4); 128 | ASSERT(ret != 0, "err creating output list") 129 | 130 | ASSERT(PyList_SetItem(ret, 0, to_coords_np) == 0, "failed setting item"); 131 | ASSERT(PyList_SetItem(ret, 1, Q_map_np) == 0, "failed setting item"); 132 | ASSERT(PyList_SetItem(ret, 2, P_map_np) == 0, "failed setting item"); 133 | ASSERT(PyList_SetItem(ret, 3, visit_count_map_np) == 0, "failed setting item"); 134 | 135 | return ret; 136 | } 137 | 138 | -------------------------------------------------------------------------------- /py_util/includes.h: -------------------------------------------------------------------------------- 1 | #include "Python.h" 2 | #include "arrayobject.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "../includes.h" 10 | 11 | #ifdef CUDA_DEBUG 12 | #define DASSERT(A) ASSERT(A, "assertion error") 13 | #else 14 | #define DASSERT(A) 15 | #endif 16 | 17 | #define TREE_BUFFER_SZ 500000 // 250000 //190000 18 | //#define MV_BUFFER_SZ 7000000 19 | #define MV_BUFFER_SZ 8000000 20 | 21 | //5000000 //4500000 //4250000 
//4000000 // 3760000 22 | 23 | 24 | //#define TREE_BUFFER_SZ 19000//0 //70000//(1 200 000)//(800000)//*600000*2) 25 | //#define MV_BUFFER_SZ 276000//0 //2 000 000 //1200000 //900000 //TREE_BUFFER_SZ 26 | 27 | #define BMEM(A, B, SZ) memcpy(A, B, SZ*sizeof(A[0])); 28 | #define BMEM2(A, B, SZ) memcpy(A, B, SZ*sizeof(B[0])); 29 | 30 | //////////////////// tree 31 | // create_batch: creates leaves (ex. list_valid_mv_inds) 32 | // choose_moves: sets list_prob 33 | // backup_visit: sets list_q_total 34 | // move_unit: increments visit count, creates new tree node, sets tree_parent 35 | 36 | ////// node information: 37 | unsigned tree_sz[BATCH_SZ]; 38 | unsigned tree_start[BATCH_SZ], tree_start2[BATCH_SZ]; // tree_sz2: for session backup/restoration 39 | 40 | #define B_TREE_SZ (BATCH_SZ * TREE_BUFFER_SZ) 41 | char tree_player[B_TREE_SZ], tree_player_back[TREE_BUFFER_SZ]; 42 | int tree_parent[B_TREE_SZ], tree_parent_back[TREE_BUFFER_SZ]; 43 | 44 | // start index for list_valid_mv_inds, list_valid_tree_inds: 45 | int tree_list_sz[B_TREE_SZ], tree_list_sz_back[TREE_BUFFER_SZ]; 46 | int tree_list_start[B_TREE_SZ], tree_list_start_back[TREE_BUFFER_SZ]; 47 | 48 | ////// lists (leaf information) 49 | #define B_MV_SZ (BATCH_SZ * MV_BUFFER_SZ) 50 | unsigned list_sz[BATCH_SZ]; 51 | short list_valid_mv_inds[B_MV_SZ], list_valid_mv_inds_back[MV_BUFFER_SZ]; // (first entry is always the pass mv) 52 | int list_valid_tree_inds[B_MV_SZ], list_valid_tree_inds_back[MV_BUFFER_SZ]; 53 | float list_q_total[B_MV_SZ], list_q_total_back[MV_BUFFER_SZ]; 54 | float list_prob[B_MV_SZ], list_prob_back[MV_BUFFER_SZ]; 55 | unsigned list_visit_count[B_MV_SZ], list_visit_count_back[MV_BUFFER_SZ]; 56 | 57 | // used in prune tree: 58 | unsigned tree_cp_old_stack[TREE_BUFFER_SZ], tree_cp_new_stack[TREE_BUFFER_SZ]; // prune_tree, tree inds to cp 59 | 60 | //////////////////////////////////////// 61 | #define CHK_T_IND DASSERT(tree_sz[gm] < TREE_BUFFER_SZ);\ 62 | DASSERT(t_ind >= 0 && t_ind < tree_sz[gm]); 63 
| 64 | #define CHK_L_IND DASSERT(list_sz[gm] < MV_BUFFER_SZ);\ 65 | DASSERT(l_ind >= 0)\ 66 | DASSERT(l_ind < list_sz[gm]) 67 | 68 | 69 | #define CHK_N_VALID_MVS DASSERT(n_valid_mvs > 0 && n_valid_mvs <= (MAP_SZ+1));\ 70 | DASSERT( (n_valid_mvs + tree_list_start[TO]) <= list_sz[gm]); 71 | 72 | #define TO_FRM_T_IND CHK_T_IND; TO = gm*TREE_BUFFER_SZ + t_ind; 73 | #define LO_FRM_L_IND CHK_L_IND; LO = gm*MV_BUFFER_SZ + l_ind; 74 | 75 | #define CUR_TREE_INDS_WO_MV_CHK int TO, LO;\ 76 | int t_ind = tree_start[gm]; TO_FRM_T_IND\ 77 | int l_ind = tree_list_start[TO]; LO_FRM_L_IND\ 78 | int n_valid_mvs = tree_list_sz[TO]; 79 | 80 | #define CUR_TREE_INDS CUR_TREE_INDS_WO_MV_CHK \ 81 | CHK_N_VALID_MVS 82 | 83 | 84 | -------------------------------------------------------------------------------- /py_util/init_tree.c: -------------------------------------------------------------------------------- 1 | #define ZERO(A, S) memset(A, 0, (S)*sizeof(A[0])); 2 | 3 | /*void init_vecs(){ 4 | 5 | ZERO(tree_player, BATCH_SZ * TREE_BUFFER_SZ) 6 | ZERO(list_q_total, BATCH_SZ * MV_BUFFER_SZ) 7 | ZERO(list_visit_count, BATCH_SZ * MV_BUFFER_SZ) 8 | 9 | 10 | 11 | for(int i = 0; i < (BATCH_SZ*TREE_BUFFER_SZ); i++){ 12 | tree_parent[i] = -1; 13 | tree_list_start[i] = -1; 14 | tree_list_sz[i] = -1; 15 | } 16 | 17 | for(int i = 0; i < (BATCH_SZ*MV_BUFFER_SZ); i++){ 18 | list_valid_mv_inds[i] = -1; 19 | list_valid_tree_inds[i] = -1; 20 | list_prob[i] = -1; 21 | } 22 | }*/ 23 | 24 | static PyObject *init_tree(PyObject *self, PyObject *args){ 25 | ZERO(tree_start, BATCH_SZ) 26 | ZERO(list_sz, BATCH_SZ) 27 | 28 | for(int i = 0; i < BATCH_SZ; i++){ 29 | tree_sz[i] = 1; 30 | tree_list_start[i*TREE_BUFFER_SZ] = -1; 31 | tree_parent[i*TREE_BUFFER_SZ] = -1; 32 | } 33 | 34 | //init_vecs(); 35 | 36 | Py_RETURN_NONE; 37 | } 38 | 39 | -------------------------------------------------------------------------------- /py_util/prune_tree.c: 
-------------------------------------------------------------------------------- 1 | #define ADD_NODE_TO_STACK(NODE) \ 2 | tree_cp_old_stack[stack_sz] = NODE;\ 3 | tree_cp_new_stack[stack_sz] = tree_sz_back;\ 4 | \ 5 | stack_sz ++;\ 6 | tree_sz_back ++;\ 7 | DASSERT(stack_sz < TREE_BUFFER_SZ)\ 8 | DASSERT(tree_sz_back < TREE_BUFFER_SZ)\ 9 | DASSERT(tree_sz_back <= tree_sz[gm]) 10 | 11 | #define CHK_PREV_TREE_IND(IND, SZ) DASSERT((int)IND >= 0 && IND < SZ) 12 | 13 | static PyObject *prune_tree(PyObject *self, PyObject *args){ 14 | int single_game; 15 | 16 | if(!PyArg_ParseTuple(args, "i", &single_game)) return NULL; 17 | 18 | // only prune first game, reset everything else 19 | int games_loop = BATCH_SZ; 20 | if(single_game == 1){ 21 | games_loop = 1; 22 | 23 | memset(&tree_start[1], 0, sizeof(tree_start[0])*(BATCH_SZ-1)); 24 | memset(&list_sz[1], 0, sizeof(list_sz[0])*(BATCH_SZ-1)); 25 | 26 | for(int i = 1; i < BATCH_SZ; i++){ 27 | tree_sz[i] = 1; 28 | tree_list_start[i*TREE_BUFFER_SZ] = -1; 29 | tree_parent[i*TREE_BUFFER_SZ] = -1; 30 | } 31 | } 32 | 33 | for(int gm = 0; gm < games_loop; gm++){ 34 | int TOFF = gm*TREE_BUFFER_SZ; 35 | int LOFF = gm*MV_BUFFER_SZ; 36 | 37 | int stack_sz = 0; 38 | int tree_sz_back = 0; 39 | int list_sz_back = 0; 40 | 41 | DASSERT(tree_sz[gm] < TREE_BUFFER_SZ) 42 | DASSERT(list_sz[gm] < MV_BUFFER_SZ) 43 | 44 | ////////////////// 45 | // start from tree_start[gm] and mv forward keeping all leaves 46 | ADD_NODE_TO_STACK(tree_start[gm]) 47 | 48 | for(int stack_loc = 0; stack_loc < stack_sz; stack_loc++){ 49 | CHK_PREV_TREE_IND(tree_cp_new_stack[stack_loc], tree_sz_back) 50 | CHK_PREV_TREE_IND(tree_cp_old_stack[stack_loc], tree_sz[gm]) 51 | 52 | int TO_NEW = tree_cp_new_stack[stack_loc]; 53 | int TO = TOFF + tree_cp_old_stack[stack_loc]; 54 | 55 | ///////////// cp node 56 | tree_player_back[TO_NEW] = tree_player[TO]; 57 | tree_list_sz_back[TO_NEW] = tree_list_sz[TO]; 58 | 59 | if(tree_list_start[TO] != -1) // new list slot 60 | 
tree_list_start_back[TO_NEW] = list_sz_back; 61 | else 62 | tree_list_start_back[TO_NEW] = -1; 63 | 64 | /////////////////////// set tree_parent 65 | 66 | // parent of new root is non-existant 67 | if(tree_cp_old_stack[stack_loc] == tree_start[gm]){ 68 | tree_parent_back[TO_NEW] = -1; 69 | }else{ 70 | // find new tree_parent index 71 | char found = 0; int stack_loc_j; 72 | for(stack_loc_j = 0; stack_loc_j < stack_sz; stack_loc_j++){ 73 | if(tree_cp_old_stack[stack_loc_j] != tree_parent[TO]) 74 | continue; 75 | found = 1; 76 | break; 77 | } 78 | assert(found == 1); 79 | 80 | tree_parent_back[TO_NEW] = tree_cp_new_stack[stack_loc_j]; 81 | } 82 | 83 | /////////////// cp list 84 | DASSERT(tree_list_sz[TO] <= (MAP_SZ+1)) 85 | DASSERT(tree_list_sz[TO] >= 0) 86 | DASSERT((tree_list_start_back[TO_NEW] >= 0 && tree_list_start_back[TO_NEW] <= list_sz_back) || tree_list_start_back[TO_NEW] == -1) 87 | 88 | for(int mv_ind = 0; mv_ind < tree_list_sz[TO]; mv_ind++){ 89 | int LO = LOFF + tree_list_start[TO] + mv_ind; 90 | int LO_NEW = tree_list_start_back[TO_NEW] + mv_ind; 91 | 92 | // cp list 93 | list_valid_mv_inds_back[LO_NEW] = list_valid_mv_inds[LO]; 94 | list_q_total_back[LO_NEW] = list_q_total[LO]; 95 | list_prob_back[LO_NEW] = list_prob[LO]; 96 | list_visit_count_back[LO_NEW] = list_visit_count[LO]; 97 | 98 | // tree node to copy 99 | if(list_valid_tree_inds[LO] != -1){ 100 | list_valid_tree_inds_back[LO_NEW] = tree_sz_back; 101 | 102 | ADD_NODE_TO_STACK(list_valid_tree_inds[LO]) 103 | }else 104 | list_valid_tree_inds_back[LO_NEW] = -1; 105 | 106 | list_sz_back ++; 107 | DASSERT(list_sz_back <= list_sz[gm]) 108 | } 109 | } 110 | 111 | /////////// copy over 112 | tree_start[gm] = 0; 113 | tree_sz[gm] = tree_sz_back; 114 | list_sz[gm] = list_sz_back; 115 | 116 | DASSERT((tree_sz[gm] < TREE_BUFFER_SZ) && (tree_sz[gm] > 0)) 117 | BMEM2(&tree_player[TOFF], tree_player_back, tree_sz[gm]) 118 | BMEM2(&tree_parent[TOFF], tree_parent_back, tree_sz[gm]) 119 | 
BMEM2(&tree_list_sz[TOFF], tree_list_sz_back, tree_sz[gm]) 120 | BMEM2(&tree_list_start[TOFF], tree_list_start_back, tree_sz[gm]) 121 | 122 | DASSERT(list_sz[gm] < MV_BUFFER_SZ) 123 | BMEM2(&list_valid_mv_inds[LOFF], list_valid_mv_inds_back, list_sz[gm]) 124 | BMEM2(&list_valid_tree_inds[LOFF], list_valid_tree_inds_back, list_sz[gm]) 125 | BMEM2(&list_q_total[LOFF], list_q_total_back, list_sz[gm]) 126 | BMEM2(&list_prob[LOFF], list_prob_back, list_sz[gm]) 127 | BMEM2(&list_visit_count[LOFF], list_visit_count_back, list_sz[gm]) 128 | } // gm 129 | 130 | Py_RETURN_NONE; 131 | } 132 | 133 | -------------------------------------------------------------------------------- /py_util/py_util.py: -------------------------------------------------------------------------------- 1 | from _py_util import * 2 | 3 | -------------------------------------------------------------------------------- /py_util/py_util_dyn.py: -------------------------------------------------------------------------------- 1 | from _py_util_dyn import * 2 | 3 | -------------------------------------------------------------------------------- /py_util/register_mv.c: -------------------------------------------------------------------------------- 1 | // register move in tree, initialize node if not already initialized 2 | static PyObject *register_mv(PyObject *self, PyObject *args){ 3 | PyArrayObject *chosen_coord_np; 4 | int moving_player, * chosen_coord; 5 | 6 | if(!PyArg_ParseTuple(args, "iO!", &moving_player, &PyArray_Type, &chosen_coord_np)) return NULL; 7 | 8 | /////////////////////// check inputs 9 | ASSERT(moving_player == 0 || moving_player == 1, "moving player incorrect") 10 | ASSERT(chosen_coord_np != NULL, "absent inputs") 11 | ASSERT(PyArray_TYPE(chosen_coord_np) == NPY_INT32, "data type incorrect") 12 | ASSERT(PyArray_NDIM(chosen_coord_np) == 1, "dims incorrect") 13 | ASSERT(PyArray_STRIDE(chosen_coord_np, 0) == sizeof(chosen_coord[0]), "data not contigious or C-order") 14 | 15 | npy_intp * 
dims_in = PyArray_DIMS(chosen_coord_np); 16 | 17 | ASSERT(dims_in[0] == BATCH_SZ, "batch sz incorrect") 18 | 19 | chosen_coord = (int *) PyArray_DATA(chosen_coord_np); 20 | 21 | /////////////////////////////// 22 | 23 | for(int gm = 0; gm < BATCH_SZ; gm++){ 24 | //if(chosen_coord[gm] == -1) continue; 25 | 26 | #ifdef CUDA_DEBUG 27 | if(tree_sz[gm] >= TREE_BUFFER_SZ){ 28 | printf("tree_sz[%i] %i\n", gm, tree_sz[gm]); 29 | DASSERT(0); 30 | } 31 | if(tree_start[gm] < 0 || tree_start[gm] >= tree_sz[gm]){ 32 | printf("tree_sz[%i] %i\n", gm, tree_sz[gm]); 33 | printf("tree_start %i\n", tree_start[gm]); 34 | DASSERT(0); 35 | } 36 | if(list_sz[gm] >= MV_BUFFER_SZ){ 37 | printf("list_sz[%i] %i\n", gm, list_sz[gm]); 38 | DASSERT(0); 39 | } 40 | int t_ind2 = tree_start[gm]; 41 | int TO2 = gm*TREE_BUFFER_SZ + t_ind2; 42 | if(tree_list_start[TO2] < 0 || tree_list_start[TO2] >= list_sz[gm]){ 43 | printf("list_sz[%i] %i\n", gm, list_sz[gm]); 44 | printf("tree_list_start[%i] %i\n", TO2, tree_list_start[TO2]); 45 | DASSERT(0); 46 | } 47 | #endif 48 | 49 | CUR_TREE_INDS 50 | 51 | // find list index for chosen move 52 | char found = 0; 53 | int LOC; 54 | for(int mv_ind = 0; mv_ind < n_valid_mvs; mv_ind++){ 55 | LOC = LO + mv_ind; 56 | if(list_valid_mv_inds[LOC] != chosen_coord[gm]) continue; 57 | 58 | found = 1; 59 | break; 60 | } 61 | 62 | #ifdef CUDA_DEBUG 63 | if(found == 0){ 64 | printf("could not find valid move: gm %i chosen_coord %i n_valid_mvs %i\n", gm, chosen_coord[gm], n_valid_mvs); 65 | for(int mv_ind = 0; mv_ind < n_valid_mvs; mv_ind++){ 66 | LOC = LO + mv_ind; 67 | printf("valid: %i\n", list_valid_mv_inds[LOC]); 68 | } 69 | for(int gm2 = 0; gm2 < BATCH_SZ; gm2++) 70 | printf("to_coords[%i] %i\n", gm2, chosen_coord[gm2]); 71 | //LOC = LO; 72 | } 73 | #endif 74 | ASSERT(found != 0, "could not find move"); 75 | 76 | // update pointer to tree_start 77 | int t_ind_new; 78 | if(list_valid_tree_inds[LOC] == -1){ 79 | 80 | // create new node, return t_ind_new 81 | 
list_valid_tree_inds[LOC] = tree_sz[gm]; 82 | 83 | t_ind_new = tree_sz[gm]; 84 | int TO_NEW = gm*TREE_BUFFER_SZ + t_ind_new; 85 | 86 | tree_parent[TO_NEW] = t_ind; 87 | tree_player[TO_NEW] = moving_player == 0; 88 | tree_list_start[TO_NEW] = -1; 89 | tree_list_sz[TO_NEW] = 0; 90 | 91 | tree_sz[gm] ++; 92 | ASSERT(tree_sz[gm] < TREE_BUFFER_SZ, "tree buffer size exceeded"); 93 | }else{ 94 | 95 | // return t_ind_new from list 96 | t_ind_new = list_valid_tree_inds[LOC]; 97 | DASSERT(t_ind_new >= 0 && t_ind_new < TREE_BUFFER_SZ); 98 | 99 | #ifdef CUDA_DEBUG 100 | int TO_NEW = gm*TREE_BUFFER_SZ + t_ind_new; 101 | #endif 102 | 103 | DASSERT(tree_parent[TO_NEW] == t_ind) 104 | DASSERT(tree_player[TO_NEW] == (!moving_player)) 105 | } 106 | tree_start[gm] = t_ind_new; 107 | } // gm 108 | 109 | Py_RETURN_NONE; 110 | } 111 | 112 | -------------------------------------------------------------------------------- /py_util/return_probs_map.c: -------------------------------------------------------------------------------- 1 | // return probs from tree visit counts 2 | static PyObject *return_probs_map(PyObject *self, PyObject *args){ 3 | int N_TURNS; 4 | 5 | if(!PyArg_ParseTuple(args, "i", &N_TURNS)) return NULL; 6 | 7 | ASSERT(N_TURNS > 0, "N_TURNS must be > 0") 8 | 9 | //////// dbg 10 | int max_tree_sz = tree_sz[0]; 11 | int max_list_sz = list_sz[0]; 12 | for(int gm = 1; gm < BATCH_SZ; gm++){ 13 | if(max_tree_sz < tree_sz[gm]) 14 | max_tree_sz = tree_sz[gm]; 15 | if(max_list_sz < list_sz[gm]) 16 | max_list_sz = list_sz[gm]; 17 | } 18 | printf("max tree_sz: %i, list_sz %i\n", max_tree_sz, max_list_sz); 19 | //////// 20 | 21 | ///// output 22 | npy_intp dims[3]; 23 | dims[0] = N_TURNS * N_PLAYERS * BATCH_SZ; 24 | dims[1] = MAP_SZ_X * MAP_SZ_Y; 25 | 26 | PyObject * probs_map_np = PyArray_SimpleNew(2, dims, NPY_FLOAT); 27 | 28 | float * probs_map = (float *) PyArray_DATA((PyArrayObject*) probs_map_np); 29 | 30 | ////////////////////////////////////// 31 | for(int gm = 0; gm < 
BATCH_SZ; gm++){ 32 | 33 | int TO; 34 | int t_ind = tree_start[gm]; TO_FRM_T_IND 35 | DASSERT(0 == tree_player[TO]) 36 | 37 | unsigned tree_loc = tree_parent[TO]; 38 | 39 | // traverse tree backward, alternating players 40 | for(int turn = N_TURNS-1; turn >= 0; turn--) for(char player = 1; player >= 0; player--){ 41 | float * probs_map_cur = &probs_map[turn*N_PLAYERS*BATCH_SZ*MAP_SZ + player*BATCH_SZ*MAP_SZ + gm*MAP_SZ]; 42 | 43 | // init 44 | MAP_LOOP probs_map_cur[loc] = 0; 45 | int TO, LO; 46 | 47 | // inds 48 | int t_ind = tree_loc; TO_FRM_T_IND 49 | int l_ind = tree_list_start[TO]; LO_FRM_L_IND 50 | int n_valid_mvs = tree_list_sz[TO]; CHK_N_VALID_MVS 51 | 52 | DASSERT(n_valid_mvs >= 1); 53 | DASSERT(player == tree_player[TO]); 54 | 55 | tree_loc = tree_parent[TO]; 56 | 57 | // set map, sum visits 58 | int visit_sum = 0; 59 | for(int mv_ind = 0; mv_ind < n_valid_mvs; mv_ind++){ 60 | int map_loc = list_valid_mv_inds[LO + mv_ind]; 61 | 62 | DASSERT(map_loc >= -1 && map_loc < MAP_SZ); 63 | if(map_loc == -1) continue; 64 | 65 | probs_map_cur[map_loc] = (float)list_visit_count[LO + mv_ind]; 66 | visit_sum += list_visit_count[LO + mv_ind]; 67 | } 68 | 69 | // normalize 70 | for(int mv_ind = 0; (visit_sum != 0) && (mv_ind < n_valid_mvs); mv_ind++){ 71 | int map_loc = list_valid_mv_inds[LO + mv_ind]; 72 | 73 | DASSERT(map_loc >= -1 && map_loc < MAP_SZ); 74 | if(map_loc == -1) continue; 75 | 76 | probs_map_cur[map_loc] /= (float)visit_sum; 77 | } 78 | } // turn / player loops 79 | } // gm 80 | 81 | return probs_map_np; 82 | } 83 | 84 | -------------------------------------------------------------------------------- /py_util/return_tree.c: -------------------------------------------------------------------------------- 1 | /* tree_sz, tree_start, tree_player, tree_parent, tree_list_sz, tree_list_start, \ 2 | list_sz, list_valid_mv_inds, list_valid_tree_inds, list_q_total, list_prob, \ 3 | list_visit_count = tf_op.return_tree() 4 | */ 5 | static PyObject 
*return_tree(PyObject *self, PyObject *args){ 6 | 7 | ///// output 8 | npy_intp dims[4]; 9 | dims[0] = BATCH_SZ; 10 | dims[1] = TREE_BUFFER_SZ; 11 | 12 | PyObject * tree_sz_np = PyArray_SimpleNew(1, dims, NPY_UINT32); 13 | PyObject * tree_start_np = PyArray_SimpleNew(1, dims, NPY_UINT32); 14 | 15 | PyObject * tree_player_np = PyArray_SimpleNew(2, dims, NPY_INT8); 16 | PyObject * tree_parent_np = PyArray_SimpleNew(2, dims, NPY_INT32); 17 | 18 | PyObject * tree_list_sz_np = PyArray_SimpleNew(2, dims, NPY_INT32); 19 | PyObject * tree_list_start_np = PyArray_SimpleNew(2, dims, NPY_INT32); 20 | 21 | PyObject * list_sz_np = PyArray_SimpleNew(1, dims, NPY_UINT32); 22 | 23 | dims[1] = MV_BUFFER_SZ; 24 | PyObject * list_valid_mv_inds_np = PyArray_SimpleNew(2, dims, NPY_INT16); 25 | PyObject * list_valid_tree_inds_np = PyArray_SimpleNew(2, dims, NPY_INT32); 26 | PyObject * list_q_total_np = PyArray_SimpleNew(2, dims, NPY_FLOAT32); 27 | PyObject * list_prob_np = PyArray_SimpleNew(2, dims, NPY_FLOAT32); 28 | PyObject * list_visit_count_np = PyArray_SimpleNew(2, dims, NPY_UINT32); 29 | 30 | ASSERT(tree_sz_np && tree_start_np && tree_player_np && tree_parent_np && tree_list_sz_np && 31 | tree_list_start_np && list_sz_np && list_valid_mv_inds_np && list_valid_tree_inds_np && 32 | list_q_total_np && list_prob_np && list_visit_count_np, "error creating python outputs"); 33 | 34 | unsigned * tree_sz_ret = (unsigned *) PyArray_DATA((PyArrayObject*) tree_sz_np); 35 | unsigned * tree_start_ret = (unsigned *) PyArray_DATA((PyArrayObject*) tree_start_np); 36 | 37 | char * tree_player_ret = (char *) PyArray_DATA((PyArrayObject*) tree_player_np); 38 | int * tree_parent_ret = (int *) PyArray_DATA((PyArrayObject*) tree_parent_np); 39 | 40 | int * tree_list_sz_ret = (int *) PyArray_DATA((PyArrayObject*) tree_list_sz_np); 41 | int * tree_list_start_ret = (int *) PyArray_DATA((PyArrayObject*) tree_list_start_np); 42 | 43 | unsigned * list_sz_ret = (unsigned *) PyArray_DATA((PyArrayObject*) 
list_sz_np); 44 | 45 | short * list_valid_mv_inds_ret = (short *) PyArray_DATA((PyArrayObject*) list_valid_mv_inds_np); 46 | int * list_valid_tree_inds_ret = (int *) PyArray_DATA((PyArrayObject*) list_valid_mv_inds_np); 47 | float * list_q_total_ret = (float *) PyArray_DATA((PyArrayObject*) list_q_total_np); 48 | float * list_prob_ret = (float *) PyArray_DATA((PyArrayObject*) list_prob_np); 49 | unsigned * list_visit_count_ret = (unsigned *) PyArray_DATA((PyArrayObject*) list_visit_count_np); 50 | 51 | ////////////////////////////////////// copy 52 | BMEM(tree_sz_ret, tree_sz, BATCH_SZ) 53 | BMEM(tree_start_ret, tree_start, BATCH_SZ) 54 | 55 | BMEM(tree_player_ret, tree_player, B_TREE_SZ) 56 | BMEM(tree_parent_ret, tree_parent, B_TREE_SZ) 57 | 58 | BMEM(tree_list_sz_ret, tree_list_sz, B_TREE_SZ) 59 | BMEM(tree_list_start_ret, tree_list_start, B_TREE_SZ) 60 | 61 | BMEM(list_sz_ret, list_sz, BATCH_SZ) 62 | 63 | BMEM(list_valid_mv_inds_ret, list_valid_mv_inds, B_MV_SZ) 64 | BMEM(list_valid_tree_inds_ret, list_valid_tree_inds, B_MV_SZ) 65 | BMEM(list_q_total_ret, list_q_total, B_MV_SZ) 66 | BMEM(list_prob_ret, list_prob, B_MV_SZ) 67 | BMEM(list_visit_count_ret, list_visit_count, B_MV_SZ) 68 | 69 | /////////// return 70 | PyObject * ret = PyList_New(12); 71 | ASSERT(ret != 0, "err creating output list") 72 | 73 | ASSERT(PyList_SetItem(ret, 0, tree_sz_np) == 0, "failed setting item"); 74 | ASSERT(PyList_SetItem(ret, 1, tree_start_np) == 0, "failed setting item"); 75 | ASSERT(PyList_SetItem(ret, 2, tree_player_np) == 0, "failed setting item"); 76 | ASSERT(PyList_SetItem(ret, 3, tree_parent_np) == 0, "failed setting item"); 77 | ASSERT(PyList_SetItem(ret, 4, tree_list_sz_np) == 0, "failed setting item"); 78 | ASSERT(PyList_SetItem(ret, 5, tree_list_start_np) == 0, "failed setting item"); 79 | ASSERT(PyList_SetItem(ret, 6, list_sz_np) == 0, "failed setting item"); 80 | ASSERT(PyList_SetItem(ret, 7, list_valid_mv_inds_np) == 0, "failed setting item"); 81 | 
ASSERT(PyList_SetItem(ret, 8, list_valid_tree_inds_np) == 0, "failed setting item"); 82 | ASSERT(PyList_SetItem(ret, 9, list_q_total_np) == 0, "failed setting item"); 83 | ASSERT(PyList_SetItem(ret, 10, list_prob_np) == 0, "failed setting item"); 84 | ASSERT(PyList_SetItem(ret, 11, list_visit_count_np) == 0, "failed setting item"); 85 | 86 | return ret; 87 | } 88 | 89 | -------------------------------------------------------------------------------- /py_util/rotate_reflect_imgs.c: -------------------------------------------------------------------------------- 1 | // inputs: imgs[batch_sz, map_sz_x, map_sz_y, channels] 2 | // randomly rotate/reflect each image 3 | static PyObject *rotate_reflect_imgs(PyObject *self, PyObject *args){ 4 | PyArrayObject *imgs_np, *tree_probs_np; 5 | PyObject *imgs_r_np, *tree_probs_r_np; 6 | float * imgs, *imgs_r, *tree_probs, *tree_probs_r; 7 | 8 | if(!PyArg_ParseTuple(args, "O!O!", &PyArray_Type, &imgs_np, &PyArray_Type, &tree_probs_np)) return NULL; 9 | 10 | /////////////////////// check inputs 11 | ASSERT(imgs_np != NULL, "absent inputs") 12 | ASSERT(PyArray_TYPE(imgs_np) == NPY_FLOAT32 && PyArray_TYPE(tree_probs_np) == NPY_FLOAT32, "data type incorrect") 13 | ASSERT(PyArray_NDIM(imgs_np) == 4 && PyArray_NDIM(tree_probs_np) == 2, "dims must be 4") 14 | ASSERT(PyArray_STRIDE(imgs_np, 3) == sizeof(imgs[0]) && PyArray_STRIDE(tree_probs_np, 1) == sizeof(tree_probs[0]), "data not contigious or C-order") 15 | 16 | npy_intp * dims_in = PyArray_DIMS(imgs_np); 17 | npy_intp * pdims_in = PyArray_DIMS(tree_probs_np); 18 | 19 | ASSERT(dims_in[0] == BATCH_SZ, "batch sz incorrect") 20 | 21 | int map_sz_x = dims_in[1]; 22 | int map_sz_y = dims_in[2]; 23 | int n_chan = dims_in[3]; 24 | 25 | ASSERT(map_sz_x == map_sz_y, "board must be sq") 26 | ASSERT(pdims_in[0] == BATCH_SZ && pdims_in[1] == (map_sz_x*map_sz_y), "tree_probs incorrect") 27 | 28 | imgs_r_np = PyArray_SimpleNew(4, dims_in, NPY_FLOAT); 29 | tree_probs_r_np = PyArray_SimpleNew(2, 
pdims_in, NPY_FLOAT); 30 | 31 | imgs = (float *) PyArray_DATA(imgs_np); 32 | tree_probs = (float *) PyArray_DATA(tree_probs_np); 33 | 34 | imgs_r = (float *) PyArray_DATA((PyArrayObject*) imgs_r_np); 35 | tree_probs_r = (float *) PyArray_DATA((PyArrayObject*) tree_probs_r_np); 36 | 37 | float * imgs_r_pre = malloc(BATCH_SZ*map_sz_x*map_sz_y*n_chan*sizeof(imgs[0])); 38 | float * tree_probs_r_pre = malloc(BATCH_SZ*map_sz_x*map_sz_y*sizeof(imgs[0])); 39 | 40 | ASSERT(imgs_r_pre && tree_probs_r_pre, "failed allocating"); 41 | 42 | #define MAP_LOOP_SEP for(int x = 0; x < map_sz_x; x++){ for(int y = 0; y < map_sz_y; y++){ 43 | 44 | #define CP(X, Y) MAP_LOOP_SEP\ 45 | memcpy(&imgs_r_pre[gm_off + x*map_sz_y*n_chan + y*n_chan], \ 46 | &imgs[gm_off + (X)*map_sz_y*n_chan + (Y)*n_chan], n_chan*sizeof(imgs[0]));\ 47 | tree_probs_r_pre[pgm_off + x*map_sz_y + y] = \ 48 | tree_probs[pgm_off + (X)*map_sz_y + Y];\ 49 | }} 50 | 51 | #define CP_F(X, Y) MAP_LOOP_SEP\ 52 | memcpy(&imgs_r[gm_off + x*map_sz_y*n_chan + y*n_chan], \ 53 | &imgs_r_pre[gm_off + (X)*map_sz_y*n_chan + (Y)*n_chan], n_chan*sizeof(imgs[0]));\ 54 | tree_probs_r[pgm_off + x*map_sz_y + y] = \ 55 | tree_probs_r_pre[pgm_off + (X)*map_sz_y + Y];\ 56 | }} 57 | for(int gm = 0; gm < BATCH_SZ; gm++){ 58 | int op = rand() % 4; 59 | int trans = rand() % 2; 60 | int gm_off = gm*map_sz_x*map_sz_y*n_chan; 61 | int pgm_off = gm*map_sz_x*map_sz_y; 62 | 63 | ////////////////////////////////// 64 | if(op == 0){ // no transform 65 | memcpy(&imgs_r_pre[gm_off], &imgs[gm_off], map_sz_x*map_sz_y*n_chan*sizeof(imgs[0])); 66 | memcpy(&tree_probs_r_pre[pgm_off], &tree_probs[pgm_off], map_sz_x*map_sz_y*sizeof(imgs[0])); 67 | }else if(op == 1){ // imgs[::-1] 68 | CP(map_sz_x - 1 - x, y) 69 | }else if(op == 2){ // imgs[:,::-1] 70 | CP(x, map_sz_y - 1 - y) 71 | }else if(op == 3){ // imgs[::-1, ::-1] 72 | CP(map_sz_x - 1 - x, map_sz_y - 1 - y) 73 | } 74 | 75 | /////////// transpose 76 | if(trans == 1){ 77 | CP_F(y, x) 78 | }else{ // direct cp 79 
| memcpy(&imgs_r[gm_off], &imgs_r_pre[gm_off], map_sz_x*map_sz_y*n_chan*sizeof(imgs[0])); 80 | memcpy(&tree_probs_r[pgm_off], &tree_probs_r_pre[pgm_off], map_sz_x*map_sz_y*sizeof(imgs[0])); 81 | } 82 | } 83 | 84 | PyObject * ret = PyList_New(2); 85 | ASSERT(ret != 0, "err creating output list") 86 | 87 | ASSERT(PyList_SetItem(ret, 0, imgs_r_np) == 0, "failed setting item"); 88 | ASSERT(PyList_SetItem(ret, 1, tree_probs_r_np) == 0, "failed setting item"); 89 | 90 | return ret; 91 | } 92 | 93 | -------------------------------------------------------------------------------- /py_util/session_backup.c: -------------------------------------------------------------------------------- 1 | static PyObject *session_backup(PyObject *self, PyObject *args){ 2 | 3 | BMEM(tree_start2, tree_start, BATCH_SZ) 4 | 5 | Py_RETURN_NONE; 6 | } 7 | 8 | static PyObject *session_restore(PyObject *self, PyObject *args){ 9 | 10 | BMEM(tree_start, tree_start2, BATCH_SZ) 11 | 12 | Py_RETURN_NONE; 13 | } 14 | 15 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | while true; do 2 | python bp_tree.py 3 | done 4 | -------------------------------------------------------------------------------- /vars.cc: -------------------------------------------------------------------------------- 1 | void vars_launcher(int var_idx, void * outputs, char op); 2 | 3 | /// return 4 | #define MAP_COMPUTE(IDX, OP) {tensorflow::TensorShape shape;\ 5 | shape.AddDim(BATCH_SZ);\ 6 | shape.AddDim(MAP_SZ_X);\ 7 | shape.AddDim(MAP_SZ_Y);\ 8 | Tensor* tensor = nullptr;\ 9 | OP_REQUIRES_OK(context, context->allocate_output(0, shape, &tensor));\ 10 | auto outputs = tensor->template flat();\ 11 | vars_launcher(IDX, outputs.data(), RETURN_VARS);} 12 | 13 | #define MAP_COMPUTE_CHAR(IDX, OP) {tensorflow::TensorShape shape;\ 14 | shape.AddDim(BATCH_SZ);\ 15 | shape.AddDim(MAP_SZ_X);\ 16 | 
shape.AddDim(MAP_SZ_Y);\ 17 | Tensor* tensor = nullptr;\ 18 | OP_REQUIRES_OK(context, context->allocate_output(0, shape, &tensor));\ 19 | auto outputs = tensor->template flat();\ 20 | vars_launcher(IDX, outputs.data(), RETURN_VARS);} 21 | 22 | #define COMPUTE_BATCH_SZ_DT(IDX, DT, OP) {tensorflow::TensorShape shape;\ 23 | shape.AddDim(BATCH_SZ);\ 24 | Tensor* tensor = nullptr;\ 25 | OP_REQUIRES_OK(context, context->allocate_output(0, shape, &tensor));\ 26 | auto outputs = tensor->template flat
();\ 27 | vars_launcher(IDX, outputs.data(), RETURN_VARS);} 28 | 29 | //// set 30 | #define SET_MAP_COMPUTE(IDX, OP) {tensorflow::TensorShape shape;\ 31 | shape.AddDim(BATCH_SZ);\ 32 | shape.AddDim(MAP_SZ_X);\ 33 | shape.AddDim(MAP_SZ_Y);\ 34 | const Tensor& inputs_tensor = context->input(0);\ 35 | auto inputs = inputs_tensor.flat();\ 36 | vars_launcher(IDX, (void*)inputs.data(), SET_VARS);} 37 | 38 | #define SET_MAP_COMPUTE_CHAR(IDX, OP) {tensorflow::TensorShape shape;\ 39 | shape.AddDim(BATCH_SZ);\ 40 | shape.AddDim(MAP_SZ_X);\ 41 | shape.AddDim(MAP_SZ_Y);\ 42 | const Tensor& inputs_tensor = context->input(0);\ 43 | auto inputs = inputs_tensor.flat();\ 44 | vars_launcher(IDX, (void*)inputs.data(), SET_VARS);} 45 | 46 | #define SET_COMPUTE_BATCH_SZ_DT(IDX, DT, OP) {tensorflow::TensorShape shape;\ 47 | shape.AddDim(BATCH_SZ);\ 48 | const Tensor& inputs_tensor = context->input(0);\ 49 | auto inputs = inputs_tensor.flat
();\ 50 | vars_launcher(IDX, (void*)inputs.data(), SET_VARS);} 51 | 52 | #include "vars_class_return.cc" 53 | #include "vars_class_set.cc" 54 | 55 | 56 | -------------------------------------------------------------------------------- /vars_class_return.cc: -------------------------------------------------------------------------------- 1 | // maps 2 | REGISTER_OP("Board").Output("outputs: int8"); 3 | REGISTER_OP("ValidMvMapInternal").Output("outputs: int8"); 4 | 5 | // maps 6 | class Board : public OpKernel { 7 | public: 8 | explicit Board(OpKernelConstruction* context) : OpKernel(context) {} 9 | void Compute(OpKernelContext* context) override { 10 | MAP_COMPUTE_CHAR(BOARD_IDX, RETURN_VARS) 11 | } 12 | }; 13 | 14 | class ValidMvMapInternal : public OpKernel { 15 | public: 16 | explicit ValidMvMapInternal(OpKernelConstruction* context) : OpKernel(context) {} 17 | void Compute(OpKernelContext* context) override { 18 | MAP_COMPUTE_CHAR(VALID_MV_MAP_INTERNAL_IDX, RETURN_VARS) 19 | } 20 | }; 21 | 22 | 23 | // maps 24 | REGISTER_KERNEL_BUILDER(Name("Board").Device(DEVICE_GPU), Board); 25 | REGISTER_KERNEL_BUILDER(Name("ValidMvMapInternal").Device(DEVICE_GPU), Board); 26 | 27 | -------------------------------------------------------------------------------- /vars_class_set.cc: -------------------------------------------------------------------------------- 1 | // maps 2 | REGISTER_OP("SetBoard").Input("inputs: int8"); 3 | REGISTER_OP("SetValidMvMapInternal").Input("inputs: int8"); 4 | 5 | // maps 6 | class SetBoard : public OpKernel { 7 | public: 8 | explicit SetBoard(OpKernelConstruction* context) : OpKernel(context) {} 9 | void Compute(OpKernelContext* context) override { 10 | SET_MAP_COMPUTE_CHAR(BOARD_IDX, SET_VARS) 11 | } 12 | }; 13 | 14 | class SetValidMvMapInternal : public OpKernel { 15 | public: 16 | explicit SetValidMvMapInternal(OpKernelConstruction* context) : OpKernel(context) {} 17 | void Compute(OpKernelContext* context) override { 18 | 
SET_MAP_COMPUTE_CHAR(VALID_MV_MAP_INTERNAL_IDX, SET_VARS) 19 | } 20 | }; 21 | 22 | // maps 23 | REGISTER_KERNEL_BUILDER(Name("SetBoard").Device(DEVICE_GPU), SetBoard); 24 | REGISTER_KERNEL_BUILDER(Name("SetValidMvMapInternal").Device(DEVICE_GPU), SetValidMvMapInternal); 25 | 26 | --------------------------------------------------------------------------------