├── nn
│   ├── __init__.py
│   ├── td_prediction_lstm_V3.py
│   ├── td_prediction_lstm_V4.py
│   └── td_two_tower_lstm.py
├── .gitignore
├── infos
│   ├── IJCAI18_poster.pdf
│   ├── IceHockey-IJCAI2018-Presentation.pdf
│   └── BibTex.bib
├── images
│   └── DP-lstm-model-structure.png
├── .idea
│   ├── libraries
│   │   └── R_User_Library.xml
│   ├── misc.xml
│   ├── vcs.xml
│   ├── modules.xml
│   ├── DRL-ice-hockey.iml
│   └── workspace.xml
├── configuration.py
├── LICENSE
├── README.md
├── utils.py
├── td_three_prediction_lstm.py
└── bak
    └── player_ranking_match_by_match_td_lstm.py

/nn/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | .idea
3 | models/
--------------------------------------------------------------------------------
/infos/IJCAI18_poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Guiliang/DRL-ice-hockey/HEAD/infos/IJCAI18_poster.pdf
--------------------------------------------------------------------------------
/images/DP-lstm-model-structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Guiliang/DRL-ice-hockey/HEAD/images/DP-lstm-model-structure.png
--------------------------------------------------------------------------------
/infos/IceHockey-IJCAI2018-Presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Guiliang/DRL-ice-hockey/HEAD/infos/IceHockey-IJCAI2018-Presentation.pdf
--------------------------------------------------------------------------------
/configuration.py:
--------------------------------------------------------------------------------
1 | MODEL_TYPE = "v4"
2 | MAX_TRACE_LENGTH = 2
3 | FEATURE_NUMBER = 25
4 | BATCH_SIZE = 32
5 | GAMMA = 1
6 | H_SIZE = 512
7 | USE_HIDDEN_STATE = False
8 | model_train_continue = True
9 | SCALE = True
10 | FEATURE_TYPE = 5
11 | ITERATE_NUM = 30
12 | learning_rate = 1e-4
13 | SPORT = "NHL"
14 | save_mother_dir = "/Local-Scratch"
--------------------------------------------------------------------------------
/infos/BibTex.bib:
--------------------------------------------------------------------------------
@inproceedings{ijcai2018-478, 2 | title = {Deep Reinforcement Learning in Ice Hockey for Context-Aware Player Evaluation}, 3 | author = {Guiliang Liu and Oliver Schulte}, 4 | booktitle = {Proceedings of the Twenty-Seventh International Joint Conference on 5 | Artificial Intelligence, {IJCAI-18}}, 6 | publisher = {International Joint Conferences on Artificial Intelligence Organization}, 7 | pages = {3442--3448}, 8 | year = {2018}, 9 | month = {7}, 10 | doi = {10.24963/ijcai.2018/478}, 11 | url = {https://doi.org/10.24963/ijcai.2018/478}, 12 | } 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Guiliang Liu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /nn/td_prediction_lstm_V3.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class td_prediction_lstm_V3: 5 | def __init__(self, FEATURE_NUMBER, H_SIZE, MAX_TRACE_LENGTH, learning_rate, rnn_type='bp_last_step'): 6 | """ 7 | define a shallow dynamic LSTM 8 | """ 9 | with tf.name_scope("LSTM_layer"): 10 | self.rnn_input = tf.placeholder(tf.float32, [None, 10, FEATURE_NUMBER], name="x_1") 11 | self.trace_lengths = tf.placeholder(tf.int32, [None], name="tl") 12 | 13 | self.lstm_cell = tf.contrib.rnn.LSTMCell(num_units=H_SIZE, state_is_tuple=True, 14 | initializer=tf.random_uniform_initializer(-0.05, 0.05)) 15 | 16 | self.rnn_output, self.rnn_state = tf.nn.dynamic_rnn( # while loop dynamic learning rnn 17 | inputs=self.rnn_input, cell=self.lstm_cell, sequence_length=self.trace_lengths, dtype=tf.float32, 18 | scope=rnn_type + '_rnn') 19 | 20 | # [batch_size, max_time, cell.output_size] 21 | self.outputs = tf.stack(self.rnn_output) 22 | 23 | # Hack to build the indexing and retrieve the right output. 
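# Note on the indexing trick below: `outputs` has shape [batch_size, max_time, H_SIZE], so after
# reshaping it to [-1, H_SIZE], row (i * max_time + t) holds the output of sample i at time step t.
# The index computed below therefore selects the output at the last valid step of each padded
# sequence, i.e. step (trace_length - 1). This assumes MAX_TRACE_LENGTH equals the padded time
# dimension of `rnn_input` (hard-coded as 10 in the placeholder above); the same trick is used in
# td_prediction_lstm_V4.py and td_two_tower_lstm.py.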
24 | self.batch_size = tf.shape(self.outputs)[0] 25 | # Start indices for each sample 26 | self.index = tf.range(0, self.batch_size) * MAX_TRACE_LENGTH + (self.trace_lengths - 1) 27 | # Indexing 28 | self.rnn_last = tf.gather(tf.reshape(self.outputs, [-1, H_SIZE]), self.index) 29 | 30 | num_layer_1 = H_SIZE 31 | num_layer_2 = 3 32 | 33 | with tf.name_scope("Dense_Layer_first"): 34 | self.W1 = tf.get_variable('w1_xaiver', [num_layer_1, num_layer_2], 35 | initializer=tf.contrib.layers.xavier_initializer()) 36 | self.b1 = tf.Variable(tf.zeros([num_layer_2]), name="b_1") 37 | self.read_out = tf.matmul(self.rnn_last, self.W1) + self.b1 38 | # self.activation1 = tf.nn.relu(self.y1, name='activation') 39 | 40 | self.y = tf.placeholder("float", [None, num_layer_2]) 41 | 42 | with tf.name_scope("cost"): 43 | self.readout_action = self.read_out 44 | self.cost = tf.reduce_mean(tf.square(self.y - self.readout_action)) 45 | self.diff = tf.reduce_mean(tf.abs(self.y - self.readout_action)) 46 | tf.summary.histogram('cost', self.cost) 47 | 48 | with tf.name_scope("train"): 49 | self.train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost) 50 | -------------------------------------------------------------------------------- /nn/td_prediction_lstm_V4.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class td_prediction_lstm_V4: 5 | def __init__(self, FEATURE_NUMBER, H_SIZE, MAX_TRACE_LENGTH, learning_rate, rnn_type='bp_last_step'): 6 | """ 7 | define a dynamic LSTM 8 | """ 9 | with tf.name_scope("LSTM_layer"): 10 | self.rnn_input = tf.placeholder(tf.float32, [None, 10, FEATURE_NUMBER], name="x_1") 11 | self.trace_lengths = tf.placeholder(tf.int32, [None], name="tl") 12 | 13 | self.lstm_cell = tf.contrib.rnn.LSTMCell(num_units=H_SIZE * 2, state_is_tuple=True, 14 | initializer=tf.random_uniform_initializer(-0.05, 0.05)) 15 | 16 | self.rnn_output, self.rnn_state = tf.nn.dynamic_rnn( # while loop dynamic learning rnn 17 | inputs=self.rnn_input, cell=self.lstm_cell, sequence_length=self.trace_lengths, dtype=tf.float32, 18 | scope=rnn_type + '_rnn') 19 | 20 | # [batch_size, max_time, cell.output_size] 21 | self.outputs = tf.stack(self.rnn_output) 22 | 23 | # Hack to build the indexing and retrieve the right output. 
24 |             self.batch_size = tf.shape(self.outputs)[0]
25 |             # Start indices for each sample
26 |             self.index = tf.range(0, self.batch_size) * MAX_TRACE_LENGTH + (self.trace_lengths - 1)
27 |             # Indexing
28 |             self.rnn_last = tf.gather(tf.reshape(self.outputs, [-1, H_SIZE * 2]), self.index)
29 |
30 |         num_layer_1 = H_SIZE * 2
31 |         num_layer_2 = 1000
32 |         num_layer_3 = 3
33 |
34 |         with tf.name_scope("Dense_Layer_first"):
35 |             self.W1 = tf.get_variable('w1_xaiver', [num_layer_1, num_layer_2],
36 |                                       initializer=tf.contrib.layers.xavier_initializer())
37 |             self.b1 = tf.Variable(tf.zeros([num_layer_2]), name="b_1")
38 |             self.y1 = tf.matmul(self.rnn_last, self.W1) + self.b1
39 |             self.activation1 = tf.nn.relu(self.y1, name='activation')
40 |
41 |         with tf.name_scope("Dense_Layer_second"):
42 |             self.W2 = tf.get_variable('w2_xaiver', [num_layer_2, num_layer_3],
43 |                                       initializer=tf.contrib.layers.xavier_initializer())
44 |             self.b2 = tf.Variable(tf.zeros([num_layer_3]), name="b_2")
45 |             self.read_out = tf.matmul(self.activation1, self.W2) + self.b2
46 |             # self.activation1 = tf.nn.relu(self.y1, name='activation')
47 |
48 |         self.y = tf.placeholder("float", [None, num_layer_3])
49 |
50 |         with tf.name_scope("cost"):
51 |             self.readout_action = self.read_out
52 |             self.cost = tf.reduce_mean(tf.square(self.y - self.readout_action))
53 |             self.diff = tf.reduce_mean(tf.abs(self.y - self.readout_action))
54 |             tf.summary.histogram('cost', self.cost)
55 |
56 |         with tf.name_scope("train"):
57 |             self.train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)
58 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DRL-ice-hockey
2 |
3 | This repository contains the code for the network structure of the paper "[Deep Reinforcement Learning in Ice Hockey
4 | for Context-Aware Player Evaluation](https://www.ijcai.org/proceedings/2018/0478.pdf)".
5 |
6 | ## Network Structure:
7 |
8 | | name | nodes | activation function |
9 | | ------------- |:-------------:| -----:|
10 | | LSTM Layer | 512 | N/A |
11 | | Fully Connected Layer 1 | 1024 | Relu |
12 | | Fully Connected Layer 2 | 1000 | Relu |
13 | | Fully Connected Layer 3 | 3 | N/A |
14 |
15 | ## Image of network structure:
16 |
17 | <img src="./images/DP-lstm-model-structure.png" alt="drawing"/>
18 |
19 |
20 |
21 | ## Training method
22 | We are using the on-policy prediction method [Sarsa](https://en.wikipedia.org/wiki/State%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action) (State–Action–Reward–State–Action).
23 | It is a temporal-difference learning method that estimates player performance with Q(s,a), where the state s is a sequence of game contexts and the action a is the player's motion.
24 |
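For intuition, the value target behind this Sarsa-style update can be written as a small function. The following is a minimal sketch that mirrors the target computation in the training loop of `td_three_prediction_lstm.py`; the helper name and the example numbers are illustrative, and `GAMMA` is the discount factor from `configuration.py`.

```
import numpy as np

GAMMA = 1.0  # discount factor, as in configuration.py


def td_target(r_t, q_t1, terminal):
    # r_t      : one-hot reward at time t, [home_scored, away_scored, neither]
    # q_t1     : network output for the next observation, [Q_home, Q_away, Q_end]
    # terminal : True when the game ends at this transition
    r_t = np.asarray(r_t, dtype=np.float32)
    if terminal:
        # at the end of a game the target is the reward alone
        return r_t
    # otherwise bootstrap from the next-step value estimate (Sarsa / TD(0))
    return r_t + GAMMA * np.asarray(q_t1, dtype=np.float32)


# example: the home team scores at time t and the next-step estimate is [0.4, 0.3, 0.3]
print(td_target([1.0, 0.0, 0.0], [0.4, 0.3, 0.3], terminal=False))  # prints roughly [1.4 0.3 0.3]
```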
25 | ## Running:
26 | Use ```python td_three_prediction_lstm.py``` to train the neural network, which produces the Q values. The Goal Impact Metric is the difference between consecutive Q values.
27 | The original work uses a private play-by-play dataset from [Sportlogiq](http://sportlogiq.com/en/), which we are not allowed to publish.
28 |
29 | ### About the input:
30 | If you want to run the network, please prepare your own sequential dataset and organize the data according to the network input, in Numpy format. As it's shown in ```td_three_prediction_lstm.py```, the neural network requires three input files:
31 |
32 | * reward
33 | * state_input (contains both state features and a one-hot representation of the action)
34 | * state_trace_length
35 |
36 | To be specific, if you want to run this Python RNN script directly, you need to prepare the input in this way. In each game directory, there are three .mat files representing reward, state_input and state_trace_length. The file names should follow the rules below:
37 |
38 | - **GameDirectory_xxx**
39 |   - *dynamic_rnn_reward_xxx.mat*
40 |     - A two-dimensional array named 'dynamic_rnn_reward' should be in the .mat file
41 |     - Rows of the array: _R_, columns of the array: 10
42 |   - *dynamic_rnn_input_xxx.mat*
43 |     - A three-dimensional array named 'dynamic_feature_input' should be in the .mat file
44 |     - First dimension: _R_, second dimension: 10, third dimension: _feature number_
45 |   - *hybrid_trace_length_xxx.mat*
46 |     - A two-dimensional array named 'hybrid_trace_length' should be in the .mat file
47 |     - Rows of the array: 1, columns of the array: variable (one entry per play)
48 |     - The array tells us how to split the events into plays of different lengths, so sum(_array elements_) should be _R_
49 |
50 | in which *xxx* is an arbitrary string.
51 |
52 | Each input file must have the same number of rows _R_ (corresponding to the number of events in a game). In our paper the trace length equals 10, so reward is an _R_\*10 array, state_input is an _R_\*10\*_feature_number_ array, and state_trace_length is a one-dimensional vector that gives the length of each play in a game.
53 |
54 | #### Examples
55 | ```
56 | # R=3, feature number=1
57 | >>> reward['dynamic_rnn_reward']
58 | array([[0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
59 |        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
60 |        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]])
61 | >>> state_input['dynamic_feature_input']
62 | array([[[-4.51194112e-02],[ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00],
63 |         [ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00]],
64 |        [[-4.51194112e-02],[ 5.43495586e-04],[ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00],
65 |         [ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00]],
66 |        [[-4.51194112e-02],[ 5.43495586e-04],[-3.46831161e-01],[ 0.00000000e+00],[ 0.00000000e+00],
67 |         [ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00],[ 0.00000000e+00]]])
68 | >>> trace_length['hybrid_trace_length']
69 | array([[1, 2]])
70 | ```
71 |
72 | The data must be ***standardized or normalized*** before being fed to the neural network; we are using ***sklearn.preprocessing.scale***.
73 |
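To make the expected layout concrete, here is a minimal, self-contained sketch of how one game directory could be generated and saved with `scipy.io.savemat`. It is not part of the original code: the directory name, `R = 3`, and the random feature values are made up for illustration, while the array names, shapes, and the `sklearn.preprocessing.scale` call follow the description above.

```
import os

import numpy as np
import scipy.io as sio
from sklearn.preprocessing import scale

R, TRACE, FEATURE_NUMBER = 3, 10, 25     # 3 events, padded trace length 10, 25 features
game_dir = "GameDirectory_demo"          # hypothetical game directory ("demo" plays the role of xxx)
if not os.path.isdir(game_dir):
    os.makedirs(game_dir)

reward = np.zeros((R, TRACE))                          # R x 10 reward array
features = np.random.rand(R, TRACE, FEATURE_NUMBER)    # R x 10 x feature_number state input
# standardize the features column-wise, as recommended above
features = scale(features.reshape(-1, FEATURE_NUMBER)).reshape(R, TRACE, FEATURE_NUMBER)
trace_length = np.array([[1, 2]])                      # play lengths; their sum must equal R

sio.savemat(os.path.join(game_dir, "dynamic_rnn_reward_demo.mat"),
            {"dynamic_rnn_reward": reward})
sio.savemat(os.path.join(game_dir, "dynamic_rnn_input_demo.mat"),
            {"dynamic_feature_input": features})
sio.savemat(os.path.join(game_dir, "hybrid_trace_length_demo.mat"),
            {"hybrid_trace_length": trace_length})
```

`DATA_STORE` in `td_three_prediction_lstm.py` should then point at the parent directory that holds all such game directories.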
74 | ## Packages required:
75 | Python 2.7
76 | 1. Numpy
77 | 2. Tensorflow (1.0.0?)
78 | 3. Scipy
79 | 4. Matplotlib
80 | 5. scikit-learn
81 | (We may need a requirements.txt)
82 |
83 | ## Command:
84 | (For Oliver's students with access to the net drive, the following steps should work on the lab machines)
85 |
86 | Training:
87 | 1. set `save_mother_dir` in `configuration.py` to your save directory, e.g. `/cs/oschulte/Bill/` or just `/local_scratch/`
88 | 2. `cd` into your `save_mother_dir` and make two directories, `./models/hybrid_sl_saved_NN/` and `./models/hybrid_sl_log_NN/`
89 | 3. set the global `DATA_STORE` variable in `td_three_prediction_lstm.py` to `/cs/oschulte/Galen/Hockey-data-entire/Hybrid-RNN-Hockey-Training-All-feature5-scale-neg_reward_v_correct__length-dynamic/`
90 | 4. check the package and Python versions mentioned above
91 | 5. `python td_three_prediction_lstm.py`
92 |
93 | Evaluation:
94 | 1. suppose you have finished steps 1-5 of the training process; to evaluate the network only, just disable the AdamOptimizer by modifying lines 188-192 in `td_three_prediction_lstm.py` as below
95 | ```
96 | [diff, read_out, cost_out, summary_train] = sess.run(
97 |     [model.diff, model.read_out, model.cost, merge],
98 |     feed_dict={model.y: y_batch,
99 |                model.trace_lengths: trace_t0_batch,
100 |                model.rnn_input: s_t0_batch})
101 | ```
102 | 2. `python td_three_prediction_lstm.py`
103 | 3. we have a pretrained network in `/cs/oschulte/Bill/hybrid_sl_saved_NN/Scale-three-cut_together_saved_networks_feature5_batch32_iterate30_lr0.0001_v4_v_correct__MaxTL2/`, only for LSTM_V4. If you want to use this pretrained network for evaluation directly, finish steps 1-4 of the training process, set the global `SAVED_NETWORK` variable in `td_three_prediction_lstm.py` to that network directory, and then run the code as in step 2.
104 |
105 |
106 | ## LICENSE:
107 | MIT LICENSE
108 |
109 | We are still updating this repository.
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from configuration import FEATURE_NUMBER
3 |
4 |
5 | def handle_trace_length(state_trace_length):
6 |     """
7 |     transform format of trace length
8 |     :return:
9 |     """
10 |     trace_length_record = []
11 |     for length in state_trace_length:
12 |         for sub_length in range(0, int(length)):
13 |             trace_length_record.append(sub_length + 1)
14 |
15 |     return trace_length_record
16 |
17 |
18 | def get_together_training_batch(s_t0, state_input, reward, train_number, train_len, state_trace_length, BATCH_SIZE):
19 |     """
20 |     we generate the training batch, you can write your own method.
21 | in our dataset, 1 means home score, -1 means away score, we transfer it to one-hot representation: 22 | reward = [If_home_score, If_away_score, If_NeitherTeam_score] 23 | :return: 24 | batch_return is [s,s',r,s_play_length,s'_play_length, if_game_end, if_score_in_the_last_time_step] 25 | train_number is the current where we stop training 26 | s_t0 is the s for the next batch 27 | """ 28 | batch_return = [] 29 | current_batch_length = 0 30 | while current_batch_length < BATCH_SIZE: 31 | s_t1 = state_input[train_number] 32 | if len(s_t1) < 10 or len(s_t0) < 10: 33 | raise ValueError("wrong length of s") 34 | # train_number += 1 35 | # continue 36 | s_length_t1 = state_trace_length[train_number] 37 | s_length_t0 = state_trace_length[train_number - 1] 38 | if s_length_t1 > 10: # if trace length is too long 39 | s_length_t1 = 10 40 | if s_length_t0 > 10: # if trace length is too long 41 | s_length_t0 = 10 42 | try: 43 | s_reward_t1 = reward[train_number] 44 | s_reward_t0 = reward[train_number - 1] 45 | except IndexError: 46 | raise IndexError("s_reward wrong with index") 47 | train_number += 1 48 | if train_number + 1 == train_len: 49 | trace_length_index_t1 = s_length_t1 - 1 50 | trace_length_index_t0 = s_length_t0 - 1 51 | r_t0 = np.asarray([s_reward_t0[trace_length_index_t0]]) 52 | r_t1 = np.asarray([s_reward_t1[trace_length_index_t1]]) 53 | if r_t0 == [float(0)]: 54 | r_t0_combine = [float(0), float(0), float(0)] 55 | batch_return.append((s_t0, s_t1, r_t0_combine, s_length_t0, s_length_t1, 0, 0)) 56 | 57 | if r_t1 == float(0): 58 | r_t1_combine = [float(0), float(0), float(1)] 59 | elif r_t1 == float(-1): 60 | r_t1_combine = [float(0), float(1), float(1)] 61 | elif r_t1 == float(1): 62 | r_t1_combine = [float(1), float(0), float(1)] 63 | else: 64 | raise ValueError("incorrect r_t1") 65 | batch_return.append((s_t1, s_t1, r_t1_combine, s_length_t1, s_length_t1, 1, 0)) 66 | 67 | elif r_t0 == [float(-1)]: 68 | r_t0_combine = [float(0), float(1), float(0)] 69 | batch_return.append((s_t0, s_t1, r_t0_combine, s_length_t0, s_length_t1, 0, 0)) 70 | 71 | if r_t1 == float(0): 72 | r_t1_combine = [float(0), float(0), float(1)] 73 | elif r_t1 == float(-1): 74 | r_t1_combine = [float(0), float(1), float(1)] 75 | elif r_t1 == float(1): 76 | r_t1_combine = [float(1), float(0), float(1)] 77 | else: 78 | raise ValueError("incorrect r_t1") 79 | batch_return.append((s_t1, s_t1, r_t1_combine, s_length_t1, s_length_t1, 1, 0)) 80 | 81 | elif r_t0 == [float(1)]: 82 | r_t0_combine = [float(1), float(0), float(0)] 83 | batch_return.append((s_t0, s_t1, r_t0_combine, s_length_t0, s_length_t1, 0, 0)) 84 | 85 | if r_t1 == float(0): 86 | r_t1_combine = [float(0), float(0), float(1)] 87 | elif r_t1 == float(-1): 88 | r_t1_combine = [float(0), float(1), float(1)] 89 | elif r_t1 == float(1): 90 | r_t1_combine = [float(1), float(0), float(1)] 91 | else: 92 | raise ValueError("incorrect r_t1") 93 | batch_return.append((s_t1, s_t1, r_t1_combine, s_length_t1, s_length_t1, 1, 0)) 94 | else: 95 | raise ValueError("r_t0 wrong value") 96 | 97 | s_t0 = s_t1 98 | break 99 | 100 | trace_length_index_t0 = s_length_t0 - 1 101 | r_t0 = np.asarray([s_reward_t0[trace_length_index_t0]]) 102 | if r_t0 != [float(0)]: 103 | # print r_t0 104 | if r_t0 == [float(-1)]: 105 | r_t0_combine = [float(0), float(1), float(0)] 106 | batch_return.append((s_t0, s_t1, r_t0_combine, s_length_t0, s_length_t1, 0, 1)) 107 | elif r_t0 == [float(1)]: 108 | r_t0_combine = [float(1), float(0), float(0)] 109 | batch_return.append((s_t0, s_t1, r_t0_combine, 
s_length_t0, s_length_t1, 0, 1)) 110 | else: 111 | raise ValueError("r_t0 wrong value") 112 | s_t0 = s_t1 113 | break 114 | r_t0_combine = [float(0), float(0), float(0)] 115 | batch_return.append((s_t0, s_t1, r_t0_combine, s_length_t0, s_length_t1, 0, 0)) 116 | current_batch_length += 1 117 | s_t0 = s_t1 118 | 119 | return batch_return, train_number, s_t0 120 | 121 | 122 | def padding_hybrid_feature_input(hybrid_feature_input): 123 | """ 124 | padding the empty state features with 0 (states won't be traced by Dynamic LSTM) 125 | :param hybrid_feature_input: the lists of features state to be padding 126 | :return: 127 | """ 128 | current_list_length = len(hybrid_feature_input) 129 | padding_list_length = 10 - current_list_length 130 | for i in range(0, padding_list_length): 131 | hybrid_feature_input.append(np.asarray([float(0)] * FEATURE_NUMBER)) 132 | return np.asarray(hybrid_feature_input) 133 | 134 | 135 | def padding_hybrid_reward(hybrid_reward): 136 | """ 137 | padding the empty state rewards with 0 (rewards won't be traced by Dynamic LSTM) 138 | :param hybrid_reward: the lists of rewards to be padding 139 | :return: 140 | """ 141 | current_list_length = len(hybrid_reward) 142 | padding_list_length = 10 - current_list_length 143 | for i in range(0, padding_list_length): 144 | hybrid_reward.append(0) 145 | return np.asarray(hybrid_reward) 146 | 147 | 148 | def compromise_state_trace_length(state_trace_length, state_input, reward, MAX_TRACE_LENGTH): 149 | """ 150 | padding the features and rewards with 0, in order to get a proper format for LSTM 151 | :param state_trace_length: list of trace length 152 | :param state_input: list of state 153 | :param reward: list of rewards 154 | """ 155 | state_trace_length_output = [] 156 | for index in range(0, len(state_trace_length)): 157 | tl = state_trace_length[index] 158 | if tl >= 10: 159 | tl = 10 160 | if tl > MAX_TRACE_LENGTH: 161 | state_input_change_list = [] 162 | state_input_org = state_input[index] 163 | reward_change_list = [] 164 | reward_org = reward[index] 165 | for i in range(0, MAX_TRACE_LENGTH): 166 | state_input_change_list.append(state_input_org[tl - MAX_TRACE_LENGTH + i]) 167 | # temp = reward_org[tl - MAX_TRACE_LENGTH + i] 168 | # if temp != 0: 169 | # print 'find miss reward' 170 | reward_change_list.append(reward_org[tl - MAX_TRACE_LENGTH + i]) 171 | 172 | state_input_update = padding_hybrid_feature_input(state_input_change_list) 173 | state_input[index] = state_input_update 174 | reward_update = padding_hybrid_reward(reward_change_list) 175 | reward[index] = reward_update 176 | 177 | tl = MAX_TRACE_LENGTH 178 | state_trace_length_output.append(tl) 179 | return state_trace_length_output, state_input, reward 180 | -------------------------------------------------------------------------------- /nn/td_two_tower_lstm.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class td_prediction_tt_embed: 5 | def __init__(self, feature_number, h_size, max_trace_length, learning_rate, 6 | output_layer_size=3, 7 | lstm_layer_num=2, 8 | dense_layer_num=2, 9 | model_name="tt_lstm", 10 | rnn_type="bp_last_step"): 11 | """ 12 | init the model 13 | """ 14 | self.feature_number = feature_number 15 | self.h_size = h_size 16 | self.max_trace_length = max_trace_length 17 | self.learning_rate = learning_rate 18 | self.rnn_type = rnn_type 19 | self.model_name = model_name 20 | self.lstm_layer_num = lstm_layer_num 21 | self.dense_layer_num = dense_layer_num 22 | 
self.output_layer_size = output_layer_size 23 | 24 | self.rnn_input_ph = None 25 | self.trace_lengths_ph = None 26 | self.home_away_indicator_ph = None 27 | self.y_ph = None 28 | 29 | self.lstm_cell_home_all = [] 30 | self.lstm_cell_away_all = [] 31 | self.dense_layer_weights = [] 32 | self.dense_layer_bias = [] 33 | self.embed_away_b = None 34 | self.embed_away_w = None 35 | self.embed_home_w = None 36 | self.embed_home_b = None 37 | self.readout = None 38 | 39 | def build(self): 40 | """ 41 | define a shallow dynamic LSTM 42 | """ 43 | with tf.name_scope(self.model_name): 44 | with tf.name_scope("tower-for-home"): 45 | with tf.name_scope("Home-LSTM-layer"): 46 | for i in range(self.lstm_layer_num): 47 | self.lstm_cell_home_all.append( 48 | tf.nn.rnn_cell.LSTMCell(num_units=self.h_size, state_is_tuple=True, 49 | initializer=tf.random_normal_initializer())) 50 | with tf.name_scope("Home_embed_layer"): 51 | self.embed_home_w = tf.get_variable('w_embed_home', [self.h_size, self.h_size], 52 | initializer=tf.contrib.layers.xavier_initializer()) 53 | self.embed_home_b = tf.Variable(tf.zeros([self.h_size]), name="b_embed_home") 54 | 55 | with tf.name_scope("tower-for-away"): 56 | with tf.name_scope("Away-LSTM-layer"): 57 | for i in range(self.lstm_layer_num): 58 | self.lstm_cell_away_all.append( 59 | tf.nn.rnn_cell.LSTMCell(num_units=self.h_size, state_is_tuple=True, 60 | initializer=tf.random_normal_initializer())) 61 | with tf.name_scope("Away-embed-layer"): 62 | self.embed_away_w = tf.get_variable('w_embed_away', [self.h_size, self.h_size], 63 | initializer=tf.contrib.layers.xavier_initializer()) 64 | self.embed_away_b = tf.Variable(tf.zeros([self.h_size]), name="b_embed_away") 65 | 66 | with tf.name_scope("Dense_Layer"): 67 | for i in range(self.dense_layer_num): 68 | w_input_size = self.h_size 69 | w_output_size = self.h_size if i < self.dense_layer_num - 1 else self.output_layer_size 70 | self.dense_layer_weights.append(tf.get_variable('w{0}_xaiver'.format(str(i)), 71 | [w_input_size, w_output_size], 72 | initializer=tf.contrib.layers.xavier_initializer())) 73 | self.dense_layer_bias.append(tf.Variable(tf.zeros([w_output_size]), name="b_{0}".format(str(i)))) 74 | 75 | def call(self): 76 | """ 77 | build the network 78 | :return: 79 | """ 80 | with tf.name_scope(self.model_name): 81 | with tf.name_scope("tower-for-home"): 82 | with tf.name_scope("Home_LSTM_layer"): 83 | rnn_output = None 84 | for i in range(self.lstm_layer_num): 85 | rnn_input = self.rnn_input_ph if i == 0 else rnn_output 86 | rnn_output, rnn_state = tf.nn.dynamic_rnn( # while loop dynamic learning rnn 87 | inputs=rnn_input, cell=self.lstm_cell_home_all[i], 88 | sequence_length=self.trace_lengths_ph, dtype=tf.float32, 89 | scope=self.rnn_type + '_home_rnn_{0}'.format(str(i))) 90 | outputs = tf.stack(rnn_output) 91 | # Hack to build the indexing and retrieve the right output. 
92 | self.batch_size = tf.shape(outputs)[0] 93 | # Start indices for each sample 94 | self.index = tf.range(0, self.batch_size) * self.max_trace_length + (self.trace_lengths_ph - 1) 95 | # Indexing 96 | rnn_last = tf.gather(tf.reshape(outputs, [-1, self.h_size]), self.index) 97 | 98 | with tf.name_scope("Home_embed_layer"): 99 | self.home_embed_layer = tf.matmul(rnn_last, self.embed_home_w) + self.embed_home_b 100 | 101 | with tf.name_scope("tower-for-away"): 102 | with tf.name_scope("Away_LSTM_layer"): 103 | rnn_output = None 104 | for i in range(self.lstm_layer_num): 105 | rnn_input = self.rnn_input_ph if i == 0 else rnn_output 106 | rnn_output, rnn_state = tf.nn.dynamic_rnn( # while loop dynamic learning rnn 107 | inputs=rnn_input, cell=self.lstm_cell_away_all[i], 108 | sequence_length=self.trace_lengths_ph, dtype=tf.float32, 109 | scope=self.rnn_type + '_away_rnn_{0}'.format(str(i))) 110 | outputs = tf.stack(rnn_output) 111 | # Hack to build the indexing and retrieve the right output. 112 | self.batch_size = tf.shape(outputs)[0] 113 | # Start indices for each sample 114 | self.index = tf.range(0, self.batch_size) * self.max_trace_length + (self.trace_lengths_ph - 1) 115 | # Indexing 116 | rnn_last = tf.gather(tf.reshape(outputs, [-1, self.h_size]), self.index) 117 | 118 | with tf.name_scope("Away_embed_layer"): 119 | self.away_embed_layer = tf.matmul(rnn_last, self.embed_away_w) + self.embed_away_b 120 | 121 | embed_layer = tf.where(condition=self.home_away_indicator_ph, 122 | x=self.home_embed_layer, 123 | y=self.away_embed_layer) 124 | 125 | with tf.name_scope('dense-layer'): 126 | dense_output = None 127 | for i in range(self.dense_layer_num): 128 | dense_input = embed_layer if i == 0 else dense_output 129 | dense_output = tf.matmul(dense_input, self.dense_layer_weights[i]) + self.dense_layer_bias[i] 130 | dense_output = tf.nn.relu(dense_output, name='activation_{0}'.format(str(i))) 131 | 132 | self.readout = dense_output 133 | with tf.name_scope("cost"): 134 | self.cost = tf.reduce_mean(tf.square(self.y_ph - self.readout)) 135 | self.diff = tf.reduce_mean(tf.abs(self.y_ph - self.readout)) 136 | tf.summary.histogram('cost', self.cost) 137 | 138 | with tf.name_scope("train"): 139 | self.train_step = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost) 140 | 141 | def initialize_ph(self): 142 | """ 143 | initialize the place holder 144 | :return: 145 | """ 146 | rnn_input_ph = tf.placeholder(dtype=tf.float32, shape=[None, self.max_trace_length, 147 | self.feature_number], name="rnn-input-ph") 148 | trace_lengths_ph = tf.placeholder(dtype=tf.int32, shape=[None], name="trace-length") 149 | home_away_indicator_ph = tf.cast(tf.placeholder(dtype=tf.int32, shape=[None], name="indicator-ph"), tf.bool) 150 | y_ph = tf.placeholder(dtype=tf.float32, shape=[None, self.output_layer_size]) 151 | 152 | self.rnn_input_ph = rnn_input_ph 153 | self.trace_lengths_ph = trace_lengths_ph 154 | self.home_away_indicator_ph = home_away_indicator_ph 155 | self.y_ph = y_ph 156 | 157 | 158 | if __name__ == '__main__': 159 | """testing the model locally""" 160 | test_model = td_prediction_tt_embed(feature_number=25, h_size=32, max_trace_length=10, learning_rate=0.0001) 161 | test_model.initialize_ph() 162 | test_model.build() 163 | test_model.call() 164 | print "testing model" 165 | -------------------------------------------------------------------------------- /td_three_prediction_lstm.py: -------------------------------------------------------------------------------- 1 | import csv 2 
| import tensorflow as tf 3 | import os 4 | import scipy.io as sio 5 | import numpy as np 6 | from nn.td_prediction_lstm_V3 import td_prediction_lstm_V3 7 | from nn.td_prediction_lstm_V4 import td_prediction_lstm_V4 8 | from utils import handle_trace_length, get_together_training_batch, compromise_state_trace_length 9 | from configuration import MODEL_TYPE, MAX_TRACE_LENGTH, FEATURE_NUMBER, BATCH_SIZE, GAMMA, H_SIZE, \ 10 | model_train_continue, FEATURE_TYPE, ITERATE_NUM, learning_rate, SPORT, save_mother_dir 11 | 12 | LOG_DIR = save_mother_dir + "/models/hybrid_sl_log_NN/Scale-three-cut_together_log_train_feature" + str( 13 | FEATURE_TYPE) + "_batch" + str( 14 | BATCH_SIZE) + "_iterate" + str( 15 | ITERATE_NUM) + "_lr" + str( 16 | learning_rate) + "_" + str(MODEL_TYPE) + "_MaxTL" + str(MAX_TRACE_LENGTH) 17 | SAVED_NETWORK = save_mother_dir + "/models/hybrid_sl_saved_NN/Scale-three-cut_together_saved_networks_feature" + str( 18 | FEATURE_TYPE) + "_batch" + str( 19 | BATCH_SIZE) + "_iterate" + str( 20 | ITERATE_NUM) + "_lr" + str( 21 | learning_rate) + "_" + str(MODEL_TYPE) + "_MaxTL" + str(MAX_TRACE_LENGTH) 22 | DATA_STORE = "your-data-dir" 23 | 24 | DIR_GAMES_ALL = os.listdir(DATA_STORE) 25 | number_of_total_game = len(DIR_GAMES_ALL) 26 | 27 | 28 | def write_game_average_csv(data_record): 29 | """ 30 | write the cost of training 31 | :param data_record: the recorded cost dict 32 | """ 33 | try: 34 | if os.path.exists(LOG_DIR + '/avg_cost_record.csv'): 35 | with open(LOG_DIR + '/avg_cost_record.csv', 'a') as csvfile: 36 | fieldnames = (data_record[0]).keys() 37 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 38 | for record in data_record: 39 | writer.writerow(record) 40 | else: 41 | with open(LOG_DIR + '/avg_cost_record.csv', 'w') as csvfile: 42 | fieldnames = (data_record[0]).keys() 43 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 44 | writer.writeheader() 45 | for record in data_record: 46 | writer.writerow(record) 47 | except: 48 | if os.path.exists(LOG_DIR + '/avg_cost_record2.csv'): 49 | with open(LOG_DIR + '/avg_cost_record.csv', 'a') as csvfile: 50 | fieldnames = (data_record[0]).keys() 51 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 52 | for record in data_record: 53 | writer.writerow(record) 54 | else: 55 | with open(LOG_DIR + '/avg_cost_record2.csv', 'w') as csvfile: 56 | fieldnames = (data_record[0]).keys() 57 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 58 | writer.writeheader() 59 | for record in data_record: 60 | writer.writerow(record) 61 | 62 | 63 | def train_network(sess, model): 64 | """ 65 | training thr neural network game by game 66 | :param sess: session of tf 67 | :param model: nn model 68 | :return: 69 | """ 70 | game_number = 0 71 | global_counter = 0 72 | converge_flag = False 73 | 74 | # loading network 75 | saver = tf.train.Saver() 76 | merge = tf.summary.merge_all() 77 | train_writer = tf.summary.FileWriter(LOG_DIR, sess.graph) 78 | sess.run(tf.global_variables_initializer()) 79 | if model_train_continue: 80 | checkpoint = tf.train.get_checkpoint_state(SAVED_NETWORK) 81 | if checkpoint and checkpoint.model_checkpoint_path: 82 | check_point_game_number = int((checkpoint.model_checkpoint_path.split("-"))[-1]) 83 | game_number_checkpoint = check_point_game_number % number_of_total_game 84 | game_number = check_point_game_number 85 | game_starting_point = 0 86 | saver.restore(sess, checkpoint.model_checkpoint_path) 87 | print("Successfully loaded:", checkpoint.model_checkpoint_path) 88 | else: 89 | print("Could not 
find old network weights") 90 | 91 | game_diff_record_all = [] 92 | 93 | while True: 94 | game_diff_record_dict = {} 95 | iteration_now = game_number / number_of_total_game + 1 96 | game_diff_record_dict.update({"Iteration": iteration_now}) 97 | if converge_flag: 98 | break 99 | elif game_number >= number_of_total_game * ITERATE_NUM: 100 | break 101 | else: 102 | converge_flag = True 103 | for dir_game in DIR_GAMES_ALL: 104 | 105 | if checkpoint and checkpoint.model_checkpoint_path: 106 | if model_train_continue: # go the check point data 107 | game_starting_point += 1 108 | if game_number_checkpoint + 1 > game_starting_point: 109 | continue 110 | 111 | v_diff_record = [] 112 | game_number += 1 113 | game_cost_record = [] 114 | game_files = os.listdir(DATA_STORE + "/" + dir_game) 115 | for filename in game_files: 116 | if "dynamic_rnn_reward" in filename: 117 | reward_name = filename 118 | elif "dynamic_rnn_input" in filename: 119 | state_input_name = filename 120 | elif "trace" in filename: 121 | state_trace_length_name = filename 122 | 123 | reward = sio.loadmat(DATA_STORE + "/" + dir_game + "/" + reward_name) 124 | try: 125 | reward = reward['dynamic_rnn_reward'] 126 | except: 127 | print("\n" + dir_game) 128 | raise ValueError("reward wrong") 129 | state_input = sio.loadmat(DATA_STORE + "/" + dir_game + "/" + state_input_name) 130 | state_input = (state_input['dynamic_feature_input']) 131 | state_trace_length = sio.loadmat(DATA_STORE + "/" + dir_game + "/" + state_trace_length_name) 132 | state_trace_length = (state_trace_length['hybrid_trace_length'])[0] 133 | state_trace_length = handle_trace_length(state_trace_length) 134 | state_trace_length, state_input, reward = compromise_state_trace_length(state_trace_length, state_input, 135 | reward, MAX_TRACE_LENGTH) 136 | 137 | print("\n load file" + str(dir_game) + " success") 138 | reward_count = sum(reward) 139 | print("reward number" + str(reward_count)) 140 | if len(state_input) != len(reward) or len(state_trace_length) != len(reward): 141 | raise Exception('state length does not equal to reward length') 142 | 143 | train_len = len(state_input) 144 | train_number = 0 145 | s_t0 = state_input[train_number] 146 | train_number += 1 147 | 148 | while True: 149 | # try: 150 | batch_return, train_number, s_tl = get_together_training_batch(s_t0, 151 | state_input, 152 | reward, 153 | train_number, 154 | train_len, 155 | state_trace_length, 156 | BATCH_SIZE) 157 | 158 | # get the batch variables 159 | s_t0_batch = [d[0] for d in batch_return] 160 | s_t1_batch = [d[1] for d in batch_return] 161 | r_t_batch = [d[2] for d in batch_return] 162 | trace_t0_batch = [d[3] for d in batch_return] 163 | trace_t1_batch = [d[4] for d in batch_return] 164 | y_batch = [] 165 | 166 | [outputs_t1, readout_t1_batch] = sess.run([model.outputs, model.read_out], 167 | feed_dict={model.trace_lengths: trace_t1_batch, 168 | model.rnn_input: s_t1_batch}) 169 | 170 | for i in range(0, len(batch_return)): 171 | terminal = batch_return[i][5] 172 | cut = batch_return[i][6] 173 | # if terminal, only equals reward 174 | if terminal or cut: 175 | y_home = float((r_t_batch[i])[0]) 176 | y_away = float((r_t_batch[i])[1]) 177 | y_end = float((r_t_batch[i])[2]) 178 | y_batch.append([y_home, y_away, y_end]) 179 | break 180 | else: 181 | y_home = float((r_t_batch[i])[0]) + GAMMA * ((readout_t1_batch[i]).tolist())[0] 182 | y_away = float((r_t_batch[i])[1]) + GAMMA * ((readout_t1_batch[i]).tolist())[1] 183 | y_end = float((r_t_batch[i])[2]) + GAMMA * 
((readout_t1_batch[i]).tolist())[2]
184 |                         y_batch.append([y_home, y_away, y_end])
185 |
186 |                 # perform gradient step
187 |                 y_batch = np.asarray(y_batch)
188 |                 [diff, read_out, cost_out, summary_train, _] = sess.run(
189 |                     [model.diff, model.read_out, model.cost, merge, model.train_step],
190 |                     feed_dict={model.y: y_batch,
191 |                                model.trace_lengths: trace_t0_batch,
192 |                                model.rnn_input: s_t0_batch})
193 |
194 |                 v_diff_record.append(diff)
195 |
196 |                 if cost_out > 0.0001:
197 |                     converge_flag = False
198 |                 global_counter += 1
199 |                 game_cost_record.append(cost_out)
200 |                 train_writer.add_summary(summary_train, global_step=global_counter)
201 |                 s_t0 = s_tl
202 |
203 |                 # print info
204 |                 if terminal or ((train_number - 1) / BATCH_SIZE) % 5 == 1:
205 |                     print("TIMESTEP:", train_number, "Game:", game_number)
206 |                     home_avg = sum(read_out[:, 0]) / len(read_out[:, 0])
207 |                     away_avg = sum(read_out[:, 1]) / len(read_out[:, 1])
208 |                     end_avg = sum(read_out[:, 2]) / len(read_out[:, 2])
209 |                     print("home average:{0}, away average:{1}, end average:{2}".format(str(home_avg), str(away_avg),
210 |                                                                                         str(end_avg)))
211 |                     print("cost of the network is" + str(cost_out))
212 |
213 |                 if terminal:
214 |                     # save progress after a game
215 |                     saver.save(sess, SAVED_NETWORK + '/' + SPORT + '-game-', global_step=game_number)
216 |                     v_diff_record_average = sum(v_diff_record) / len(v_diff_record)
217 |                     game_diff_record_dict.update({dir_game: v_diff_record_average})
218 |                     break
219 |
220 |             # break
221 |             cost_per_game_average = sum(game_cost_record) / len(game_cost_record)
222 |             write_game_average_csv([{"iteration": str(game_number / number_of_total_game + 1), "game": game_number,
223 |                                      "cost_per_game_average": cost_per_game_average}])
224 |
225 |         game_diff_record_all.append(game_diff_record_dict)
226 |
227 |
228 | def train_start():
229 |     if not os.path.isdir(LOG_DIR):
230 |         os.mkdir(LOG_DIR)
231 |     if not os.path.isdir(SAVED_NETWORK):
232 |         os.mkdir(SAVED_NETWORK)
233 |
234 |     sess = tf.InteractiveSession()
235 |     if MODEL_TYPE == "v3":
236 |         nn = td_prediction_lstm_V3(FEATURE_NUMBER, H_SIZE, MAX_TRACE_LENGTH, learning_rate)
237 |     elif MODEL_TYPE == "v4":
238 |         nn = td_prediction_lstm_V4(FEATURE_NUMBER, H_SIZE, MAX_TRACE_LENGTH, learning_rate)
239 |     else:
240 |         raise ValueError("MODEL_TYPE error")
241 |     train_network(sess, nn)
242 |
243 |
244 | if __name__ == '__main__':
245 |     train_start()
246 |
--------------------------------------------------------------------------------