├── .gitignore
├── LICENSE
├── README.md
├── actor.py
├── algos
│   └── ppo.py
├── data
│   └── images
│       ├── system.PNG
│       ├── system2.PNG
│       └── trained_result.png
├── encoders
│   ├── encoder_basic.py
│   └── encoder_highpass.py
├── evaluator.py
├── kaggle_simulations
│   └── agent
│       ├── main.py
│       └── model_133997184.tar
├── learner.py
├── models
│   ├── conv1d.py
│   ├── conv1d_larger.py
│   ├── simple_attention.py
│   ├── team_fc.py
│   └── team_pooling.py
├── requirements.txt
├── rewarders
│   ├── rewarder_basic.py
│   └── rewarder_highpass.py
├── train.py
└── view_match.ipynb

/.gitignore:
--------------------------------------------------------------------------------

*.swp
*.pyc
*.pkl

logs/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 seungeunrho

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Google Research Football Competition - liveinparis team

* The exact code used by the team "liveinparis" in [the kaggle football competition](https://www.kaggle.com/c/google-football)

* An implementation of self-play RL from scratch, with distributed actors

* The final version of the agent ranked [6th/1141](https://www.kaggle.com/c/google-football/leaderboard) (gold prize)

* You can find all the training details [here](https://www.kaggle.com/c/google-football/discussion/201376)


## Dependencies
1. [google-research football](https://github.com/google-research/football)
2. PyTorch
3. tensorboardX
4. kaggle_environments

## Usage
```bash
python3 train.py
# You can find args and hyper-parameters in the "arg_dict" in train.py.
```

## Training curves (vs. rule-based AI)
![](data/images/trained_result.png)
(x-axis: # of episodes)
1. Orange curve - vs. easy-level AI
2. Blue curve - vs. medium-level AI

## Learning system

Each actor runs the simulation and sends rollouts (transition tuples of horizon length 30) to the central learner, and the learner updates the agent with the provided rollouts. A minimal sketch of this hand-off is shown below.
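The toy sketch below condenses the idea: actors push fixed-length rollouts into a shared queue and pause whenever the learner signals an update, so the behavior policy and the policy being optimized stay in step. It is illustrative only; `toy_actor` and `toy_learner` are made-up placeholders, not the actual `actor.py`/`learner.py` code.

```python
import time
import torch.multiprocessing as mp

ROLLOUT_LEN = 30      # transitions per rollout (horizon length)
BATCH_ROLLOUTS = 32   # rollouts consumed per mini batch

def toy_actor(data_queue, signal_queue):
    rollout = []
    for step in range(3000):                 # stand-in for the episode/step loops
        while signal_queue.qsize() > 0:      # learner is updating: pause the simulation
            time.sleep(0.02)
        rollout.append(step)                 # stand-in for a transition tuple
        if len(rollout) == ROLLOUT_LEN:
            data_queue.put(rollout)          # ship one rollout to the learner
            rollout = []

def toy_learner(data_queue, signal_queue):
    for update in range(5):
        while data_queue.qsize() < BATCH_ROLLOUTS:
            time.sleep(0.1)                  # wait until a full mini batch is buffered
        signal_queue.put(1)                  # ask every actor to pause
        batch = [data_queue.get() for _ in range(BATCH_ROLLOUTS)]
        # ... run the PPO update on `batch` and publish the new weights here ...
        signal_queue.get()                   # actors resume with the newest policy

if __name__ == "__main__":
    data_q, signal_q = mp.Queue(), mp.Queue()
    procs = [mp.Process(target=toy_actor, args=(data_q, signal_q)) for _ in range(2)]
    procs.append(mp.Process(target=toy_learner, args=(data_q, signal_q)))
    for p in procs:
        p.start()
    for p in procs:
        p.join()
```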
Since we chose an on-policy update algorithm, we used a trick to ensure perfect on-policyness (the behavior policy and the learning policy are always equal): each actor periodically pauses its simulation while the learner is updating the policy, and resumes only after it receives the newest model from the learner.
We used one actor per CPU core. The final version of the agent was trained with 30 CPU cores and 1 GPU for 370 hours (CPU: AMD Ryzen Threadripper 2950X, GPU: RTX 2080). This is equivalent to about 450,000 episodes and 133M mini-batch update steps (a single mini batch is composed of 32 rollouts, and each rollout of 30 state transitions).


--------------------------------------------------------------------------------
/actor.py:
--------------------------------------------------------------------------------
import gfootball.env as football_env
import time, pprint, importlib, random, os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical
import torch.multiprocessing as mp
from os import listdir
from os.path import isfile, join
import numpy as np

from datetime import datetime, timedelta


def state_to_tensor(state_dict, h_in):
    player_state = torch.from_numpy(state_dict["player"]).float().unsqueeze(0).unsqueeze(0)
    ball_state = torch.from_numpy(state_dict["ball"]).float().unsqueeze(0).unsqueeze(0)
    left_team_state = torch.from_numpy(state_dict["left_team"]).float().unsqueeze(0).unsqueeze(0)
    left_closest_state = torch.from_numpy(state_dict["left_closest"]).float().unsqueeze(0).unsqueeze(0)
    right_team_state = torch.from_numpy(state_dict["right_team"]).float().unsqueeze(0).unsqueeze(0)
    right_closest_state = torch.from_numpy(state_dict["right_closest"]).float().unsqueeze(0).unsqueeze(0)
    avail = torch.from_numpy(state_dict["avail"]).float().unsqueeze(0).unsqueeze(0)

    state_dict_tensor = {
        "player" : player_state,
        "ball" : ball_state,
        "left_team" : left_team_state,
        "left_closest" : left_closest_state,
        "right_team" : right_team_state,
        "right_closest" : right_closest_state,
        "avail" : avail,
        "hidden" : h_in
    }
    return state_dict_tensor


# Sample from the 12-way action head; if MOVE (a==1) is chosen, also sample the 8-way direction head.
def get_action(a_prob, m_prob):

    a = Categorical(a_prob).sample().item()
    m, need_m = 0, 0
    prob_selected_a = a_prob[0][0][a].item()
    prob_selected_m = 0
    if a==0:
        real_action = a
        prob = prob_selected_a
    elif a==1:
        m = Categorical(m_prob).sample().item()
        need_m = 1
        real_action = m + 1
        prob_selected_m = m_prob[0][0][m].item()
        prob = prob_selected_a* prob_selected_m
    else:
        real_action = a + 7
        prob = prob_selected_a

    assert prob != 0, 'prob 0 ERROR!!!! a : {}, m:{} {}, {}'.format(a,m,prob_selected_a,prob_selected_m)

    return real_action, a, m, need_m, prob, prob_selected_a, prob_selected_m

def actor(actor_num, center_model, data_queue, signal_queue, summary_queue, arg_dict):
    os.environ['OPENBLAS_NUM_THREADS'] = '1'
    print("Actor process {} started".format(actor_num))
    fe_module = importlib.import_module("encoders." + arg_dict["encoder"])
    rewarder = importlib.import_module("rewarders." + arg_dict["rewarder"])
    imported_model = importlib.import_module("models."
+ arg_dict["model"]) 67 | 68 | fe = fe_module.FeatureEncoder() 69 | model = imported_model.Model(arg_dict) 70 | model.load_state_dict(center_model.state_dict()) 71 | 72 | env = football_env.create_environment(env_name=arg_dict["env"], representation="raw", stacked=False, logdir='/tmp/football', \ 73 | write_goal_dumps=False, write_full_episode_dumps=False, render=False) 74 | n_epi = 0 75 | rollout = [] 76 | while True: # episode loop 77 | env.reset() 78 | done = False 79 | steps, score, tot_reward, win = 0, 0, 0, 0 80 | n_epi += 1 81 | h_out = (torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float), 82 | torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float)) 83 | 84 | loop_t, forward_t, wait_t = 0.0, 0.0, 0.0 85 | obs = env.observation() 86 | 87 | while not done: # step loop 88 | init_t = time.time() 89 | 90 | is_stopped = False 91 | while signal_queue.qsize() > 0: 92 | time.sleep(0.02) 93 | is_stopped = True 94 | if is_stopped: 95 | model.load_state_dict(center_model.state_dict()) 96 | wait_t += time.time() - init_t 97 | 98 | h_in = h_out 99 | state_dict = fe.encode(obs[0]) 100 | state_dict_tensor = state_to_tensor(state_dict, h_in) 101 | 102 | t1 = time.time() 103 | with torch.no_grad(): 104 | a_prob, m_prob, _, h_out = model(state_dict_tensor) 105 | forward_t += time.time()-t1 106 | real_action, a, m, need_m, prob, prob_selected_a, prob_selected_m = get_action(a_prob, m_prob) 107 | 108 | prev_obs = obs 109 | obs, rew, done, info = env.step(real_action) 110 | fin_r = rewarder.calc_reward(rew, prev_obs[0], obs[0]) 111 | state_prime_dict = fe.encode(obs[0]) 112 | 113 | (h1_in, h2_in) = h_in 114 | (h1_out, h2_out) = h_out 115 | state_dict["hidden"] = (h1_in.numpy(), h2_in.numpy()) 116 | state_prime_dict["hidden"] = (h1_out.numpy(), h2_out.numpy()) 117 | transition = (state_dict, a, m, fin_r, state_prime_dict, prob, done, need_m) 118 | rollout.append(transition) 119 | if len(rollout) == arg_dict["rollout_len"]: 120 | data_queue.put(rollout) 121 | rollout = [] 122 | model.load_state_dict(center_model.state_dict()) 123 | 124 | steps += 1 125 | score += rew 126 | tot_reward += fin_r 127 | 128 | if arg_dict['print_mode']: 129 | print_status(steps,a,m,prob_selected_a,prob_selected_m,prev_obs,obs,fin_r,tot_reward) 130 | loop_t += time.time()-init_t 131 | 132 | if done: 133 | if score > 0: 134 | win = 1 135 | print("score",score,"total reward",tot_reward) 136 | summary_data = (win, score, tot_reward, steps, 0, loop_t/steps, forward_t/steps, wait_t/steps) 137 | summary_queue.put(summary_data) 138 | 139 | def select_opponent(arg_dict): 140 | onlyfiles_lst = [f for f in listdir(arg_dict["log_dir"]) if isfile(join(arg_dict["log_dir"], f))] 141 | model_num_lst = [] 142 | for file_name in onlyfiles_lst: 143 | if file_name[:6] == "model_": 144 | model_num = file_name[6:] 145 | model_num = model_num[:-4] 146 | model_num_lst.append(int(model_num)) 147 | model_num_lst.sort() 148 | 149 | coin = random.random() 150 | if coin arg_dict["latest_n_model"]: 152 | opp_model_num = random.randint(len(model_num_lst)-arg_dict["latest_n_model"],len(model_num_lst)-1) 153 | else: 154 | opp_model_num = len(model_num_lst)-1 155 | else: 156 | opp_model_num = random.randint(0,len(model_num_lst)-1) 157 | 158 | model_name = "/model_"+str(model_num_lst[opp_model_num])+".tar" 159 | opp_model_path = arg_dict["log_dir"] + model_name 160 | return opp_model_num, opp_model_path 161 | 162 | 163 | def actor_self(actor_num, center_model, data_queue, signal_queue, summary_queue, arg_dict): 164 | print("Actor process {} 
started".format(actor_num)) 165 | cpu_device = torch.device('cpu') 166 | fe_module = importlib.import_module("encoders." + arg_dict["encoder"]) 167 | rewarder = importlib.import_module("rewarders." + arg_dict["rewarder"]) 168 | imported_model = importlib.import_module("models." + arg_dict["model"]) 169 | 170 | fe = fe_module.FeatureEncoder() 171 | model = imported_model.Model(arg_dict) 172 | model.load_state_dict(center_model.state_dict()) 173 | opp_model = imported_model.Model(arg_dict) 174 | 175 | env = football_env.create_environment(env_name=arg_dict["env"], number_of_right_players_agent_controls=1, representation="raw", \ 176 | stacked=False, logdir='/tmp/football', write_goal_dumps=False, write_full_episode_dumps=False, \ 177 | render=False) 178 | 179 | n_epi = 0 180 | rollout = [] 181 | while True: # episode loop 182 | opp_model_num, opp_model_path = select_opponent(arg_dict) 183 | checkpoint = torch.load(opp_model_path, map_location=cpu_device) 184 | opp_model.load_state_dict(checkpoint['model_state_dict']) 185 | print("Current Opponent model Num:{}, Path:{} successfully loaded".format(opp_model_num, opp_model_path)) 186 | del checkpoint 187 | 188 | env.reset() 189 | done = False 190 | steps, score, tot_reward, win = 0, 0, 0, 0 191 | n_epi += 1 192 | h_out = (torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float), 193 | torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float)) 194 | opp_h_out = (torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float), 195 | torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float)) 196 | 197 | loop_t, forward_t, wait_t = 0.0, 0.0, 0.0 198 | [obs, opp_obs] = env.observation() 199 | 200 | while not done: # step loop 201 | init_t = time.time() 202 | is_stopped = False 203 | while signal_queue.qsize() > 0: 204 | time.sleep(0.02) 205 | is_stopped = True 206 | if is_stopped: 207 | model.load_state_dict(center_model.state_dict()) 208 | wait_t += time.time() - init_t 209 | 210 | h_in = h_out 211 | opp_h_in = opp_h_out 212 | state_dict = fe.encode(obs) 213 | state_dict_tensor = state_to_tensor(state_dict, h_in) 214 | opp_state_dict = fe.encode(opp_obs) 215 | opp_state_dict_tensor = state_to_tensor(opp_state_dict, opp_h_in) 216 | 217 | t1 = time.time() 218 | with torch.no_grad(): 219 | a_prob, m_prob, _, h_out = model(state_dict_tensor) 220 | opp_a_prob, opp_m_prob, _, opp_h_out = opp_model(opp_state_dict_tensor) 221 | forward_t += time.time()-t1 222 | 223 | real_action, a, m, need_m, prob, prob_selected_a, prob_selected_m = get_action(a_prob, m_prob) 224 | opp_real_action, _, _, _, _, _, _ = get_action(opp_a_prob, opp_m_prob) 225 | 226 | prev_obs = obs 227 | [obs, opp_obs], [rew, _], done, info = env.step([real_action, opp_real_action]) 228 | fin_r = rewarder.calc_reward(rew, prev_obs, obs) 229 | state_prime_dict = fe.encode(obs) 230 | 231 | (h1_in, h2_in) = h_in 232 | (h1_out, h2_out) = h_out 233 | state_dict["hidden"] = (h1_in.numpy(), h2_in.numpy()) 234 | state_prime_dict["hidden"] = (h1_out.numpy(), h2_out.numpy()) 235 | transition = (state_dict, a, m, fin_r, state_prime_dict, prob, done, need_m) 236 | rollout.append(transition) 237 | if len(rollout) == arg_dict["rollout_len"]: 238 | data_queue.put(rollout) 239 | rollout = [] 240 | model.load_state_dict(center_model.state_dict()) 241 | 242 | steps += 1 243 | score += rew 244 | tot_reward += fin_r 245 | 246 | if arg_dict['print_mode']: 247 | print_status(steps,a,m,prob_selected_a,prob_selected_m,prev_obs,obs,fin_r,tot_reward) 248 | 249 | loop_t += time.time()-init_t 250 | 251 | if 
done: 252 | if score > 0: 253 | win = 1 254 | print("score {}, total reward {:.2f}, opp num:{}, opp:{} ".format(score,tot_reward,opp_model_num, opp_model_path)) 255 | summary_data = (win, score, tot_reward, steps, str(opp_model_num), loop_t/steps, forward_t/steps, wait_t/steps) 256 | summary_queue.put(summary_data) 257 | 258 | -------------------------------------------------------------------------------- /algos/ppo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from torch.distributions import Categorical 6 | import torch.multiprocessing as mp 7 | import numpy as np 8 | 9 | 10 | class Algo(): 11 | def __init__(self, arg_dict, device=None): 12 | self.gamma = arg_dict["gamma"] 13 | self.K_epoch = arg_dict["k_epoch"] 14 | self.lmbda = arg_dict["lmbda"] 15 | self.eps_clip = arg_dict["eps_clip"] 16 | self.entropy_coef = arg_dict["entropy_coef"] 17 | self.grad_clip = arg_dict["grad_clip"] 18 | 19 | def train(self, model, data): 20 | tot_loss_lst = [] 21 | pi_loss_lst = [] 22 | entropy_lst = [] 23 | move_entropy_lst = [] 24 | v_loss_lst = [] 25 | 26 | # to calculate fixed advantages before update 27 | data_with_adv = [] 28 | for mini_batch in data: 29 | s, a, m, r, s_prime, done_mask, prob, need_move = mini_batch 30 | with torch.no_grad(): 31 | pi, pi_move, v, _ = model(s) 32 | pi_prime, pi_m_prime, v_prime, _ = model(s_prime) 33 | 34 | td_target = r + self.gamma * v_prime * done_mask 35 | delta = td_target - v # [horizon * batch_size * 1] 36 | delta = delta.detach().cpu().numpy() 37 | 38 | advantage_lst = [] 39 | advantage = np.array([0]) 40 | for delta_t in delta[::-1]: 41 | advantage = self.gamma * self.lmbda * advantage + delta_t 42 | advantage_lst.append(advantage) 43 | advantage_lst.reverse() 44 | advantage = torch.tensor(advantage_lst, dtype=torch.float, device=model.device) 45 | 46 | data_with_adv.append((s, a, m, r, s_prime, done_mask, prob, need_move, td_target, advantage)) 47 | 48 | for i in range(self.K_epoch): 49 | for mini_batch in data_with_adv: 50 | s, a, m, r, s_prime, done_mask, prob, need_move, td_target, advantage = mini_batch 51 | pi, pi_move, v, _ = model(s) 52 | pi_prime, pi_m_prime, v_prime, _ = model(s_prime) 53 | 54 | pi_a = pi.gather(2,a) 55 | pi_m = pi_move.gather(2,m) 56 | pi_am = pi_a*(1-need_move + need_move*pi_m) 57 | ratio = torch.exp(torch.log(pi_am) - torch.log(prob)) # a/b == exp(log(a)-log(b)) 58 | 59 | surr1 = ratio * advantage 60 | surr2 = torch.clamp(ratio, 1-self.eps_clip, 1+self.eps_clip) * advantage 61 | entropy = -torch.log(pi_am) 62 | move_entropy = -need_move*torch.log(pi_m) 63 | 64 | surr_loss = -torch.min(surr1, surr2) 65 | v_loss = F.smooth_l1_loss(v, td_target.detach()) 66 | entropy_loss = -1*self.entropy_coef*entropy 67 | loss = surr_loss + v_loss + entropy_loss.mean() 68 | loss = loss.mean() 69 | 70 | model.optimizer.zero_grad() 71 | loss.backward() 72 | nn.utils.clip_grad_norm_(model.parameters(), self.grad_clip) 73 | model.optimizer.step() 74 | 75 | tot_loss_lst.append(loss.item()) 76 | pi_loss_lst.append(surr_loss.mean().item()) 77 | v_loss_lst.append(v_loss.item()) 78 | entropy_lst.append(entropy.mean().item()) 79 | n_need_move = torch.sum(need_move).item() 80 | if n_need_move == 0: 81 | move_entropy_lst.append(0) 82 | else: 83 | move_entropy_lst.append((torch.sum(move_entropy)/n_need_move).item()) 84 | return np.mean(tot_loss_lst), np.mean(pi_loss_lst), np.mean(v_loss_lst), 
np.mean(entropy_lst), np.mean(move_entropy_lst) 85 | -------------------------------------------------------------------------------- /data/images/system.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seungeunrho/football-paris/20d9ec464edce9153839b66a60dcb02874ea15ee/data/images/system.PNG -------------------------------------------------------------------------------- /data/images/system2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seungeunrho/football-paris/20d9ec464edce9153839b66a60dcb02874ea15ee/data/images/system2.PNG -------------------------------------------------------------------------------- /data/images/trained_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seungeunrho/football-paris/20d9ec464edce9153839b66a60dcb02874ea15ee/data/images/trained_result.png -------------------------------------------------------------------------------- /encoders/encoder_basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class FeatureEncoder: 4 | def __init__(self): 5 | self.active = -1 6 | self.player_pos_x, self.player_pos_y = 0, 0 7 | 8 | def get_feature_dims(self): 9 | dims = { 10 | 'player':29, 11 | 'ball':18, 12 | 'left_team':7, 13 | 'left_team_closest':7, 14 | 'right_team':7, 15 | 'right_team_closest':7, 16 | } 17 | return dims 18 | 19 | def encode(self, obs): 20 | player_num = obs['active'] 21 | 22 | player_pos_x, player_pos_y = obs['left_team'][player_num] 23 | player_direction = np.array(obs['left_team_direction'][player_num]) 24 | player_speed = np.linalg.norm(player_direction) 25 | player_role = obs['left_team_roles'][player_num] 26 | player_role_onehot = self._encode_role_onehot(player_role) 27 | player_tired = obs['left_team_tired_factor'][player_num] 28 | is_dribbling = obs['sticky_actions'][9] 29 | is_sprinting = obs['sticky_actions'][8] 30 | 31 | ball_x, ball_y, ball_z = obs['ball'] 32 | ball_x_relative = ball_x - player_pos_x 33 | ball_y_relative = ball_y - player_pos_y 34 | ball_x_speed, ball_y_speed, _ = obs['ball_direction'] 35 | ball_distance = np.linalg.norm([ball_x_relative, ball_y_relative]) 36 | ball_speed = np.linalg.norm([ball_x_speed, ball_y_speed]) 37 | ball_owned = 0.0 38 | if obs['ball_owned_team'] == -1: 39 | ball_owned = 0.0 40 | else: 41 | ball_owned = 1.0 42 | ball_owned_by_us = 0.0 43 | if obs['ball_owned_team'] == 0: 44 | ball_owned_by_us = 1.0 45 | elif obs['ball_owned_team'] == 1: 46 | ball_owned_by_us = 0.0 47 | else: 48 | ball_owned_by_us = 0.0 49 | 50 | ball_which_zone = self._encode_ball_which_zone(ball_x, ball_y) 51 | 52 | if ball_distance > 0.03: 53 | ball_far = 1.0 54 | else: 55 | ball_far = 0.0 56 | 57 | avail = self._get_avail(obs, ball_distance) 58 | player_state = np.concatenate((avail[2:], obs['left_team'][player_num], player_direction*100, [player_speed*100], 59 | player_role_onehot, [ball_far, player_tired, is_dribbling, is_sprinting])) 60 | 61 | 62 | ball_state = np.concatenate((np.array(obs['ball']), 63 | np.array(ball_which_zone), 64 | np.array([ball_x_relative, ball_y_relative]), 65 | np.array(obs['ball_direction'])*20, 66 | np.array([ball_speed*20, ball_distance, ball_owned, ball_owned_by_us]))) 67 | 68 | 69 | obs_left_team = np.delete(obs['left_team'], player_num, axis=0) 70 | obs_left_team_direction = np.delete(obs['left_team_direction'], 
player_num, axis=0) 71 | left_team_relative = obs_left_team 72 | left_team_distance = np.linalg.norm(left_team_relative - obs['left_team'][player_num], axis=1, keepdims=True) 73 | left_team_speed = np.linalg.norm(obs_left_team_direction, axis=1, keepdims=True) 74 | left_team_tired = np.delete(obs['left_team_tired_factor'], player_num, axis=0).reshape(-1,1) 75 | left_team_state = np.concatenate((left_team_relative*2, obs_left_team_direction*100, left_team_speed*100, \ 76 | left_team_distance*2, left_team_tired), axis=1) 77 | left_closest_idx = np.argmin(left_team_distance) 78 | left_closest_state = left_team_state[left_closest_idx] 79 | 80 | 81 | obs_right_team = np.array(obs['right_team']) 82 | obs_right_team_direction = np.array(obs['right_team_direction']) 83 | right_team_distance = np.linalg.norm(obs_right_team - obs['left_team'][player_num], axis=1, keepdims=True) 84 | right_team_speed = np.linalg.norm(obs_right_team_direction, axis=1, keepdims=True) 85 | right_team_tired = np.array(obs['right_team_tired_factor']).reshape(-1,1) 86 | right_team_state = np.concatenate((obs_right_team*2, obs_right_team_direction*100, right_team_speed*100, \ 87 | right_team_distance*2, right_team_tired), axis=1) 88 | right_closest_idx = np.argmin(right_team_distance) 89 | right_closest_state = right_team_state[right_closest_idx] 90 | 91 | state_dict = {"player": player_state, 92 | "ball": ball_state, 93 | "left_team" : left_team_state, 94 | "left_closest" : left_closest_state, 95 | "right_team" : right_team_state, 96 | "right_closest" : right_closest_state, 97 | "avail" : avail} 98 | 99 | return state_dict 100 | 101 | def _get_avail(self, obs, ball_distance): 102 | avail = [1,1,1,1,1,1,1,1,1,1,1,1] 103 | NO_OP, MOVE, LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT, SPRINT, RELEASE_MOVE, \ 104 | RELEASE_SPRINT, SLIDE, DRIBBLE, RELEASE_DRIBBLE = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 105 | 106 | if obs['ball_owned_team'] == 1: # opponents owning ball 107 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0 108 | elif obs['ball_owned_team'] == -1 and ball_distance > 0.03 and obs['game_mode'] == 0: # Ground ball and far from me 109 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0 110 | else: # my team owning ball 111 | avail[SLIDE] = 0 112 | 113 | # Dealing with sticky actions 114 | sticky_actions = obs['sticky_actions'] 115 | if sticky_actions[8] == 0: # sprinting 116 | avail[RELEASE_SPRINT] = 0 117 | 118 | if sticky_actions[9] == 1: # dribbling 119 | avail[SLIDE] = 0 120 | else: 121 | avail[RELEASE_DRIBBLE] = 0 122 | 123 | if np.sum(sticky_actions[:8]) == 0: 124 | avail[RELEASE_MOVE] = 0 125 | 126 | 127 | # if too far, no shot 128 | ball_x, ball_y, _ = obs['ball'] 129 | if ball_x < 0.64 or ball_y < -0.27 or 0.27 < ball_y: 130 | avail[SHOT] = 0 131 | elif (0.64 <= ball_x and ball_x<=1.0) and (-0.27<=ball_y and ball_y<=0.27): 132 | avail[HIGH_PASS], avail[LONG_PASS] = 0, 0 133 | 134 | 135 | if obs['game_mode'] == 2 and ball_x < -0.7: # Our GoalKick 136 | avail = [1,0,0,0,0,0,0,0,0,0,0,0] 137 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1 138 | return np.array(avail) 139 | 140 | elif obs['game_mode'] == 4 and ball_x > 0.9: # Our CornerKick 141 | avail = [1,0,0,0,0,0,0,0,0,0,0,0] 142 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1 143 | return np.array(avail) 144 | 145 | elif obs['game_mode'] == 6 and ball_x > 0.6: # Our PenaltyKick 146 | avail = [1,0,0,0,0,0,0,0,0,0,0,0] 147 | 
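# game_mode 6 with ball_x > 0.6 is our penalty kick: the mask is reset to NO_OP only,
# and SHOT is re-enabled on the next line, so the agent can only wait or shoot.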
avail[SHOT] = 1 148 | return np.array(avail) 149 | 150 | return np.array(avail) 151 | 152 | def _encode_ball_which_zone(self, ball_x, ball_y): 153 | MIDDLE_X, PENALTY_X, END_X = 0.2, 0.64, 1.0 154 | PENALTY_Y, END_Y = 0.27, 0.42 155 | if (-END_X <= ball_x and ball_x < -PENALTY_X)and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y): 156 | return [1.0,0,0,0,0,0] 157 | elif (-END_X <= ball_x and ball_x < -MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y): 158 | return [0,1.0,0,0,0,0] 159 | elif (-MIDDLE_X <= ball_x and ball_x <= MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y): 160 | return [0,0,1.0,0,0,0] 161 | elif (PENALTY_X < ball_x and ball_x <=END_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y): 162 | return [0,0,0,1.0,0,0] 163 | elif (MIDDLE_X < ball_x and ball_x <=END_X) and (-END_Y < ball_y and ball_y < END_Y): 164 | return [0,0,0,0,1.0,0] 165 | else: 166 | return [0,0,0,0,0,1.0] 167 | 168 | 169 | def _encode_role_onehot(self, role_num): 170 | result = [0,0,0,0,0,0,0,0,0,0] 171 | result[role_num] = 1.0 172 | return np.array(result) -------------------------------------------------------------------------------- /encoders/encoder_highpass.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class FeatureEncoder: 4 | def __init__(self): 5 | self.active = -1 6 | self.player_pos_x, self.player_pos_y = 0, 0 7 | 8 | def get_feature_dims(self): 9 | dims = { 10 | 'player':29, 11 | 'ball':18, 12 | 'left_team':7, 13 | 'left_team_closest':7, 14 | 'right_team':7, 15 | 'right_team_closest':7, 16 | } 17 | return dims 18 | 19 | def encode(self, obs): 20 | player_num = obs['active'] 21 | 22 | player_pos_x, player_pos_y = obs['left_team'][player_num] 23 | player_direction = np.array(obs['left_team_direction'][player_num]) 24 | player_speed = np.linalg.norm(player_direction) 25 | player_role = obs['left_team_roles'][player_num] 26 | player_role_onehot = self._encode_role_onehot(player_role) 27 | player_tired = obs['left_team_tired_factor'][player_num] 28 | is_dribbling = obs['sticky_actions'][9] 29 | is_sprinting = obs['sticky_actions'][8] 30 | 31 | ball_x, ball_y, ball_z = obs['ball'] 32 | ball_x_relative = ball_x - player_pos_x 33 | ball_y_relative = ball_y - player_pos_y 34 | ball_x_speed, ball_y_speed, _ = obs['ball_direction'] 35 | ball_distance = np.linalg.norm([ball_x_relative, ball_y_relative]) 36 | ball_speed = np.linalg.norm([ball_x_speed, ball_y_speed]) 37 | ball_owned = 0.0 38 | if obs['ball_owned_team'] == -1: 39 | ball_owned = 0.0 40 | else: 41 | ball_owned = 1.0 42 | ball_owned_by_us = 0.0 43 | if obs['ball_owned_team'] == 0: 44 | ball_owned_by_us = 1.0 45 | elif obs['ball_owned_team'] == 1: 46 | ball_owned_by_us = 0.0 47 | else: 48 | ball_owned_by_us = 0.0 49 | 50 | ball_which_zone = self._encode_ball_which_zone(ball_x, ball_y) 51 | 52 | if ball_distance > 0.03: 53 | ball_far = 1.0 54 | else: 55 | ball_far = 0.0 56 | 57 | avail = self._get_avail(obs, ball_distance) 58 | player_state = np.concatenate((avail[2:], obs['left_team'][player_num], player_direction*100, [player_speed*100], 59 | player_role_onehot, [ball_far, player_tired, is_dribbling, is_sprinting])) 60 | 61 | 62 | ball_state = np.concatenate((np.array(obs['ball']), 63 | np.array(ball_which_zone), 64 | np.array([ball_x_relative, ball_y_relative]), 65 | np.array(obs['ball_direction'])*20, 66 | np.array([ball_speed*20, ball_distance, ball_owned, ball_owned_by_us]))) 67 | 68 | 69 | obs_left_team = np.delete(obs['left_team'], player_num, axis=0) 70 | 
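# Teammate features (built below): 7 values per teammate, matching 'left_team': 7 in
# get_feature_dims(): position*2, direction*100, speed*100, distance to the active
# player*2, and the tired factor; the active player itself is excluded via np.delete.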
obs_left_team_direction = np.delete(obs['left_team_direction'], player_num, axis=0) 71 | left_team_relative = obs_left_team 72 | left_team_distance = np.linalg.norm(left_team_relative - obs['left_team'][player_num], axis=1, keepdims=True) 73 | left_team_speed = np.linalg.norm(obs_left_team_direction, axis=1, keepdims=True) 74 | left_team_tired = np.delete(obs['left_team_tired_factor'], player_num, axis=0).reshape(-1,1) 75 | left_team_state = np.concatenate((left_team_relative*2, obs_left_team_direction*100, left_team_speed*100, \ 76 | left_team_distance*2, left_team_tired), axis=1) 77 | left_closest_idx = np.argmin(left_team_distance) 78 | left_closest_state = left_team_state[left_closest_idx] 79 | 80 | 81 | obs_right_team = np.array(obs['right_team']) 82 | obs_right_team_direction = np.array(obs['right_team_direction']) 83 | right_team_distance = np.linalg.norm(obs_right_team - obs['left_team'][player_num], axis=1, keepdims=True) 84 | right_team_speed = np.linalg.norm(obs_right_team_direction, axis=1, keepdims=True) 85 | right_team_tired = np.array(obs['right_team_tired_factor']).reshape(-1,1) 86 | right_team_state = np.concatenate((obs_right_team*2, obs_right_team_direction*100, right_team_speed*100, \ 87 | right_team_distance*2, right_team_tired), axis=1) 88 | right_closest_idx = np.argmin(right_team_distance) 89 | right_closest_state = right_team_state[right_closest_idx] 90 | 91 | state_dict = {"player": player_state, 92 | "ball": ball_state, 93 | "left_team" : left_team_state, 94 | "left_closest" : left_closest_state, 95 | "right_team" : right_team_state, 96 | "right_closest" : right_closest_state, 97 | "avail" : avail} 98 | 99 | return state_dict 100 | 101 | def _get_avail(self, obs, ball_distance): 102 | avail = [1,1,1,1,1,1,1,1,1,1,1,1] 103 | NO_OP, MOVE, LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT, SPRINT, RELEASE_MOVE, \ 104 | RELEASE_SPRINT, SLIDE, DRIBBLE, RELEASE_DRIBBLE = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 105 | 106 | ball_x, ball_y, _ = obs['ball'] 107 | if obs['ball_owned_team'] == 1: # opponents owning ball 108 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0 109 | elif obs['ball_owned_team'] == -1 and ball_distance > 0.03 and obs['game_mode'] == 0: # Ground ball and far from me 110 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0 111 | else: # my team owning ball 112 | avail[SLIDE] = 0 113 | if ball_x > 0.85 and (ball_y < -0.34 or ball_y > 0.34): # when the ball is near the opponent corner 114 | avail[LONG_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0 115 | 116 | # Dealing with sticky actions 117 | sticky_actions = obs['sticky_actions'] 118 | if sticky_actions[8] == 0: # sprinting 119 | avail[RELEASE_SPRINT] = 0 120 | 121 | if sticky_actions[9] == 1: # dribbling 122 | avail[SLIDE] = 0 123 | else: 124 | avail[RELEASE_DRIBBLE] = 0 125 | 126 | if np.sum(sticky_actions[:8]) == 0: 127 | avail[RELEASE_MOVE] = 0 128 | 129 | if ball_x < 0.64 or ball_y < -0.27 or 0.27 < ball_y: # if too far, no shot 130 | avail[SHOT] = 0 131 | elif (0.64 <= ball_x and ball_x<=1.0) and (-0.27<=ball_y and ball_y<=0.27): # In the penalty area, no pass 132 | avail[HIGH_PASS], avail[LONG_PASS] = 0, 0 133 | 134 | 135 | if obs['game_mode'] == 2 and ball_x < -0.7: # Our GoalKick 136 | avail = [1,0,0,0,0,0,0,0,0,0,0,0] 137 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1 138 | return np.array(avail) 139 | 140 | elif obs['game_mode'] == 4 and ball_x > 0.9: # Our 
CornerKick 141 | avail = [1,0,0,0,0,0,0,0,0,0,0,0] 142 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1 143 | return np.array(avail) 144 | 145 | elif obs['game_mode'] == 6 and ball_x > 0.6: # Our PenaltyKick 146 | avail = [1,0,0,0,0,0,0,0,0,0,0,0] 147 | avail[SHOT] = 1 148 | return np.array(avail) 149 | 150 | return np.array(avail) 151 | 152 | def _encode_ball_which_zone(self, ball_x, ball_y): 153 | MIDDLE_X, PENALTY_X, END_X = 0.2, 0.64, 1.0 154 | PENALTY_Y, END_Y = 0.27, 0.42 155 | if (-END_X <= ball_x and ball_x < -PENALTY_X)and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y): 156 | return [1.0,0,0,0,0,0] 157 | elif (-END_X <= ball_x and ball_x < -MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y): 158 | return [0,1.0,0,0,0,0] 159 | elif (-MIDDLE_X <= ball_x and ball_x <= MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y): 160 | return [0,0,1.0,0,0,0] 161 | elif (PENALTY_X < ball_x and ball_x <=END_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y): 162 | return [0,0,0,1.0,0,0] 163 | elif (MIDDLE_X < ball_x and ball_x <=END_X) and (-END_Y < ball_y and ball_y < END_Y): 164 | return [0,0,0,0,1.0,0] 165 | else: 166 | return [0,0,0,0,0,1.0] 167 | 168 | 169 | def _encode_role_onehot(self, role_num): 170 | result = [0,0,0,0,0,0,0,0,0,0] 171 | result[role_num] = 1.0 172 | return np.array(result) -------------------------------------------------------------------------------- /evaluator.py: -------------------------------------------------------------------------------- 1 | import gfootball.env as football_env 2 | import time, pprint, importlib, random 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torch.distributions import Categorical 9 | import torch.multiprocessing as mp 10 | from os import listdir 11 | from os.path import isfile, join 12 | from datetime import datetime, timedelta 13 | 14 | def state_to_tensor(state_dict, h_in): 15 | player_state = torch.from_numpy(state_dict["player"]).float().unsqueeze(0).unsqueeze(0) 16 | ball_state = torch.from_numpy(state_dict["ball"]).float().unsqueeze(0).unsqueeze(0) 17 | left_team_state = torch.from_numpy(state_dict["left_team"]).float().unsqueeze(0).unsqueeze(0) 18 | left_closest_state = torch.from_numpy(state_dict["left_closest"]).float().unsqueeze(0).unsqueeze(0) 19 | right_team_state = torch.from_numpy(state_dict["right_team"]).float().unsqueeze(0).unsqueeze(0) 20 | right_closest_state = torch.from_numpy(state_dict["right_closest"]).float().unsqueeze(0).unsqueeze(0) 21 | avail = torch.from_numpy(state_dict["avail"]).float().unsqueeze(0).unsqueeze(0) 22 | 23 | state_dict_tensor = { 24 | "player" : player_state, 25 | "ball" : ball_state, 26 | "left_team" : left_team_state, 27 | "left_closest" : left_closest_state, 28 | "right_team" : right_team_state, 29 | "right_closest" : right_closest_state, 30 | "avail" : avail, 31 | "hidden" : h_in 32 | } 33 | return state_dict_tensor 34 | 35 | def get_action(a_prob, m_prob): 36 | a = Categorical(a_prob).sample().item() 37 | m, need_m = 0, 0 38 | prob_selected_a = a_prob[0][0][a].item() 39 | prob_selected_m = 0 40 | if a==0: 41 | real_action = a 42 | prob = prob_selected_a 43 | elif a==1: 44 | m = Categorical(m_prob).sample().item() 45 | need_m = 1 46 | real_action = m + 1 47 | prob_selected_m = m_prob[0][0][m].item() 48 | prob = prob_selected_a* prob_selected_m 49 | else: 50 | real_action = a + 7 51 | prob = prob_selected_a 52 | 53 | assert prob != 0, 'prob 0 ERROR!!!! 
a : {}, m:{} {}, {}'.format(a,m,prob_selected_a,prob_selected_m) 54 | 55 | return real_action, a, m, need_m, prob, prob_selected_a, prob_selected_m 56 | 57 | def evaluator(center_model, signal_queue, summary_queue, arg_dict): 58 | print("Evaluator process started") 59 | fe_module = importlib.import_module("encoders." + arg_dict["encoder"]) 60 | rewarder = importlib.import_module("rewarders." + arg_dict["rewarder"]) 61 | imported_model = importlib.import_module("models." + arg_dict["model"]) 62 | 63 | fe = fe_module.FeatureEncoder() 64 | model = center_model 65 | 66 | env = football_env.create_environment(env_name=arg_dict["env_evaluation"], representation="raw", stacked=False, logdir='/tmp/football', \ 67 | write_goal_dumps=False, write_full_episode_dumps=False, render=False) 68 | n_epi = 0 69 | while True: # episode loop 70 | env.reset() 71 | done = False 72 | steps, score, tot_reward, win = 0, 0, 0, 0 73 | n_epi += 1 74 | h_out = (torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float), 75 | torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float)) 76 | 77 | loop_t, forward_t, wait_t = 0.0, 0.0, 0.0 78 | obs = env.observation() 79 | 80 | while not done: # step loop 81 | init_t = time.time() 82 | is_stopped = False 83 | while signal_queue.qsize() > 0: 84 | time.sleep(0.02) 85 | is_stopped = True 86 | if is_stopped: 87 | #model.load_state_dict(center_model.state_dict()) 88 | pass 89 | wait_t += time.time() - init_t 90 | 91 | h_in = h_out 92 | state_dict = fe.encode(obs[0]) 93 | state_dict_tensor = state_to_tensor(state_dict, h_in) 94 | 95 | t1 = time.time() 96 | with torch.no_grad(): 97 | a_prob, m_prob, _, h_out = model(state_dict_tensor) 98 | forward_t += time.time()-t1 99 | real_action, a, m, need_m, prob, prob_selected_a, prob_selected_m = get_action(a_prob, m_prob) 100 | 101 | prev_obs = obs 102 | obs, rew, done, info = env.step(real_action) 103 | fin_r = rewarder.calc_reward(rew, prev_obs[0], obs[0]) 104 | state_prime_dict = fe.encode(obs[0]) 105 | 106 | (h1_in, h2_in) = h_in 107 | (h1_out, h2_out) = h_out 108 | state_dict["hidden"] = (h1_in.numpy(), h2_in.numpy()) 109 | state_prime_dict["hidden"] = (h1_out.numpy(), h2_out.numpy()) 110 | transition = (state_dict, a, m, fin_r, state_prime_dict, prob, done, need_m) 111 | 112 | steps += 1 113 | score += rew 114 | tot_reward += fin_r 115 | 116 | if arg_dict['print_mode']: 117 | print_status(steps,a,m,prob_selected_a,prob_selected_m,prev_obs,obs,fin_r,tot_reward) 118 | 119 | loop_t += time.time()-init_t 120 | 121 | if done: 122 | if score > 0: 123 | win = 1 124 | print("score",score,"total reward",tot_reward) 125 | summary_data = (win, score, tot_reward, steps, arg_dict['env_evaluation'], loop_t/steps, forward_t/steps, wait_t/steps) 126 | summary_queue.put(summary_data) 127 | 128 | -------------------------------------------------------------------------------- /kaggle_simulations/agent/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from torch.distributions import Categorical 6 | import numpy as np 7 | import time, os 8 | 9 | class PPO(nn.Module): 10 | def __init__(self, arg_dict, device=None): 11 | super(PPO, self).__init__() 12 | self.device=None 13 | if device: 14 | self.device = device 15 | 16 | self.arg_dict = arg_dict 17 | 18 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],64) 19 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],64) 20 | 
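# Network layout (used by forward() below): per-entity Linear encoders with LayerNorm,
# a kernel-size-1 Conv1d shared across the 10 teammates / 11 opponents, all embeddings
# concatenated into an LSTM of width arg_dict["lstm_size"], followed by three heads:
# a 12-way action policy, an 8-way move-direction policy, and a scalar value.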
self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],48) 21 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],48) 22 | self.fc_left_closest = nn.Linear(arg_dict["feature_dims"]["left_team_closest"],48) 23 | self.fc_right_closest = nn.Linear(arg_dict["feature_dims"]["right_team_closest"],48) 24 | 25 | self.conv1d_left = nn.Conv1d(48, 36, 1, stride=1) 26 | self.conv1d_right = nn.Conv1d(48, 36, 1, stride=1) 27 | self.fc_left2 = nn.Linear(36*10,96) 28 | self.fc_right2 = nn.Linear(36*11,96) 29 | self.fc_cat = nn.Linear(96+96+64+64+48+48,arg_dict["lstm_size"]) 30 | 31 | self.norm_player = nn.LayerNorm(64) 32 | self.norm_ball = nn.LayerNorm(64) 33 | self.norm_left = nn.LayerNorm(48) 34 | self.norm_left2 = nn.LayerNorm(96) 35 | self.norm_left_closest = nn.LayerNorm(48) 36 | self.norm_right = nn.LayerNorm(48) 37 | self.norm_right2 = nn.LayerNorm(96) 38 | self.norm_right_closest = nn.LayerNorm(48) 39 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"]) 40 | 41 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"]) 42 | 43 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 164) 44 | self.fc_pi_a2 = nn.Linear(164, 12) 45 | self.norm_pi_a1 = nn.LayerNorm(164) 46 | 47 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 164) 48 | self.fc_pi_m2 = nn.Linear(164, 8) 49 | self.norm_pi_m1 = nn.LayerNorm(164) 50 | 51 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 164) 52 | self.norm_v1 = nn.LayerNorm(164) 53 | self.fc_v2 = nn.Linear(164, 1, bias=False) 54 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"]) 55 | 56 | self.gamma = arg_dict["gamma"] 57 | self.K_epoch = arg_dict["k_epoch"] 58 | self.lmbda = arg_dict["lmbda"] 59 | self.eps_clip = 0.2 60 | self.entropy_coef = arg_dict["entropy_coef"] 61 | self.move_entropy_coef = arg_dict["move_entropy_coef"] 62 | 63 | def forward(self, state_dict): 64 | player_state = state_dict["player"] 65 | ball_state = state_dict["ball"] 66 | left_team_state = state_dict["left_team"] 67 | left_closest_state = state_dict["left_closest"] 68 | right_team_state = state_dict["right_team"] 69 | right_closest_state = state_dict["right_closest"] 70 | avail = state_dict["avail"] 71 | 72 | player_embed = self.norm_player(self.fc_player(player_state)) 73 | ball_embed = self.norm_ball(self.fc_ball(ball_state)) 74 | left_team_embed = self.norm_left(self.fc_left(left_team_state)) # horizon, batch, n, dim 75 | left_closest_embed = self.norm_left_closest(self.fc_left_closest(left_closest_state)) 76 | right_team_embed = self.norm_right(self.fc_right(right_team_state)) 77 | right_closest_embed = self.norm_right_closest(self.fc_right_closest(right_closest_state)) 78 | 79 | [horizon, batch_size, n_player, dim] = left_team_embed.size() 80 | left_team_embed = left_team_embed.view(horizon*batch_size, n_player, dim).permute(0,2,1) # horizon * batch, dim1, n 81 | left_team_embed = F.relu(self.conv1d_left(left_team_embed)).permute(0,2,1) # horizon * batch, n, dim2 82 | left_team_embed = left_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1) # horizon, batch, n * dim2 83 | left_team_embed = F.relu(self.norm_left2(self.fc_left2(left_team_embed))) 84 | 85 | right_team_embed = right_team_embed.view(horizon*batch_size, n_player+1, dim).permute(0,2,1) # horizon * batch, dim1, n 86 | right_team_embed = F.relu(self.conv1d_right(right_team_embed)).permute(0,2,1) # horizon * batch, n * dim2 87 | right_team_embed = right_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1) 88 | right_team_embed = 
F.relu(self.norm_right2(self.fc_right2(right_team_embed))) 89 | 90 | cat = torch.cat([player_embed, ball_embed, left_team_embed, right_team_embed, left_closest_embed, right_closest_embed], 2) 91 | cat = F.relu(self.norm_cat(self.fc_cat(cat))) 92 | h_in = state_dict["hidden"] 93 | out, h_out = self.lstm(cat, h_in) 94 | 95 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out))) 96 | a_out = self.fc_pi_a2(a_out) 97 | logit = a_out + (avail-1)*1e7 98 | prob = F.softmax(logit, dim=2) 99 | 100 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out))) 101 | prob_m = self.fc_pi_m2(prob_m) 102 | prob_m = F.softmax(prob_m, dim=2) 103 | 104 | v = F.relu(self.norm_v1(self.fc_v1(out))) 105 | v = self.fc_v2(v) 106 | 107 | return prob, prob_m, v, h_out 108 | 109 | 110 | 111 | class FeatureEncoder: 112 | def __init__(self): 113 | self.active = -1 114 | self.player_pos_x, self.player_pos_y = 0, 0 115 | 116 | def get_feature_dims(self): 117 | dims = { 118 | 'player':29, 119 | 'ball':18, 120 | 'left_team':7, 121 | 'left_team_closest':7, 122 | 'right_team':7, 123 | 'right_team_closest':7, 124 | } 125 | return dims 126 | 127 | def encode(self, obs): 128 | player_num = obs['active'] 129 | 130 | player_pos_x, player_pos_y = obs['left_team'][player_num] 131 | player_direction = np.array(obs['left_team_direction'][player_num]) 132 | player_speed = np.linalg.norm(player_direction) 133 | player_role = obs['left_team_roles'][player_num] 134 | player_role_onehot = self._encode_role_onehot(player_role) 135 | player_tired = obs['left_team_tired_factor'][player_num] 136 | is_dribbling = obs['sticky_actions'][9] 137 | is_sprinting = obs['sticky_actions'][8] 138 | 139 | ball_x, ball_y, ball_z = obs['ball'] 140 | ball_x_relative = ball_x - player_pos_x 141 | ball_y_relative = ball_y - player_pos_y 142 | ball_x_speed, ball_y_speed, _ = obs['ball_direction'] 143 | ball_distance = np.linalg.norm([ball_x_relative, ball_y_relative]) 144 | ball_speed = np.linalg.norm([ball_x_speed, ball_y_speed]) 145 | ball_owned = 0.0 146 | if obs['ball_owned_team'] == -1: 147 | ball_owned = 0.0 148 | else: 149 | ball_owned = 1.0 150 | ball_owned_by_us = 0.0 151 | if obs['ball_owned_team'] == 0: 152 | ball_owned_by_us = 1.0 153 | elif obs['ball_owned_team'] == 1: 154 | ball_owned_by_us = 0.0 155 | else: 156 | ball_owned_by_us = 0.0 157 | 158 | ball_which_zone = self._encode_ball_which_zone(ball_x, ball_y) 159 | 160 | if ball_distance > 0.03: 161 | ball_far = 1.0 162 | else: 163 | ball_far = 0.0 164 | 165 | avail = self._get_avail(obs, ball_distance) 166 | player_state = np.concatenate((avail[2:], obs['left_team'][player_num], player_direction*100, [player_speed*100], 167 | player_role_onehot, [ball_far, player_tired, is_dribbling, is_sprinting])) 168 | 169 | 170 | ball_state = np.concatenate((np.array(obs['ball']), 171 | np.array(ball_which_zone), 172 | np.array([ball_x_relative, ball_y_relative]), 173 | np.array(obs['ball_direction'])*20, 174 | np.array([ball_speed*20, ball_distance, ball_owned, ball_owned_by_us]))) 175 | 176 | 177 | obs_left_team = np.delete(obs['left_team'], player_num, axis=0) 178 | obs_left_team_direction = np.delete(obs['left_team_direction'], player_num, axis=0) 179 | left_team_relative = obs_left_team 180 | left_team_distance = np.linalg.norm(left_team_relative - obs['left_team'][player_num], axis=1, keepdims=True) 181 | left_team_speed = np.linalg.norm(obs_left_team_direction, axis=1, keepdims=True) 182 | left_team_tired = np.delete(obs['left_team_tired_factor'], player_num, axis=0).reshape(-1,1) 183 | left_team_state = 
np.concatenate((left_team_relative*2, obs_left_team_direction*100, left_team_speed*100, \ 184 | left_team_distance*2, left_team_tired), axis=1) 185 | left_closest_idx = np.argmin(left_team_distance) 186 | left_closest_state = left_team_state[left_closest_idx] 187 | 188 | 189 | obs_right_team = np.array(obs['right_team']) 190 | obs_right_team_direction = np.array(obs['right_team_direction']) 191 | right_team_distance = np.linalg.norm(obs_right_team - obs['left_team'][player_num], axis=1, keepdims=True) 192 | right_team_speed = np.linalg.norm(obs_right_team_direction, axis=1, keepdims=True) 193 | right_team_tired = np.array(obs['right_team_tired_factor']).reshape(-1,1) 194 | right_team_state = np.concatenate((obs_right_team*2, obs_right_team_direction*100, right_team_speed*100, \ 195 | right_team_distance*2, right_team_tired), axis=1) 196 | right_closest_idx = np.argmin(right_team_distance) 197 | right_closest_state = right_team_state[right_closest_idx] 198 | 199 | 200 | 201 | state_dict = {"player": player_state, 202 | "ball": ball_state, 203 | "left_team" : left_team_state, 204 | "left_closest" : left_closest_state, 205 | "right_team" : right_team_state, 206 | "right_closest" : right_closest_state, 207 | "avail" : avail} 208 | 209 | return state_dict 210 | 211 | def _get_avail(self, obs, ball_distance): 212 | avail = [1,1,1,1,1,1,1,1,1,1,1,1] 213 | NO_OP, MOVE, LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT, SPRINT, RELEASE_MOVE, \ 214 | RELEASE_SPRINT, SLIDE, DRIBBLE, RELEASE_DRIBBLE = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 215 | 216 | ball_x, ball_y, _ = obs['ball'] 217 | # When opponents owning ball ... 218 | if obs['ball_owned_team'] == 1: # opponents owning ball 219 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0 220 | elif obs['ball_owned_team'] == -1 and ball_distance > 0.03 and obs['game_mode'] == 0: # GR ball and far from me 221 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0 222 | else: 223 | avail[SLIDE] = 0 224 | if ball_x > 0.85 and (ball_y < -0.34 or ball_y > 0.34): 225 | avail[LONG_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0 226 | 227 | 228 | # Dealing with sticky actions 229 | sticky_actions = obs['sticky_actions'] 230 | if sticky_actions[8] == 0: # sprinting 231 | avail[RELEASE_SPRINT] = 0 232 | 233 | if sticky_actions[9] == 1: # dribbling 234 | avail[SLIDE] = 0 235 | else: 236 | avail[RELEASE_DRIBBLE] = 0 237 | 238 | if np.sum(sticky_actions[:8]) == 0: 239 | avail[RELEASE_MOVE] = 0 240 | 241 | 242 | # if too far, no shot 243 | 244 | if ball_x < 0.64 or ball_y < -0.27 or 0.27 < ball_y: 245 | avail[SHOT] = 0 246 | elif (0.64 <= ball_x and ball_x<=1.0) and (-0.27<=ball_y and ball_y<=0.27): 247 | avail[HIGH_PASS], avail[LONG_PASS] = 0, 0 248 | 249 | 250 | if obs['game_mode'] == 2 and ball_x < -0.7: # Our GoalKick 251 | avail = [1,0,0,0,0,0,0,0,0,0,0,0] 252 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1 253 | return np.array(avail) 254 | 255 | elif obs['game_mode'] == 4 and ball_x > 0.9: # Our CornerKick 256 | avail = [1,0,0,0,0,0,0,0,0,0,0,0] 257 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1 258 | return np.array(avail) 259 | 260 | elif obs['game_mode'] == 6 and ball_x > 0.6: # Our PenaltyKick 261 | avail = [1,0,0,0,0,0,0,0,0,0,0,0] 262 | avail[SHOT] = 1 263 | return np.array(avail) 264 | 265 | return np.array(avail) 266 | 267 | def _encode_ball_which_zone(self, ball_x, ball_y): 268 | MIDDLE_X, PENALTY_X, END_X = 0.2, 
0.64, 1.0 269 | PENALTY_Y, END_Y = 0.27, 0.42 270 | if (-END_X <= ball_x and ball_x < -PENALTY_X)and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y): 271 | return [1.0,0,0,0,0,0] 272 | elif (-END_X <= ball_x and ball_x < -MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y): 273 | return [0,1.0,0,0,0,0] 274 | elif (-MIDDLE_X <= ball_x and ball_x <= MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y): 275 | return [0,0,1.0,0,0,0] 276 | elif (PENALTY_X < ball_x and ball_x <=END_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y): 277 | return [0,0,0,1.0,0,0] 278 | elif (MIDDLE_X < ball_x and ball_x <=END_X) and (-END_Y < ball_y and ball_y < END_Y): 279 | return [0,0,0,0,1.0,0] 280 | else: 281 | return [0,0,0,0,0,1.0] 282 | 283 | 284 | def _encode_role_onehot(self, role_num): 285 | result = [0,0,0,0,0,0,0,0,0,0] 286 | result[role_num] = 1.0 287 | return np.array(result) 288 | 289 | 290 | 291 | def state_to_tensor(state_dict, h_in): 292 | player_state = torch.from_numpy(state_dict["player"]).float().unsqueeze(0).unsqueeze(0) 293 | ball_state = torch.from_numpy(state_dict["ball"]).float().unsqueeze(0).unsqueeze(0) 294 | left_team_state = torch.from_numpy(state_dict["left_team"]).float().unsqueeze(0).unsqueeze(0) 295 | left_closest_state = torch.from_numpy(state_dict["left_closest"]).float().unsqueeze(0).unsqueeze(0) 296 | right_team_state = torch.from_numpy(state_dict["right_team"]).float().unsqueeze(0).unsqueeze(0) 297 | right_closest_state = torch.from_numpy(state_dict["right_closest"]).float().unsqueeze(0).unsqueeze(0) 298 | avail = torch.from_numpy(state_dict["avail"]).float().unsqueeze(0).unsqueeze(0) 299 | 300 | state_dict_tensor = { 301 | "player" : player_state, 302 | "ball" : ball_state, 303 | "left_team" : left_team_state, 304 | "left_closest" : left_closest_state, 305 | "right_team" : right_team_state, 306 | "right_closest" : right_closest_state, 307 | "avail" : avail, 308 | "hidden" : h_in 309 | } 310 | return state_dict_tensor 311 | 312 | 313 | 314 | fe = FeatureEncoder() 315 | 316 | arg_dict = { 317 | "lstm_size" : 256, 318 | "learning_rate" : 0.0002, 319 | "gamma" : 0.992, 320 | "lmbda" : 0.96, 321 | "entropy_coef" : 0.0, 322 | "move_entropy_coef" : 0.0, 323 | "trained_model_path" : "kaggle_simulations/agent/model_133997184.tar", 324 | "k_epoch" : 3, 325 | 326 | "arg_max" : True 327 | 328 | } 329 | arg_dict["feature_dims"] = fe.get_feature_dims() 330 | model = PPO(arg_dict) 331 | cpu_device = torch.device('cpu') 332 | checkpoint = torch.load(arg_dict["trained_model_path"], map_location=cpu_device) 333 | model.load_state_dict(checkpoint['model_state_dict']) 334 | 335 | 336 | hidden = (torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float), 337 | torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float)) 338 | steps = 0 339 | 340 | 341 | def agent(obs): 342 | global model 343 | global fe 344 | global hidden 345 | global steps 346 | 347 | steps +=1 348 | 349 | obs = obs['players_raw'][0] 350 | state_dict = fe.encode(obs) 351 | state_dict_tensor = state_to_tensor(state_dict, hidden) 352 | with torch.no_grad(): 353 | a_prob, m_prob, _, hidden = model(state_dict_tensor) 354 | 355 | if arg_dict["arg_max"]: 356 | a = torch.argmax(a_prob).item() 357 | else: 358 | a = Categorical(a_prob).sample().item() 359 | 360 | real_action = 0 361 | if a==0: 362 | real_action = int(a) 363 | elif a==1: 364 | if arg_dict["arg_max"]: 365 | m = torch.argmax(m_prob).item() 366 | else: 367 | m = Categorical(m_prob).sample().item() 368 | real_action = int(m + 1) 369 | else: 370 | real_action = int(a + 7) 
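# How the two heads map onto gfootball's 19-action default set:
#   a == 0          -> env action 0 (idle)
#   a == 1 (MOVE)   -> direction head m in [0, 8) -> env actions 1-8 (the eight directions)
#   a in [2, 11]    -> env action a + 7: long_pass(9), high_pass(10), short_pass(11),
#                      shot(12), sprint(13), release_direction(14), release_sprint(15),
#                      sliding(16), dribble(17), release_dribble(18)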
371 | 372 | return [real_action] 373 | -------------------------------------------------------------------------------- /kaggle_simulations/agent/model_133997184.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seungeunrho/football-paris/20d9ec464edce9153839b66a60dcb02874ea15ee/kaggle_simulations/agent/model_133997184.tar -------------------------------------------------------------------------------- /learner.py: -------------------------------------------------------------------------------- 1 | import gfootball.env as football_env 2 | import time, pprint, importlib 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torch.distributions import Categorical 9 | import torch.multiprocessing as mp 10 | from tensorboardX import SummaryWriter 11 | 12 | def write_summary(writer, arg_dict, summary_queue, n_game, loss_lst, pi_loss_lst, v_loss_lst, \ 13 | entropy_lst, move_entropy_lst, optimization_step, self_play_board, win_evaluation, score_evaluation): 14 | win, score, tot_reward, game_len = [], [], [], [] 15 | loop_t, forward_t, wait_t = [], [], [] 16 | 17 | for i in range(arg_dict["summary_game_window"]): 18 | game_data = summary_queue.get() 19 | a,b,c,d,opp_num,t1,t2,t3 = game_data 20 | if arg_dict["env"] == "11_vs_11_kaggle": 21 | if opp_num in self_play_board: 22 | self_play_board[opp_num].append(a) 23 | else: 24 | self_play_board[opp_num] = [a] 25 | 26 | if 'env_evaluation' in arg_dict and opp_num==arg_dict['env_evaluation']: 27 | win_evaluation.append(a) 28 | score_evaluation.append(b) 29 | else: 30 | win.append(a) 31 | score.append(b) 32 | tot_reward.append(c) 33 | game_len.append(d) 34 | loop_t.append(t1) 35 | forward_t.append(t2) 36 | wait_t.append(t3) 37 | 38 | writer.add_scalar('game/win_rate', float(np.mean(win)), n_game) 39 | writer.add_scalar('game/score', float(np.mean(score)), n_game) 40 | writer.add_scalar('game/reward', float(np.mean(tot_reward)), n_game) 41 | writer.add_scalar('game/game_len', float(np.mean(game_len)), n_game) 42 | writer.add_scalar('train/step', float(optimization_step), n_game) 43 | writer.add_scalar('time/loop', float(np.mean(loop_t)), n_game) 44 | writer.add_scalar('time/forward', float(np.mean(forward_t)), n_game) 45 | writer.add_scalar('time/wait', float(np.mean(wait_t)), n_game) 46 | writer.add_scalar('train/loss', np.mean(loss_lst), n_game) 47 | writer.add_scalar('train/pi_loss', np.mean(pi_loss_lst), n_game) 48 | writer.add_scalar('train/v_loss', np.mean(v_loss_lst), n_game) 49 | writer.add_scalar('train/entropy', np.mean(entropy_lst), n_game) 50 | writer.add_scalar('train/move_entropy', np.mean(move_entropy_lst), n_game) 51 | 52 | mini_window = max(1, int(arg_dict['summary_game_window']/3)) 53 | if len(win_evaluation)>=mini_window: 54 | writer.add_scalar('game/win_rate_evaluation', float(np.mean(win_evaluation)), n_game) 55 | writer.add_scalar('game/score_evaluation', float(np.mean(score_evaluation)), n_game) 56 | win_evaluation, score_evaluation = [], [] 57 | 58 | for opp_num in self_play_board: 59 | if len(self_play_board[opp_num]) >= mini_window: 60 | label = 'self_play/'+opp_num 61 | writer.add_scalar(label, np.mean(self_play_board[opp_num][:mini_window]), n_game) 62 | self_play_board[opp_num] = self_play_board[opp_num][mini_window:] 63 | 64 | return win_evaluation, score_evaluation 65 | 66 | def save_model(model, arg_dict, optimization_step, last_saved_step): 67 | if optimization_step 
>= last_saved_step + arg_dict["model_save_interval"]: 68 | model_dict = { 69 | 'optimization_step': optimization_step, 70 | 'model_state_dict': model.state_dict(), 71 | 'optimizer_state_dict': model.optimizer.state_dict(), 72 | } 73 | path = arg_dict["log_dir"]+"/model_"+str(optimization_step)+".tar" 74 | torch.save(model_dict, path) 75 | print("Model saved :", path) 76 | return optimization_step 77 | else: 78 | return last_saved_step 79 | 80 | def get_data(queue, arg_dict, model): 81 | data = [] 82 | for i in range(arg_dict["buffer_size"]): 83 | mini_batch_np = [] 84 | for j in range(arg_dict["batch_size"]): 85 | rollout = queue.get() 86 | mini_batch_np.append(rollout) 87 | mini_batch = model.make_batch(mini_batch_np) 88 | data.append(mini_batch) 89 | return data 90 | 91 | def learner(center_model, queue, signal_queue, summary_queue, arg_dict): 92 | print("Learner process started") 93 | imported_model = importlib.import_module("models." + arg_dict["model"]) 94 | imported_algo = importlib.import_module("algos." + arg_dict["algorithm"]) 95 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 96 | model = imported_model.Model(arg_dict, device) 97 | model.load_state_dict(center_model.state_dict()) 98 | model.optimizer.load_state_dict(center_model.optimizer.state_dict()) 99 | algo = imported_algo.Algo(arg_dict) 100 | 101 | for state in model.optimizer.state.values(): 102 | for k, v in state.items(): 103 | if isinstance(v, torch.Tensor): 104 | state[k] = v.cuda() 105 | model.to(device) 106 | 107 | writer = SummaryWriter(logdir=arg_dict["log_dir"]) 108 | optimization_step = 0 109 | if "optimization_step" in arg_dict: 110 | optimization_step = arg_dict["optimization_step"] 111 | last_saved_step = optimization_step 112 | n_game = 0 113 | loss_lst, pi_loss_lst, v_loss_lst, entropy_lst, move_entropy_lst = [], [], [], [], [] 114 | self_play_board = {} 115 | 116 | win_evaluation, score_evaluation = [], [] 117 | 118 | while True: 119 | if queue.qsize() > arg_dict["batch_size"]*arg_dict["buffer_size"]: 120 | last_saved_step = save_model(model, arg_dict, optimization_step, last_saved_step) 121 | 122 | signal_queue.put(1) 123 | data = get_data(queue, arg_dict, model) 124 | loss, pi_loss, v_loss, entropy, move_entropy = algo.train(model, data) 125 | optimization_step += arg_dict["batch_size"]*arg_dict["buffer_size"]*arg_dict["k_epoch"] 126 | print("step :", optimization_step, "loss", loss, "data_q", queue.qsize(), "summary_q", summary_queue.qsize()) 127 | 128 | loss_lst.append(loss) 129 | pi_loss_lst.append(pi_loss) 130 | v_loss_lst.append(v_loss) 131 | entropy_lst.append(entropy) 132 | move_entropy_lst.append(move_entropy) 133 | center_model.load_state_dict(model.state_dict()) 134 | 135 | if queue.qsize() > arg_dict["batch_size"]*arg_dict["buffer_size"]: 136 | print("warning. data remaining. 
queue size : ", queue.qsize()) 137 | 138 | if summary_queue.qsize() > arg_dict["summary_game_window"]: 139 | win_evaluation, score_evaluation = write_summary(writer, arg_dict, summary_queue, n_game, loss_lst, pi_loss_lst, 140 | v_loss_lst, entropy_lst, move_entropy_lst, optimization_step, 141 | self_play_board, win_evaluation, score_evaluation) 142 | loss_lst, pi_loss_lst, v_loss_lst, entropy_lst, move_entropy_lst = [], [], [], [], [] 143 | n_game += arg_dict["summary_game_window"] 144 | 145 | _ = signal_queue.get() 146 | 147 | else: 148 | time.sleep(0.1) 149 | -------------------------------------------------------------------------------- /models/conv1d.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pprint 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torch.distributions import Categorical 9 | 10 | class Model(nn.Module): 11 | def __init__(self, arg_dict, device=None): 12 | super(Model, self).__init__() 13 | self.device=None 14 | if device: 15 | self.device = device 16 | 17 | self.arg_dict = arg_dict 18 | 19 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],64) 20 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],64) 21 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],48) 22 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],48) 23 | self.fc_left_closest = nn.Linear(arg_dict["feature_dims"]["left_team_closest"],48) 24 | self.fc_right_closest = nn.Linear(arg_dict["feature_dims"]["right_team_closest"],48) 25 | 26 | self.conv1d_left = nn.Conv1d(48, 36, 1, stride=1) 27 | self.conv1d_right = nn.Conv1d(48, 36, 1, stride=1) 28 | self.fc_left2 = nn.Linear(36*10,96) 29 | self.fc_right2 = nn.Linear(36*11,96) 30 | self.fc_cat = nn.Linear(96+96+64+64+48+48,arg_dict["lstm_size"]) 31 | 32 | self.norm_player = nn.LayerNorm(64) 33 | self.norm_ball = nn.LayerNorm(64) 34 | self.norm_left = nn.LayerNorm(48) 35 | self.norm_left2 = nn.LayerNorm(96) 36 | self.norm_left_closest = nn.LayerNorm(48) 37 | self.norm_right = nn.LayerNorm(48) 38 | self.norm_right2 = nn.LayerNorm(96) 39 | self.norm_right_closest = nn.LayerNorm(48) 40 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"]) 41 | 42 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"]) 43 | 44 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 164) 45 | self.fc_pi_a2 = nn.Linear(164, 12) 46 | self.norm_pi_a1 = nn.LayerNorm(164) 47 | 48 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 164) 49 | self.fc_pi_m2 = nn.Linear(164, 8) 50 | self.norm_pi_m1 = nn.LayerNorm(164) 51 | 52 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 164) 53 | self.norm_v1 = nn.LayerNorm(164) 54 | self.fc_v2 = nn.Linear(164, 1, bias=False) 55 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"]) 56 | 57 | def forward(self, state_dict): 58 | player_state = state_dict["player"] 59 | ball_state = state_dict["ball"] 60 | left_team_state = state_dict["left_team"] 61 | left_closest_state = state_dict["left_closest"] 62 | right_team_state = state_dict["right_team"] 63 | right_closest_state = state_dict["right_closest"] 64 | avail = state_dict["avail"] 65 | 66 | player_embed = self.norm_player(self.fc_player(player_state)) 67 | ball_embed = self.norm_ball(self.fc_ball(ball_state)) 68 | left_team_embed = self.norm_left(self.fc_left(left_team_state)) # horizon, batch, n, dim 69 | left_closest_embed = 
self.norm_left_closest(self.fc_left_closest(left_closest_state)) 70 | right_team_embed = self.norm_right(self.fc_right(right_team_state)) 71 | right_closest_embed = self.norm_right_closest(self.fc_right_closest(right_closest_state)) 72 | 73 | [horizon, batch_size, n_player, dim] = left_team_embed.size() 74 | left_team_embed = left_team_embed.view(horizon*batch_size, n_player, dim).permute(0,2,1) # horizon * batch, dim1, n 75 | left_team_embed = F.relu(self.conv1d_left(left_team_embed)).permute(0,2,1) # horizon * batch, n, dim2 76 | left_team_embed = left_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1) # horizon, batch, n * dim2 77 | left_team_embed = F.relu(self.norm_left2(self.fc_left2(left_team_embed))) 78 | 79 | right_team_embed = right_team_embed.view(horizon*batch_size, n_player+1, dim).permute(0,2,1) # horizon * batch, dim1, n 80 | right_team_embed = F.relu(self.conv1d_right(right_team_embed)).permute(0,2,1) # horizon * batch, n * dim2 81 | right_team_embed = right_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1) 82 | right_team_embed = F.relu(self.norm_right2(self.fc_right2(right_team_embed))) 83 | 84 | cat = torch.cat([player_embed, ball_embed, left_team_embed, right_team_embed, left_closest_embed, right_closest_embed], 2) 85 | cat = F.relu(self.norm_cat(self.fc_cat(cat))) 86 | h_in = state_dict["hidden"] 87 | out, h_out = self.lstm(cat, h_in) 88 | 89 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out))) 90 | a_out = self.fc_pi_a2(a_out) 91 | logit = a_out + (avail-1)*1e7 92 | prob = F.softmax(logit, dim=2) 93 | 94 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out))) 95 | prob_m = self.fc_pi_m2(prob_m) 96 | prob_m = F.softmax(prob_m, dim=2) 97 | 98 | v = F.relu(self.norm_v1(self.fc_v1(out))) 99 | v = self.fc_v2(v) 100 | 101 | return prob, prob_m, v, h_out 102 | 103 | def make_batch(self, data): 104 | # data = [tr1, tr2, ..., tr10] * batch_size 105 | s_player_batch, s_ball_batch, s_left_batch, s_left_closest_batch, s_right_batch, s_right_closest_batch, avail_batch = [],[],[],[],[],[],[] 106 | s_player_prime_batch, s_ball_prime_batch, s_left_prime_batch, s_left_closest_prime_batch, \ 107 | s_right_prime_batch, s_right_closest_prime_batch, avail_prime_batch = [],[],[],[],[],[],[] 108 | h1_in_batch, h2_in_batch, h1_out_batch, h2_out_batch = [], [], [], [] 109 | a_batch, m_batch, r_batch, prob_batch, done_batch, need_move_batch = [], [], [], [], [], [] 110 | 111 | for rollout in data: 112 | s_player_lst, s_ball_lst, s_left_lst, s_left_closest_lst, s_right_lst, s_right_closest_lst, avail_lst = [], [], [], [], [], [], [] 113 | s_player_prime_lst, s_ball_prime_lst, s_left_prime_lst, s_left_closest_prime_lst, \ 114 | s_right_prime_lst, s_right_closest_prime_lst, avail_prime_lst = [], [], [], [], [], [], [] 115 | h1_in_lst, h2_in_lst, h1_out_lst, h2_out_lst = [], [], [], [] 116 | a_lst, m_lst, r_lst, prob_lst, done_lst, need_move_lst = [], [], [], [], [], [] 117 | 118 | for transition in rollout: 119 | s, a, m, r, s_prime, prob, done, need_move = transition 120 | 121 | s_player_lst.append(s["player"]) 122 | s_ball_lst.append(s["ball"]) 123 | s_left_lst.append(s["left_team"]) 124 | s_left_closest_lst.append(s["left_closest"]) 125 | s_right_lst.append(s["right_team"]) 126 | s_right_closest_lst.append(s["right_closest"]) 127 | avail_lst.append(s["avail"]) 128 | h1_in, h2_in = s["hidden"] 129 | h1_in_lst.append(h1_in) 130 | h2_in_lst.append(h2_in) 131 | 132 | s_player_prime_lst.append(s_prime["player"]) 133 | s_ball_prime_lst.append(s_prime["ball"]) 
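# note: the *_prime lists filled here mirror the state lists one step later (the successor
# observation s'); together with the stored behaviour-policy probabilities they are
# presumably what the PPO update in algos/ppo.py consumes to build its value targets.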
134 | s_left_prime_lst.append(s_prime["left_team"]) 135 | s_left_closest_prime_lst.append(s_prime["left_closest"]) 136 | s_right_prime_lst.append(s_prime["right_team"]) 137 | s_right_closest_prime_lst.append(s_prime["right_closest"]) 138 | avail_prime_lst.append(s_prime["avail"]) 139 | h1_out, h2_out = s_prime["hidden"] 140 | h1_out_lst.append(h1_out) 141 | h2_out_lst.append(h2_out) 142 | 143 | a_lst.append([a]) 144 | m_lst.append([m]) 145 | r_lst.append([r]) 146 | prob_lst.append([prob]) 147 | done_mask = 0 if done else 1 148 | done_lst.append([done_mask]) 149 | need_move_lst.append([need_move]), 150 | 151 | s_player_batch.append(s_player_lst) 152 | s_ball_batch.append(s_ball_lst) 153 | s_left_batch.append(s_left_lst) 154 | s_left_closest_batch.append(s_left_closest_lst) 155 | s_right_batch.append(s_right_lst) 156 | s_right_closest_batch.append(s_right_closest_lst) 157 | avail_batch.append(avail_lst) 158 | h1_in_batch.append(h1_in_lst[0]) 159 | h2_in_batch.append(h2_in_lst[0]) 160 | 161 | s_player_prime_batch.append(s_player_prime_lst) 162 | s_ball_prime_batch.append(s_ball_prime_lst) 163 | s_left_prime_batch.append(s_left_prime_lst) 164 | s_left_closest_prime_batch.append(s_left_closest_prime_lst) 165 | s_right_prime_batch.append(s_right_prime_lst) 166 | s_right_closest_prime_batch.append(s_right_closest_prime_lst) 167 | avail_prime_batch.append(avail_prime_lst) 168 | h1_out_batch.append(h1_out_lst[0]) 169 | h2_out_batch.append(h2_out_lst[0]) 170 | 171 | a_batch.append(a_lst) 172 | m_batch.append(m_lst) 173 | r_batch.append(r_lst) 174 | prob_batch.append(prob_lst) 175 | done_batch.append(done_lst) 176 | need_move_batch.append(need_move_lst) 177 | 178 | 179 | s = { 180 | "player": torch.tensor(s_player_batch, dtype=torch.float, device=self.device).permute(1,0,2), 181 | "ball": torch.tensor(s_ball_batch, dtype=torch.float, device=self.device).permute(1,0,2), 182 | "left_team": torch.tensor(s_left_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 183 | "left_closest": torch.tensor(s_left_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2), 184 | "right_team": torch.tensor(s_right_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 185 | "right_closest": torch.tensor(s_right_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2), 186 | "avail": torch.tensor(avail_batch, dtype=torch.float, device=self.device).permute(1,0,2), 187 | "hidden" : (torch.tensor(h1_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2), 188 | torch.tensor(h2_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2)) 189 | } 190 | 191 | s_prime = { 192 | "player": torch.tensor(s_player_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 193 | "ball": torch.tensor(s_ball_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 194 | "left_team": torch.tensor(s_left_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 195 | "left_closest": torch.tensor(s_left_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 196 | "right_team": torch.tensor(s_right_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 197 | "right_closest": torch.tensor(s_right_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 198 | "avail": torch.tensor(avail_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 199 | "hidden" : (torch.tensor(h1_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2), 200 | 
torch.tensor(h2_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2)) 201 | } 202 | 203 | a,m,r,done_mask,prob,need_move = torch.tensor(a_batch, device=self.device).permute(1,0,2), \ 204 | torch.tensor(m_batch, device=self.device).permute(1,0,2), \ 205 | torch.tensor(r_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 206 | torch.tensor(done_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 207 | torch.tensor(prob_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 208 | torch.tensor(need_move_batch, dtype=torch.float, device=self.device).permute(1,0,2) 209 | 210 | return s, a, m, r, s_prime, done_mask, prob, need_move 211 | -------------------------------------------------------------------------------- /models/conv1d_larger.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pprint 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torch.distributions import Categorical 9 | 10 | class Model(nn.Module): 11 | def __init__(self, arg_dict, device=None): 12 | super(Model, self).__init__() 13 | self.device=None 14 | if device: 15 | self.device = device 16 | 17 | self.arg_dict = arg_dict 18 | 19 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],64) 20 | self.fc_player2 = nn.Linear(64,64) 21 | 22 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],64) 23 | self.fc_ball2 = nn.Linear(64,64) 24 | 25 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],48) 26 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],48) 27 | self.fc_left_closest = nn.Linear(arg_dict["feature_dims"]["left_team_closest"],48) 28 | self.fc_left_closest2 = nn.Linear(48,48) 29 | 30 | self.fc_right_closest = nn.Linear(arg_dict["feature_dims"]["right_team_closest"],48) 31 | self.fc_right_closest2 = nn.Linear(48,48) 32 | 33 | 34 | self.conv1d_left = nn.Conv1d(48, 36, 1, stride=1) 35 | self.conv1d_left2 = nn.Conv1d(36, 36, 1, stride=1) 36 | self.conv1d_right = nn.Conv1d(48, 36, 1, stride=1) 37 | self.conv1d_right2 = nn.Conv1d(36, 36, 1, stride=1) 38 | self.fc_left2 = nn.Linear(36*10,96) 39 | self.fc_right2 = nn.Linear(36*11,96) 40 | self.fc_cat = nn.Linear(96+96+64+64+48+48,arg_dict["lstm_size"]) 41 | 42 | self.norm_player = nn.LayerNorm(64) 43 | self.norm_player2 = nn.LayerNorm(64) 44 | self.norm_ball = nn.LayerNorm(64) 45 | self.norm_ball2 = nn.LayerNorm(64) 46 | self.norm_left = nn.LayerNorm(48) 47 | self.norm_left2 = nn.LayerNorm(96) 48 | self.norm_left_closest = nn.LayerNorm(48) 49 | self.norm_left_closest2 = nn.LayerNorm(48) 50 | self.norm_right = nn.LayerNorm(48) 51 | self.norm_right2 = nn.LayerNorm(96) 52 | self.norm_right_closest = nn.LayerNorm(48) 53 | self.norm_right_closest2 = nn.LayerNorm(48) 54 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"]) 55 | 56 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"]) 57 | 58 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 164) 59 | self.fc_pi_a2 = nn.Linear(164, 12) 60 | self.norm_pi_a1 = nn.LayerNorm(164) 61 | 62 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 164) 63 | self.fc_pi_m2 = nn.Linear(164, 8) 64 | self.norm_pi_m1 = nn.LayerNorm(164) 65 | 66 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 164) 67 | self.norm_v1 = nn.LayerNorm(164) 68 | self.fc_v2 = nn.Linear(164, 1, bias=False) 69 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"]) 70 | 71 | def forward(self, 
state_dict): 72 | player_state = state_dict["player"] 73 | ball_state = state_dict["ball"] 74 | left_team_state = state_dict["left_team"] 75 | left_closest_state = state_dict["left_closest"] 76 | right_team_state = state_dict["right_team"] 77 | right_closest_state = state_dict["right_closest"] 78 | avail = state_dict["avail"] 79 | 80 | player_embed = F.relu(self.norm_player(self.fc_player(player_state))) 81 | player_embed = self.norm_player2(self.fc_player2(player_embed)) 82 | ball_embed = F.relu(self.norm_ball(self.fc_ball(ball_state))) 83 | ball_embed = self.norm_ball2(self.fc_ball2(ball_embed)) 84 | 85 | left_team_embed = self.norm_left(self.fc_left(left_team_state)) # horizon, batch, n, dim 86 | left_closest_embed = F.relu(self.norm_left_closest(self.fc_left_closest(left_closest_state))) 87 | left_closest_embed = self.norm_left_closest2(self.fc_left_closest2(left_closest_embed)) 88 | 89 | right_team_embed = self.norm_right(self.fc_right(right_team_state)) 90 | right_closest_embed = F.relu(self.norm_right_closest(self.fc_right_closest(right_closest_state))) 91 | right_closest_embed = self.norm_right_closest2(self.fc_right_closest2(right_closest_embed)) 92 | 93 | [horizon, batch_size, n_player, dim] = left_team_embed.size() 94 | left_team_embed = left_team_embed.view(horizon*batch_size, n_player, dim).permute(0,2,1) # horizon * batch, dim1, n 95 | left_team_embed = F.relu(self.conv1d_left(left_team_embed)) # horizon * batch, n, dim2 96 | left_team_embed = F.relu(self.conv1d_left2(left_team_embed)).permute(0,2,1) # horizon * batch, n, dim2 97 | left_team_embed = left_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1) # horizon, batch, n * dim2 98 | left_team_embed = F.relu(self.norm_left2(self.fc_left2(left_team_embed))) 99 | 100 | right_team_embed = right_team_embed.view(horizon*batch_size, n_player+1, dim).permute(0,2,1) # horizon * batch, dim1, n 101 | right_team_embed = F.relu(self.conv1d_right(right_team_embed)) # horizon * batch, n * dim2 102 | right_team_embed = F.relu(self.conv1d_right2(right_team_embed)).permute(0,2,1) # horizon * batch, n * dim2 103 | right_team_embed = right_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1) 104 | right_team_embed = F.relu(self.norm_right2(self.fc_right2(right_team_embed))) 105 | 106 | cat = torch.cat([player_embed, ball_embed, left_team_embed, right_team_embed, left_closest_embed, right_closest_embed], 2) 107 | cat = F.relu(self.norm_cat(self.fc_cat(cat))) 108 | h_in = state_dict["hidden"] 109 | out, h_out = self.lstm(cat, h_in) 110 | 111 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out))) 112 | a_out = self.fc_pi_a2(a_out) 113 | logit = a_out + (avail-1)*1e7 114 | prob = F.softmax(logit, dim=2) 115 | 116 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out))) 117 | prob_m = self.fc_pi_m2(prob_m) 118 | prob_m = F.softmax(prob_m, dim=2) 119 | 120 | v = F.relu(self.norm_v1(self.fc_v1(out))) 121 | v = self.fc_v2(v) 122 | 123 | return prob, prob_m, v, h_out 124 | 125 | def make_batch(self, data): 126 | # data = [tr1, tr2, ..., tr10] * batch_size 127 | s_player_batch, s_ball_batch, s_left_batch, s_left_closest_batch, s_right_batch, s_right_closest_batch, avail_batch = [],[],[],[],[],[],[] 128 | s_player_prime_batch, s_ball_prime_batch, s_left_prime_batch, s_left_closest_prime_batch, \ 129 | s_right_prime_batch, s_right_closest_prime_batch, avail_prime_batch = [],[],[],[],[],[],[] 130 | h1_in_batch, h2_in_batch, h1_out_batch, h2_out_batch = [], [], [], [] 131 | a_batch, m_batch, r_batch, prob_batch, 
done_batch, need_move_batch = [], [], [], [], [], [] 132 | 133 | for rollout in data: 134 | s_player_lst, s_ball_lst, s_left_lst, s_left_closest_lst, s_right_lst, s_right_closest_lst, avail_lst = [], [], [], [], [], [], [] 135 | s_player_prime_lst, s_ball_prime_lst, s_left_prime_lst, s_left_closest_prime_lst, \ 136 | s_right_prime_lst, s_right_closest_prime_lst, avail_prime_lst = [], [], [], [], [], [], [] 137 | h1_in_lst, h2_in_lst, h1_out_lst, h2_out_lst = [], [], [], [] 138 | a_lst, m_lst, r_lst, prob_lst, done_lst, need_move_lst = [], [], [], [], [], [] 139 | 140 | for transition in rollout: 141 | s, a, m, r, s_prime, prob, done, need_move = transition 142 | 143 | s_player_lst.append(s["player"]) 144 | s_ball_lst.append(s["ball"]) 145 | s_left_lst.append(s["left_team"]) 146 | s_left_closest_lst.append(s["left_closest"]) 147 | s_right_lst.append(s["right_team"]) 148 | s_right_closest_lst.append(s["right_closest"]) 149 | avail_lst.append(s["avail"]) 150 | h1_in, h2_in = s["hidden"] 151 | h1_in_lst.append(h1_in) 152 | h2_in_lst.append(h2_in) 153 | 154 | s_player_prime_lst.append(s_prime["player"]) 155 | s_ball_prime_lst.append(s_prime["ball"]) 156 | s_left_prime_lst.append(s_prime["left_team"]) 157 | s_left_closest_prime_lst.append(s_prime["left_closest"]) 158 | s_right_prime_lst.append(s_prime["right_team"]) 159 | s_right_closest_prime_lst.append(s_prime["right_closest"]) 160 | avail_prime_lst.append(s_prime["avail"]) 161 | h1_out, h2_out = s_prime["hidden"] 162 | h1_out_lst.append(h1_out) 163 | h2_out_lst.append(h2_out) 164 | 165 | a_lst.append([a]) 166 | m_lst.append([m]) 167 | r_lst.append([r]) 168 | prob_lst.append([prob]) 169 | done_mask = 0 if done else 1 170 | done_lst.append([done_mask]) 171 | need_move_lst.append([need_move]), 172 | 173 | s_player_batch.append(s_player_lst) 174 | s_ball_batch.append(s_ball_lst) 175 | s_left_batch.append(s_left_lst) 176 | s_left_closest_batch.append(s_left_closest_lst) 177 | s_right_batch.append(s_right_lst) 178 | s_right_closest_batch.append(s_right_closest_lst) 179 | avail_batch.append(avail_lst) 180 | h1_in_batch.append(h1_in_lst[0]) 181 | h2_in_batch.append(h2_in_lst[0]) 182 | 183 | s_player_prime_batch.append(s_player_prime_lst) 184 | s_ball_prime_batch.append(s_ball_prime_lst) 185 | s_left_prime_batch.append(s_left_prime_lst) 186 | s_left_closest_prime_batch.append(s_left_closest_prime_lst) 187 | s_right_prime_batch.append(s_right_prime_lst) 188 | s_right_closest_prime_batch.append(s_right_closest_prime_lst) 189 | avail_prime_batch.append(avail_prime_lst) 190 | h1_out_batch.append(h1_out_lst[0]) 191 | h2_out_batch.append(h2_out_lst[0]) 192 | 193 | a_batch.append(a_lst) 194 | m_batch.append(m_lst) 195 | r_batch.append(r_lst) 196 | prob_batch.append(prob_lst) 197 | done_batch.append(done_lst) 198 | need_move_batch.append(need_move_lst) 199 | 200 | 201 | s = { 202 | "player": torch.tensor(s_player_batch, dtype=torch.float, device=self.device).permute(1,0,2), 203 | "ball": torch.tensor(s_ball_batch, dtype=torch.float, device=self.device).permute(1,0,2), 204 | "left_team": torch.tensor(s_left_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 205 | "left_closest": torch.tensor(s_left_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2), 206 | "right_team": torch.tensor(s_right_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 207 | "right_closest": torch.tensor(s_right_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2), 208 | "avail": torch.tensor(avail_batch, dtype=torch.float, 
device=self.device).permute(1,0,2), 209 | "hidden" : (torch.tensor(h1_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2), 210 | torch.tensor(h2_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2)) 211 | } 212 | 213 | s_prime = { 214 | "player": torch.tensor(s_player_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 215 | "ball": torch.tensor(s_ball_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 216 | "left_team": torch.tensor(s_left_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 217 | "left_closest": torch.tensor(s_left_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 218 | "right_team": torch.tensor(s_right_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 219 | "right_closest": torch.tensor(s_right_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 220 | "avail": torch.tensor(avail_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 221 | "hidden" : (torch.tensor(h1_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2), 222 | torch.tensor(h2_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2)) 223 | } 224 | 225 | a,m,r,done_mask,prob,need_move = torch.tensor(a_batch, device=self.device).permute(1,0,2), \ 226 | torch.tensor(m_batch, device=self.device).permute(1,0,2), \ 227 | torch.tensor(r_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 228 | torch.tensor(done_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 229 | torch.tensor(prob_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 230 | torch.tensor(need_move_batch, dtype=torch.float, device=self.device).permute(1,0,2) 231 | 232 | return s, a, m, r, s_prime, done_mask, prob, need_move 233 | 234 | 235 | 236 | -------------------------------------------------------------------------------- /models/simple_attention.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pprint 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torch.distributions import Categorical 9 | 10 | class Model(nn.Module): 11 | def __init__(self, arg_dict, device=None): 12 | super(Model, self).__init__() 13 | self.device=None 14 | if device: 15 | self.device = device 16 | 17 | self.arg_dict = arg_dict 18 | 19 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],128) 20 | self.fc_player2 = nn.Linear(128,128) 21 | 22 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],96) 23 | self.fc_ball2 = nn.Linear(96,96) 24 | 25 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],128) 26 | self.fc_left2 = nn.Linear(128,128) 27 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],128) 28 | self.fc_right2 = nn.Linear(128,128) 29 | 30 | self.fc_player_left_q = nn.Linear(128,64) 31 | self.fc_left_k = nn.Linear(128,64) 32 | self.fc_player_right_q = nn.Linear(128,64) 33 | self.fc_right_k = nn.Linear(128,64) 34 | 35 | 36 | self.fc_cat = nn.Linear(128+96+128+128,arg_dict["lstm_size"]) 37 | 38 | self.norm_player = nn.LayerNorm(128) 39 | self.norm_player2 = nn.LayerNorm(128) 40 | self.norm_ball = nn.LayerNorm(96) 41 | self.norm_ball2 = nn.LayerNorm(96) 42 | self.norm_left = nn.LayerNorm(128) 43 | self.norm_left2 = nn.LayerNorm(128) 44 | self.norm_right = nn.LayerNorm(128) 45 | self.norm_right2 = nn.LayerNorm(128) 46 
| 47 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"]) 48 | 49 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"]) 50 | 51 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 164) 52 | self.fc_pi_a2 = nn.Linear(164, 12) 53 | self.norm_pi_a1 = nn.LayerNorm(164) 54 | 55 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 164) 56 | self.fc_pi_m2 = nn.Linear(164, 8) 57 | self.norm_pi_m1 = nn.LayerNorm(164) 58 | 59 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 164) 60 | self.norm_v1 = nn.LayerNorm(164) 61 | self.fc_v2 = nn.Linear(164, 1, bias=False) 62 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"]) 63 | 64 | def forward(self, state_dict): 65 | player_state = state_dict["player"] 66 | ball_state = state_dict["ball"] 67 | left_team_state = state_dict["left_team"] 68 | left_closest_state = state_dict["left_closest"] 69 | right_team_state = state_dict["right_team"] 70 | right_closest_state = state_dict["right_closest"] 71 | avail = state_dict["avail"] 72 | 73 | player_embed = F.relu(self.norm_player(self.fc_player(player_state))) 74 | player_embed = self.norm_player2(self.fc_player2(player_embed)) 75 | ball_embed = F.relu(self.norm_ball(self.fc_ball(ball_state))) 76 | ball_embed = self.norm_ball2(self.fc_ball2(ball_embed)) 77 | 78 | left_team_embed = F.relu(self.norm_left(self.fc_left(left_team_state))) # horizon, batch, n, dim 79 | left_team_embed = F.relu(self.norm_left2(self.fc_left2(left_team_embed))) # horizon, batch, n, dim 80 | 81 | right_team_embed = F.relu(self.norm_right(self.fc_right(right_team_state))) 82 | right_team_embed = F.relu(self.norm_right2(self.fc_right2(right_team_embed))) 83 | 84 | player_left_q = self.fc_player_left_q(player_embed) # horizon, batch, dim 85 | left_team_k = self.fc_left_k(left_team_embed) # horizon, batch, n, dim 86 | [horizon, batch_size, n_player, f_dim] = left_team_k.size() 87 | player_left_q = player_left_q.view(horizon*batch_size, 1, f_dim) # horizon*batch, 1, dim1 88 | left_team_k = left_team_k.view(horizon*batch_size, n_player, f_dim).permute(0,2,1) # horizon*batch, dim1, n 89 | attention = F.softmax(torch.bmm(player_left_q, left_team_k)/8, dim=2) # horizon*batch, 1 , n 90 | attention = attention.view(horizon, batch_size, -1).unsqueeze(3) # horizon, batch, n, 1 91 | left_team = left_team_embed*attention # horizon, batch, n, dim 92 | left_team = left_team.permute(0,1,3,2) 93 | left_team = torch.sum(left_team, axis=3) 94 | 95 | player_right_q = self.fc_player_right_q(player_embed) # horizon, batch, dim 96 | right_team_k = self.fc_right_k(right_team_embed) # horizon, batch, n, dim 97 | [horizon, batch_size, n_player, f_dim] = right_team_k.size() 98 | player_right_q = player_right_q.view(horizon*batch_size, 1, f_dim) # horizon*batch, 1, dim1 99 | right_team_k = right_team_k.view(horizon*batch_size, n_player, f_dim).permute(0,2,1) # horizon*batch, dim1, n 100 | attention = F.softmax(torch.bmm(player_right_q, right_team_k)/8, dim=2) # horizon*batch, 1 , n 101 | attention = attention.view(horizon, batch_size, -1).unsqueeze(3) # horizon, batch, n, 1 102 | right_team = right_team_embed*attention # horizon, batch, n, dim 103 | right_team = right_team.permute(0,1,3,2) 104 | right_team = torch.sum(right_team, axis=3) 105 | 106 | 107 | cat = torch.cat([player_embed, ball_embed, left_team, right_team], 2) 108 | cat = F.relu(self.norm_cat(self.fc_cat(cat))) 109 | h_in = state_dict["hidden"] 110 | out, h_out = self.lstm(cat, h_in) 111 | 112 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out))) 113 | a_out = 
self.fc_pi_a2(a_out) 114 | logit = a_out + (avail-1)*1e7 115 | prob = F.softmax(logit, dim=2) 116 | 117 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out))) 118 | prob_m = self.fc_pi_m2(prob_m) 119 | prob_m = F.softmax(prob_m, dim=2) 120 | 121 | v = F.relu(self.norm_v1(self.fc_v1(out))) 122 | v = self.fc_v2(v) 123 | 124 | return prob, prob_m, v, h_out 125 | 126 | def make_batch(self, data): 127 | # data = [tr1, tr2, ..., tr10] * batch_size 128 | s_player_batch, s_ball_batch, s_left_batch, s_left_closest_batch, s_right_batch, s_right_closest_batch, avail_batch = [],[],[],[],[],[],[] 129 | s_player_prime_batch, s_ball_prime_batch, s_left_prime_batch, s_left_closest_prime_batch, \ 130 | s_right_prime_batch, s_right_closest_prime_batch, avail_prime_batch = [],[],[],[],[],[],[] 131 | h1_in_batch, h2_in_batch, h1_out_batch, h2_out_batch = [], [], [], [] 132 | a_batch, m_batch, r_batch, prob_batch, done_batch, need_move_batch = [], [], [], [], [], [] 133 | 134 | for rollout in data: 135 | s_player_lst, s_ball_lst, s_left_lst, s_left_closest_lst, s_right_lst, s_right_closest_lst, avail_lst = [], [], [], [], [], [], [] 136 | s_player_prime_lst, s_ball_prime_lst, s_left_prime_lst, s_left_closest_prime_lst, \ 137 | s_right_prime_lst, s_right_closest_prime_lst, avail_prime_lst = [], [], [], [], [], [], [] 138 | h1_in_lst, h2_in_lst, h1_out_lst, h2_out_lst = [], [], [], [] 139 | a_lst, m_lst, r_lst, prob_lst, done_lst, need_move_lst = [], [], [], [], [], [] 140 | 141 | for transition in rollout: 142 | s, a, m, r, s_prime, prob, done, need_move = transition 143 | 144 | s_player_lst.append(s["player"]) 145 | s_ball_lst.append(s["ball"]) 146 | s_left_lst.append(s["left_team"]) 147 | s_left_closest_lst.append(s["left_closest"]) 148 | s_right_lst.append(s["right_team"]) 149 | s_right_closest_lst.append(s["right_closest"]) 150 | avail_lst.append(s["avail"]) 151 | h1_in, h2_in = s["hidden"] 152 | h1_in_lst.append(h1_in) 153 | h2_in_lst.append(h2_in) 154 | 155 | s_player_prime_lst.append(s_prime["player"]) 156 | s_ball_prime_lst.append(s_prime["ball"]) 157 | s_left_prime_lst.append(s_prime["left_team"]) 158 | s_left_closest_prime_lst.append(s_prime["left_closest"]) 159 | s_right_prime_lst.append(s_prime["right_team"]) 160 | s_right_closest_prime_lst.append(s_prime["right_closest"]) 161 | avail_prime_lst.append(s_prime["avail"]) 162 | h1_out, h2_out = s_prime["hidden"] 163 | h1_out_lst.append(h1_out) 164 | h2_out_lst.append(h2_out) 165 | 166 | a_lst.append([a]) 167 | m_lst.append([m]) 168 | r_lst.append([r]) 169 | prob_lst.append([prob]) 170 | done_mask = 0 if done else 1 171 | done_lst.append([done_mask]) 172 | need_move_lst.append([need_move]), 173 | 174 | s_player_batch.append(s_player_lst) 175 | s_ball_batch.append(s_ball_lst) 176 | s_left_batch.append(s_left_lst) 177 | s_left_closest_batch.append(s_left_closest_lst) 178 | s_right_batch.append(s_right_lst) 179 | s_right_closest_batch.append(s_right_closest_lst) 180 | avail_batch.append(avail_lst) 181 | h1_in_batch.append(h1_in_lst[0]) 182 | h2_in_batch.append(h2_in_lst[0]) 183 | 184 | s_player_prime_batch.append(s_player_prime_lst) 185 | s_ball_prime_batch.append(s_ball_prime_lst) 186 | s_left_prime_batch.append(s_left_prime_lst) 187 | s_left_closest_prime_batch.append(s_left_closest_prime_lst) 188 | s_right_prime_batch.append(s_right_prime_lst) 189 | s_right_closest_prime_batch.append(s_right_closest_prime_lst) 190 | avail_prime_batch.append(avail_prime_lst) 191 | h1_out_batch.append(h1_out_lst[0]) 192 | h2_out_batch.append(h2_out_lst[0]) 193 | 194 | 
a_batch.append(a_lst) 195 | m_batch.append(m_lst) 196 | r_batch.append(r_lst) 197 | prob_batch.append(prob_lst) 198 | done_batch.append(done_lst) 199 | need_move_batch.append(need_move_lst) 200 | 201 | 202 | s = { 203 | "player": torch.tensor(s_player_batch, dtype=torch.float, device=self.device).permute(1,0,2), 204 | "ball": torch.tensor(s_ball_batch, dtype=torch.float, device=self.device).permute(1,0,2), 205 | "left_team": torch.tensor(s_left_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 206 | "left_closest": torch.tensor(s_left_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2), 207 | "right_team": torch.tensor(s_right_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 208 | "right_closest": torch.tensor(s_right_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2), 209 | "avail": torch.tensor(avail_batch, dtype=torch.float, device=self.device).permute(1,0,2), 210 | "hidden" : (torch.tensor(h1_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2), 211 | torch.tensor(h2_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2)) 212 | } 213 | 214 | s_prime = { 215 | "player": torch.tensor(s_player_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 216 | "ball": torch.tensor(s_ball_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 217 | "left_team": torch.tensor(s_left_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 218 | "left_closest": torch.tensor(s_left_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 219 | "right_team": torch.tensor(s_right_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 220 | "right_closest": torch.tensor(s_right_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 221 | "avail": torch.tensor(avail_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 222 | "hidden" : (torch.tensor(h1_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2), 223 | torch.tensor(h2_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2)) 224 | } 225 | 226 | a,m,r,done_mask,prob,need_move = torch.tensor(a_batch, device=self.device).permute(1,0,2), \ 227 | torch.tensor(m_batch, device=self.device).permute(1,0,2), \ 228 | torch.tensor(r_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 229 | torch.tensor(done_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 230 | torch.tensor(prob_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 231 | torch.tensor(need_move_batch, dtype=torch.float, device=self.device).permute(1,0,2) 232 | 233 | return s, a, m, r, s_prime, done_mask, prob, need_move 234 | 235 | 236 | -------------------------------------------------------------------------------- /models/team_fc.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pprint 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torch.distributions import Categorical 9 | 10 | class Model(nn.Module): 11 | def __init__(self, arg_dict, device=None): 12 | super(Model, self).__init__() 13 | self.device=None 14 | if device: 15 | self.device = device 16 | 17 | self.arg_dict = arg_dict 18 | 19 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],64) 20 | self.norm_player = nn.LayerNorm(64) 21 | self.fc_player2 = nn.Linear(64,64) 22 | 
self.norm_player2 = nn.LayerNorm(64) 23 | 24 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],64) 25 | self.norm_ball = nn.LayerNorm(64) 26 | self.fc_ball2 = nn.Linear(64,64) 27 | self.norm_ball2 = nn.LayerNorm(64) 28 | 29 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],64) 30 | self.norm_left = nn.LayerNorm(64) 31 | self.fc_left2 = nn.Linear(64,48) 32 | self.norm_left2 = nn.LayerNorm(48) 33 | self.fc_left_tot = nn.Linear(480, 96) 34 | self.norm_left_tot = nn.LayerNorm(96) 35 | 36 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],64) 37 | self.norm_right = nn.LayerNorm(64) 38 | self.fc_right2 = nn.Linear(64,48) 39 | self.norm_right2 = nn.LayerNorm(48) 40 | self.fc_right_tot = nn.Linear(48*11, 96) 41 | self.norm_right_tot = nn.LayerNorm(96) 42 | 43 | self.fc_left_closest = nn.Linear(arg_dict["feature_dims"]["left_team_closest"],64) 44 | self.norm_left_closest = nn.LayerNorm(64) 45 | self.fc_left_closest2 = nn.Linear(64,64) 46 | self.norm_left_closest2 = nn.LayerNorm(64) 47 | 48 | self.fc_right_closest = nn.Linear(arg_dict["feature_dims"]["right_team_closest"],64) 49 | self.norm_right_closest = nn.LayerNorm(64) 50 | self.fc_right_closest2 = nn.Linear(64,64) 51 | self.norm_right_closest2 = nn.LayerNorm(64) 52 | 53 | self.fc_cat = nn.Linear(96+96+64+64+64+64,arg_dict["lstm_size"]) 54 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"]) 55 | 56 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"]) 57 | 58 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 164) 59 | self.fc_pi_a2 = nn.Linear(164, 12) 60 | self.norm_pi_a1 = nn.LayerNorm(164) 61 | 62 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 164) 63 | self.fc_pi_m2 = nn.Linear(164, 8) 64 | self.norm_pi_m1 = nn.LayerNorm(164) 65 | 66 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 164) 67 | self.norm_v1 = nn.LayerNorm(164) 68 | self.fc_v2 = nn.Linear(164, 1, bias=False) 69 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"]) 70 | 71 | def forward(self, state_dict): 72 | player_state = state_dict["player"] 73 | ball_state = state_dict["ball"] 74 | left_team_state = state_dict["left_team"] 75 | left_closest_state = state_dict["left_closest"] 76 | right_team_state = state_dict["right_team"] 77 | right_closest_state = state_dict["right_closest"] 78 | avail = state_dict["avail"] 79 | 80 | player_embed = F.relu(self.norm_player(self.fc_player(player_state))) 81 | player_embed = F.relu(self.norm_player2(self.fc_player2(player_embed))) 82 | ball_embed = F.relu(self.norm_ball(self.fc_ball(ball_state))) 83 | ball_embed = F.relu(self.norm_ball2(self.fc_ball2(ball_embed))) 84 | 85 | left_team_embed = F.relu(self.norm_left(self.fc_left(left_team_state))) # horizon, batch, n, dim 86 | left_team_embed = F.relu(self.norm_left2(self.fc_left2(left_team_embed))) # horizon, batch, n, dim 87 | right_team_embed = self.norm_right(self.fc_right(right_team_state)) 88 | right_team_embed = self.norm_right2(self.fc_right2(right_team_embed)) 89 | 90 | [horizon, batch_size, n_player, dim] = left_team_embed.size() 91 | left_team_embed = left_team_embed.view(horizon, batch_size, -1) 92 | left_team_embed = F.relu(self.norm_left_tot(self.fc_left_tot(left_team_embed))) 93 | right_team_embed = right_team_embed.view(horizon, batch_size, -1) 94 | right_team_embed = F.relu(self.norm_right_tot(self.fc_right_tot(right_team_embed))) 95 | 96 | 97 | 98 | left_closest_embed = F.relu(self.norm_left_closest(self.fc_left_closest(left_closest_state))) 99 | left_closest_embed = 
F.relu(self.norm_left_closest2(self.fc_left_closest2(left_closest_embed))) 100 | right_closest_embed = F.relu(self.norm_right_closest(self.fc_right_closest(right_closest_state))) 101 | right_closest_embed = F.relu(self.norm_right_closest2(self.fc_right_closest2(right_closest_embed))) 102 | 103 | cat = torch.cat([player_embed, ball_embed, left_team_embed, right_team_embed, left_closest_embed, right_closest_embed], 2) 104 | cat = F.relu(self.norm_cat(self.fc_cat(cat))) 105 | h_in = state_dict["hidden"] 106 | out, h_out = self.lstm(cat, h_in) 107 | 108 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out))) 109 | a_out = self.fc_pi_a2(a_out) 110 | logit = a_out + (avail-1)*1e7 111 | prob = F.softmax(logit, dim=2) 112 | 113 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out))) 114 | prob_m = self.fc_pi_m2(prob_m) 115 | prob_m = F.softmax(prob_m, dim=2) 116 | 117 | v = F.relu(self.norm_v1(self.fc_v1(out))) 118 | v = self.fc_v2(v) 119 | 120 | return prob, prob_m, v, h_out 121 | 122 | def make_batch(self, data): 123 | # data = [tr1, tr2, ..., tr10] * batch_size 124 | s_player_batch, s_ball_batch, s_left_batch, s_left_closest_batch, s_right_batch, s_right_closest_batch, avail_batch = [],[],[],[],[],[],[] 125 | s_player_prime_batch, s_ball_prime_batch, s_left_prime_batch, s_left_closest_prime_batch, \ 126 | s_right_prime_batch, s_right_closest_prime_batch, avail_prime_batch = [],[],[],[],[],[],[] 127 | h1_in_batch, h2_in_batch, h1_out_batch, h2_out_batch = [], [], [], [] 128 | a_batch, m_batch, r_batch, prob_batch, done_batch, need_move_batch = [], [], [], [], [], [] 129 | 130 | for rollout in data: 131 | s_player_lst, s_ball_lst, s_left_lst, s_left_closest_lst, s_right_lst, s_right_closest_lst, avail_lst = [], [], [], [], [], [], [] 132 | s_player_prime_lst, s_ball_prime_lst, s_left_prime_lst, s_left_closest_prime_lst, \ 133 | s_right_prime_lst, s_right_closest_prime_lst, avail_prime_lst = [], [], [], [], [], [], [] 134 | h1_in_lst, h2_in_lst, h1_out_lst, h2_out_lst = [], [], [], [] 135 | a_lst, m_lst, r_lst, prob_lst, done_lst, need_move_lst = [], [], [], [], [], [] 136 | 137 | for transition in rollout: 138 | s, a, m, r, s_prime, prob, done, need_move = transition 139 | 140 | s_player_lst.append(s["player"]) 141 | s_ball_lst.append(s["ball"]) 142 | s_left_lst.append(s["left_team"]) 143 | s_left_closest_lst.append(s["left_closest"]) 144 | s_right_lst.append(s["right_team"]) 145 | s_right_closest_lst.append(s["right_closest"]) 146 | avail_lst.append(s["avail"]) 147 | h1_in, h2_in = s["hidden"] 148 | h1_in_lst.append(h1_in) 149 | h2_in_lst.append(h2_in) 150 | 151 | s_player_prime_lst.append(s_prime["player"]) 152 | s_ball_prime_lst.append(s_prime["ball"]) 153 | s_left_prime_lst.append(s_prime["left_team"]) 154 | s_left_closest_prime_lst.append(s_prime["left_closest"]) 155 | s_right_prime_lst.append(s_prime["right_team"]) 156 | s_right_closest_prime_lst.append(s_prime["right_closest"]) 157 | avail_prime_lst.append(s_prime["avail"]) 158 | h1_out, h2_out = s_prime["hidden"] 159 | h1_out_lst.append(h1_out) 160 | h2_out_lst.append(h2_out) 161 | 162 | a_lst.append([a]) 163 | m_lst.append([m]) 164 | r_lst.append([r]) 165 | prob_lst.append([prob]) 166 | done_mask = 0 if done else 1 167 | done_lst.append([done_mask]) 168 | need_move_lst.append([need_move]), 169 | 170 | s_player_batch.append(s_player_lst) 171 | s_ball_batch.append(s_ball_lst) 172 | s_left_batch.append(s_left_lst) 173 | s_left_closest_batch.append(s_left_closest_lst) 174 | s_right_batch.append(s_right_lst) 175 | 
s_right_closest_batch.append(s_right_closest_lst) 176 | avail_batch.append(avail_lst) 177 | h1_in_batch.append(h1_in_lst[0]) 178 | h2_in_batch.append(h2_in_lst[0]) 179 | 180 | s_player_prime_batch.append(s_player_prime_lst) 181 | s_ball_prime_batch.append(s_ball_prime_lst) 182 | s_left_prime_batch.append(s_left_prime_lst) 183 | s_left_closest_prime_batch.append(s_left_closest_prime_lst) 184 | s_right_prime_batch.append(s_right_prime_lst) 185 | s_right_closest_prime_batch.append(s_right_closest_prime_lst) 186 | avail_prime_batch.append(avail_prime_lst) 187 | h1_out_batch.append(h1_out_lst[0]) 188 | h2_out_batch.append(h2_out_lst[0]) 189 | 190 | a_batch.append(a_lst) 191 | m_batch.append(m_lst) 192 | r_batch.append(r_lst) 193 | prob_batch.append(prob_lst) 194 | done_batch.append(done_lst) 195 | need_move_batch.append(need_move_lst) 196 | 197 | 198 | s = { 199 | "player": torch.tensor(s_player_batch, dtype=torch.float, device=self.device).permute(1,0,2), 200 | "ball": torch.tensor(s_ball_batch, dtype=torch.float, device=self.device).permute(1,0,2), 201 | "left_team": torch.tensor(s_left_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 202 | "left_closest": torch.tensor(s_left_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2), 203 | "right_team": torch.tensor(s_right_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 204 | "right_closest": torch.tensor(s_right_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2), 205 | "avail": torch.tensor(avail_batch, dtype=torch.float, device=self.device).permute(1,0,2), 206 | "hidden" : (torch.tensor(h1_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2), 207 | torch.tensor(h2_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2)) 208 | } 209 | 210 | s_prime = { 211 | "player": torch.tensor(s_player_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 212 | "ball": torch.tensor(s_ball_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 213 | "left_team": torch.tensor(s_left_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 214 | "left_closest": torch.tensor(s_left_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 215 | "right_team": torch.tensor(s_right_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3), 216 | "right_closest": torch.tensor(s_right_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 217 | "avail": torch.tensor(avail_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2), 218 | "hidden" : (torch.tensor(h1_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2), 219 | torch.tensor(h2_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2)) 220 | } 221 | 222 | a,m,r,done_mask,prob,need_move = torch.tensor(a_batch, device=self.device).permute(1,0,2), \ 223 | torch.tensor(m_batch, device=self.device).permute(1,0,2), \ 224 | torch.tensor(r_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 225 | torch.tensor(done_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 226 | torch.tensor(prob_batch, dtype=torch.float, device=self.device).permute(1,0,2), \ 227 | torch.tensor(need_move_batch, dtype=torch.float, device=self.device).permute(1,0,2) 228 | 229 | return s, a, m, r, s_prime, done_mask, prob, need_move 230 | 231 | 232 | -------------------------------------------------------------------------------- /models/team_pooling.py: 
-------------------------------------------------------------------------------- 1 | import time 2 | import pprint 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torch.distributions import Categorical 9 | 10 | class Model(nn.Module): 11 | def __init__(self, arg_dict, device=None): 12 | super(Model, self).__init__() 13 | if device: 14 | self.device = device 15 | 16 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],64) 17 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],64) 18 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],64) 19 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],64) 20 | self.fc_left_closest = nn.Linear(arg_dict["feature_dims"]["left_team_closest"],32) 21 | self.fc_right_closest = nn.Linear(arg_dict["feature_dims"]["right_team_closest"],32) 22 | self.fc_cat = nn.Linear(256+64,arg_dict["lstm_size"]) 23 | self.norm_player = nn.LayerNorm(64) 24 | self.norm_ball = nn.LayerNorm(64) 25 | self.norm_left = nn.LayerNorm(64) 26 | self.norm_left_closest = nn.LayerNorm(32) 27 | self.norm_right = nn.LayerNorm(64) 28 | self.norm_right_closest = nn.LayerNorm(32) 29 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"]) 30 | 31 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"]) 32 | 33 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 128) 34 | self.fc_pi_a2 = nn.Linear(128, 12) 35 | self.norm_pi_a1 = nn.LayerNorm(128) 36 | 37 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 128) 38 | self.fc_pi_m2 = nn.Linear(128, 8) 39 | self.norm_pi_m1 = nn.LayerNorm(128) 40 | 41 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 128) 42 | self.norm_v1 = nn.LayerNorm(128) 43 | self.fc_v2 = nn.Linear(128, 1, bias=False) 44 | self.pool = nn.AdaptiveAvgPool2d((1,None)) 45 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"]) 46 | 47 | 48 | def forward(self, state_dict): 49 | player_state = state_dict["player"] 50 | ball_state = state_dict["ball"] 51 | left_team_state = state_dict["left_team"] 52 | left_closest_state = state_dict["left_closest"] 53 | right_team_state = state_dict["right_team"] 54 | right_closest_state = state_dict["right_closest"] 55 | avail = state_dict["avail"] 56 | 57 | player_embed = self.norm_player(self.fc_player(player_state)) 58 | ball_embed = self.norm_ball(self.fc_ball(ball_state)) 59 | left_team_embed = self.norm_left(self.fc_left(left_team_state)) 60 | left_closest_embed = self.norm_left_closest(self.fc_left_closest(left_closest_state)) 61 | right_team_embed = self.norm_right(self.fc_right(right_team_state)) 62 | right_closest_embed = self.norm_right_closest(self.fc_right_closest(right_closest_state)) 63 | 64 | left_team_embed = self.pool(left_team_embed).squeeze(2) 65 | right_team_embed = self.pool(right_team_embed).squeeze(2) 66 | 67 | cat = torch.cat([player_embed, ball_embed, left_team_embed, right_team_embed, left_closest_embed, right_closest_embed], 2) 68 | cat = F.relu(self.norm_cat(self.fc_cat(cat))) 69 | h_in = state_dict["hidden"] 70 | out, h_out = self.lstm(cat, h_in) 71 | 72 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out))) 73 | a_out = self.fc_pi_a2(a_out) 74 | logit = a_out + (avail-1)*1e8 75 | prob = F.softmax(logit, dim=2) 76 | 77 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out))) 78 | prob_m = self.fc_pi_m2(prob_m) 79 | prob_m = F.softmax(prob_m, dim=2) 80 | 81 | v = F.relu(self.norm_v1(self.fc_v1(out))) 82 | v = self.fc_v2(v) 83 | 84 | return prob, prob_m, v, 
h_out 85 | 86 | def make_batch(self, data): 87 | # data = [tr1, tr2, ..., tr10] * batch_size 88 | s_player_batch, s_ball_batch, s_left_batch, s_left_closest_batch, s_right_batch, s_right_closest_batch, avail_batch = [],[],[],[],[],[],[] 89 | s_player_prime_batch, s_ball_prime_batch, s_left_prime_batch, s_left_closest_prime_batch, \ 90 | s_right_prime_batch, s_right_closest_prime_batch, avail_prime_batch = [],[],[],[],[],[],[] 91 | h1_in_batch, h2_in_batch, h1_out_batch, h2_out_batch = [], [], [], [] 92 | a_batch, m_batch, r_batch, prob_batch, done_batch, need_move_batch = [], [], [], [], [], [] 93 | 94 | for rollout in data: 95 | s_player_lst, s_ball_lst, s_left_lst, s_left_closest_lst, s_right_lst, s_right_closest_lst, avail_lst = [], [], [], [], [], [], [] 96 | s_player_prime_lst, s_ball_prime_lst, s_left_prime_lst, s_left_closest_prime_lst, \ 97 | s_right_prime_lst, s_right_closest_prime_lst, avail_prime_lst = [], [], [], [], [], [], [] 98 | h1_in_lst, h2_in_lst, h1_out_lst, h2_out_lst = [], [], [], [] 99 | a_lst, m_lst, r_lst, prob_lst, done_lst, need_move_lst = [], [], [], [], [], [] 100 | 101 | for transition in rollout: 102 | s, a, m, r, s_prime, prob, done, need_move = transition 103 | 104 | s_player_lst.append(s["player"]) 105 | s_ball_lst.append(s["ball"]) 106 | s_left_lst.append(s["left_team"]) 107 | s_left_closest_lst.append(s["left_closest"]) 108 | s_right_lst.append(s["right_team"]) 109 | s_right_closest_lst.append(s["right_closest"]) 110 | avail_lst.append(s["avail"]) 111 | h1_in, h2_in = s["hidden"] 112 | h1_in_lst.append(h1_in) 113 | h2_in_lst.append(h2_in) 114 | 115 | s_player_prime_lst.append(s_prime["player"]) 116 | s_ball_prime_lst.append(s_prime["ball"]) 117 | s_left_prime_lst.append(s_prime["left_team"]) 118 | s_left_closest_prime_lst.append(s_prime["left_closest"]) 119 | s_right_prime_lst.append(s_prime["right_team"]) 120 | s_right_closest_prime_lst.append(s_prime["right_closest"]) 121 | avail_prime_lst.append(s_prime["avail"]) 122 | h1_out, h2_out = s_prime["hidden"] 123 | h1_out_lst.append(h1_out) 124 | h2_out_lst.append(h2_out) 125 | 126 | a_lst.append([a]) 127 | m_lst.append([m]) 128 | r_lst.append([r]) 129 | prob_lst.append([prob]) 130 | done_mask = 0 if done else 1 131 | done_lst.append([done_mask]) 132 | need_move_lst.append([need_move]), 133 | 134 | s_player_batch.append(s_player_lst) 135 | s_ball_batch.append(s_ball_lst) 136 | s_left_batch.append(s_left_lst) 137 | s_left_closest_batch.append(s_left_closest_lst) 138 | s_right_batch.append(s_right_lst) 139 | s_right_closest_batch.append(s_right_closest_lst) 140 | avail_batch.append(avail_lst) 141 | h1_in_batch.append(h1_in_lst[0]) 142 | h2_in_batch.append(h2_in_lst[0]) 143 | 144 | s_player_prime_batch.append(s_player_prime_lst) 145 | s_ball_prime_batch.append(s_ball_prime_lst) 146 | s_left_prime_batch.append(s_left_prime_lst) 147 | s_left_closest_prime_batch.append(s_left_closest_prime_lst) 148 | s_right_prime_batch.append(s_right_prime_lst) 149 | s_right_closest_prime_batch.append(s_right_closest_prime_lst) 150 | avail_prime_batch.append(avail_prime_lst) 151 | h1_out_batch.append(h1_out_lst[0]) 152 | h2_out_batch.append(h2_out_lst[0]) 153 | 154 | a_batch.append(a_lst) 155 | m_batch.append(m_lst) 156 | r_batch.append(r_lst) 157 | prob_batch.append(prob_lst) 158 | done_batch.append(done_lst) 159 | need_move_batch.append(need_move_lst) 160 | 161 | s = { 162 | "player": torch.tensor(s_player_batch, dtype=torch.float, device=self.device).permute(1,0,2), 163 | "ball": torch.tensor(s_ball_batch, dtype=torch.float, 
device=self.device).permute(1,0,2),
164 |                "left_team": torch.tensor(s_left_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
165 |                "left_closest": torch.tensor(s_left_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
166 |                "right_team": torch.tensor(s_right_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
167 |                "right_closest": torch.tensor(s_right_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
168 |                "avail": torch.tensor(avail_batch, dtype=torch.float, device=self.device).permute(1,0,2),
169 |                "hidden" : (torch.tensor(h1_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
170 |                            torch.tensor(h2_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
171 |         }
172 | 
173 |         s_prime = {
174 |                "player": torch.tensor(s_player_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
175 |                "ball": torch.tensor(s_ball_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
176 |                "left_team": torch.tensor(s_left_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
177 |                "left_closest": torch.tensor(s_left_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
178 |                "right_team": torch.tensor(s_right_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
179 |                "right_closest": torch.tensor(s_right_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
180 |                "avail": torch.tensor(avail_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
181 |                "hidden" : (torch.tensor(h1_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
182 |                            torch.tensor(h2_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
183 |         }
184 | 
185 |         a,m,r,done_mask,prob,need_move = torch.tensor(a_batch, device=self.device).permute(1,0,2), \
186 |                                          torch.tensor(m_batch, device=self.device).permute(1,0,2), \
187 |                                          torch.tensor(r_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
188 |                                          torch.tensor(done_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
189 |                                          torch.tensor(prob_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
190 |                                          torch.tensor(need_move_batch, dtype=torch.float, device=self.device).permute(1,0,2)
191 | 
192 | 
193 |         return s, a, m, r, s_prime, done_mask, prob, need_move
194 | 
195 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | tensorboardX
3 | matplotlib
4 | kaggle-environments
5 | visdom==0.1.8.9
--------------------------------------------------------------------------------
/rewarders/rewarder_basic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def calc_reward(rew, prev_obs, obs):
4 |     ball_x, ball_y, ball_z = obs['ball']
5 |     MIDDLE_X, PENALTY_X, END_X = 0.2, 0.64, 1.0
6 |     PENALTY_Y, END_Y = 0.27, 0.42
7 | 
8 |     ball_position_r = 0.0
9 |     if (-END_X <= ball_x and ball_x < -PENALTY_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
10 |         ball_position_r = -2.0
11 |     elif (-END_X <= ball_x and ball_x < -MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
12 |         ball_position_r = -1.0
13 |     elif (-MIDDLE_X <= ball_x and ball_x <= MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
14 |         ball_position_r = 0.0
15 |     elif (PENALTY_X < ball_x and ball_x <= END_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
16 |         ball_position_r = 2.0
17 |     elif (MIDDLE_X < ball_x and ball_x <= END_X) and (-END_Y < ball_y and ball_y < END_Y):
18 |         ball_position_r = 1.0
19 |     else:
20 |         ball_position_r = 0.0
21 | 
22 |     left_yellow = np.sum(obs["left_team_yellow_card"]) - np.sum(prev_obs["left_team_yellow_card"])
23 |     right_yellow = np.sum(obs["right_team_yellow_card"]) - np.sum(prev_obs["right_team_yellow_card"])
24 |     yellow_r = right_yellow - left_yellow
25 | 
26 | 
27 |     win_reward = 0.0
28 |     if obs['steps_left'] == 0:
29 |         [my_score, opponent_score] = obs['score']
30 |         if my_score > opponent_score:
31 |             win_reward = 1.0
32 | 
33 |     reward = 5.0*win_reward + 5.0*rew + 0.003*ball_position_r + yellow_r
34 | 
35 | 
36 |     return reward
--------------------------------------------------------------------------------
/rewarders/rewarder_highpass.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def calc_reward(rew, prev_obs, obs):
4 |     ball_x, ball_y, ball_z = obs['ball']
5 |     MIDDLE_X, PENALTY_X, END_X = 0.2, 0.64, 1.0
6 |     PENALTY_Y, END_Y = 0.27, 0.42
7 | 
8 |     ball_position_r = 0.0
9 |     if (-END_X <= ball_x and ball_x < -PENALTY_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
10 |         ball_position_r = -1.0
11 |     elif (-END_X <= ball_x and ball_x < -MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
12 |         ball_position_r = -1.0
13 |     elif (-MIDDLE_X <= ball_x and ball_x <= MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
14 |         ball_position_r = 0.0
15 |     elif (PENALTY_X < ball_x and ball_x <= END_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
16 |         ball_position_r = 1.0
17 |     elif (MIDDLE_X < ball_x and ball_x <= END_X) and (-END_Y < ball_y and ball_y < END_Y):
18 |         ball_position_r = 1.0
19 |     else:
20 |         ball_position_r = 0.0
21 | 
22 |     left_yellow = np.sum(obs["left_team_yellow_card"]) - np.sum(prev_obs["left_team_yellow_card"])
23 |     right_yellow = np.sum(obs["right_team_yellow_card"]) - np.sum(prev_obs["right_team_yellow_card"])
24 |     yellow_r = right_yellow - left_yellow
25 | 
26 | 
27 |     win_reward = 0.0
28 |     if obs['steps_left'] == 0:
29 |         [my_score, opponent_score] = obs['score']
30 |         if my_score > opponent_score:
31 |             win_reward = 1.0
32 | 
33 |     reward = 5.0*win_reward + 5.0*rew + 0.003*ball_position_r + yellow_r
34 | 
35 | 
36 |     return reward
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import gfootball.env as football_env
2 | import time, pprint, json, os, importlib, shutil
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.distributions import Categorical
9 | import torch.multiprocessing as mp
10 | from tensorboardX import SummaryWriter
11 | 
12 | from actor import *
13 | from learner import *
14 | from evaluator import evaluator
15 | from datetime import datetime, timedelta
16 | 
17 | 
18 | def save_args(arg_dict):
19 |     os.makedirs(arg_dict["log_dir"])
20 |     args_info = json.dumps(arg_dict, indent=4)
21 |     f = open(arg_dict["log_dir"]+"/args.json","w")
22 |     f.write(args_info)
23 |     f.close()
24 | 
25 | def copy_models(dir_src, dir_dst): # src: source, dst: destination
26 |     # retrieve the list of saved models
27 |     l_cands = [f for f in os.listdir(dir_src) if os.path.isfile(os.path.join(dir_src, f)) and 'model_' in f]
28 |     l_cands = sorted(l_cands, key=lambda x: int(x.split('_')[-1].split('.')[0]))
29 | 
30 |     print(f"models to be copied: {l_cands}")
31 |     for m in l_cands:
32 |         shutil.copyfile(os.path.join(dir_src, m), os.path.join(dir_dst, m))
33 |     print(f"{len(l_cands)} models copied to the given directory")
34 | 
35 | def main(arg_dict):
36 |     os.environ['OPENBLAS_NUM_THREADS'] = '1'
37 |     cur_time = datetime.now() + timedelta(hours = 9)
38 |     arg_dict["log_dir"] = "logs/" + cur_time.strftime("[%m-%d]%H.%M.%S")
39 |     save_args(arg_dict)
40 |     if arg_dict["trained_model_path"] and 'kaggle' in arg_dict['env']:
41 |         copy_models(os.path.dirname(arg_dict['trained_model_path']), arg_dict['log_dir'])
42 | 
43 |     np.set_printoptions(precision=3)
44 |     np.set_printoptions(suppress=True)
45 |     pp = pprint.PrettyPrinter(indent=4)
46 |     torch.set_num_threads(1)
47 | 
48 |     fe = importlib.import_module("encoders." + arg_dict["encoder"])
49 |     fe = fe.FeatureEncoder()
50 |     arg_dict["feature_dims"] = fe.get_feature_dims()
51 | 
52 |     model = importlib.import_module("models." + arg_dict["model"])
53 |     cpu_device = torch.device('cpu')
54 |     center_model = model.Model(arg_dict)
55 | 
56 |     if arg_dict["trained_model_path"]:
57 |         checkpoint = torch.load(arg_dict["trained_model_path"], map_location=cpu_device)
58 |         optimization_step = checkpoint['optimization_step']
59 |         center_model.load_state_dict(checkpoint['model_state_dict'])
60 |         center_model.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
61 |         arg_dict["optimization_step"] = optimization_step
62 |         print("Trained model", arg_dict["trained_model_path"], "successfully loaded")
63 |     else:
64 |         optimization_step = 0
65 | 
66 |     model_dict = {
67 |         'optimization_step': optimization_step,
68 |         'model_state_dict': center_model.state_dict(),
69 |         'optimizer_state_dict': center_model.optimizer.state_dict(),
70 |     }
71 | 
72 |     path = arg_dict["log_dir"]+f"/model_{optimization_step}.tar"
73 |     torch.save(model_dict, path)
74 | 
75 |     center_model.share_memory()
76 |     data_queue = mp.Queue()
77 |     signal_queue = mp.Queue()
78 |     summary_queue = mp.Queue()
79 | 
80 |     processes = []
81 |     p = mp.Process(target=learner, args=(center_model, data_queue, signal_queue, summary_queue, arg_dict))
82 |     p.start()
83 |     processes.append(p)
84 |     for rank in range(arg_dict["num_processes"]):
85 |         if arg_dict["env"] == "11_vs_11_kaggle":
86 |             p = mp.Process(target=actor_self, args=(rank, center_model, data_queue, signal_queue, summary_queue, arg_dict))
87 |         else:
88 |             p = mp.Process(target=actor, args=(rank, center_model, data_queue, signal_queue, summary_queue, arg_dict))
89 |         p.start()
90 |         processes.append(p)
91 | 
92 |     if "env_evaluation" in arg_dict:
93 |         p = mp.Process(target=evaluator, args=(center_model, signal_queue, summary_queue, arg_dict))
94 |         p.start()
95 |         processes.append(p)
96 | 
97 |     for p in processes:
98 |         p.join()
99 | 
100 | 
101 | if __name__ == '__main__':
102 | 
103 |     arg_dict = {
104 |         "env": "11_vs_11_kaggle",
105 |         # "11_vs_11_kaggle" : environment used for self-play training
106 |         # "11_vs_11_stochastic" : environment used for training against a fixed opponent (rule-based AI)
107 |         "num_processes": 30,  # should be less than the number of cpu cores in your workstation.
108 | "batch_size": 32, 109 | "buffer_size": 6, 110 | "rollout_len": 30, 111 | 112 | "lstm_size" : 256, 113 | "k_epoch" : 3, 114 | "learning_rate" : 0.0001, 115 | "gamma" : 0.993, 116 | "lmbda" : 0.96, 117 | "entropy_coef" : 0.0001, 118 | "grad_clip" : 3.0, 119 | "eps_clip" : 0.1, 120 | 121 | "summary_game_window" : 10, 122 | "model_save_interval" : 300000, # number of gradient updates bewteen saving model 123 | 124 | "trained_model_path" : None, # use when you want to continue traning from given model. 125 | "latest_ratio" : 0.5, # works only for self_play training. 126 | "latest_n_model" : 10, # works only for self_play training. 127 | "print_mode" : False, 128 | 129 | "encoder" : "encoder_basic", 130 | "rewarder" : "rewarder_basic", 131 | "model" : "conv1d", 132 | "algorithm" : "ppo", 133 | 134 | "env_evaluation":'11_vs_11_hard_stochastic' # for evaluation of self-play trained agent (like validation set in Supervised Learning) 135 | } 136 | 137 | main(arg_dict) 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /view_match.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Collecting kaggle-environments\n", 13 | " Downloading kaggle_environments-1.3.14-py2.py3-none-any.whl (100 kB)\n", 14 | "\u001b[K |████████████████████████████████| 100 kB 4.0 MB/s ta 0:00:011\n", 15 | "\u001b[?25hRequirement already satisfied, skipping upgrade: jsonschema>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from kaggle-environments) (3.2.0)\n", 16 | "Requirement already satisfied, skipping upgrade: six>=1.11.0 in /usr/lib/python3/dist-packages (from jsonschema>=3.0.1->kaggle-environments) (1.11.0)\n", 17 | "Requirement already satisfied, skipping upgrade: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kaggle-environments) (20.2.0)\n", 18 | "Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kaggle-environments) (2.0.0)\n", 19 | "Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kaggle-environments) (50.3.2)\n", 20 | "Requirement already satisfied, skipping upgrade: pyrsistent>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kaggle-environments) (0.17.3)\n", 21 | "Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonschema>=3.0.1->kaggle-environments) (3.3.1)\n", 22 | "Installing collected packages: kaggle-environments\n", 23 | "Successfully installed kaggle-environments-1.3.14\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "# !pip3 install kaggle-environments -U" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "Staring a new environment 59f217e9-9c8a-4b1c-a052-6d20491863b4: with scenario: 11_vs_11_kaggle\n", 43 | "Resetting environment 59f217e9-9c8a-4b1c-a052-6d20491863b4: with scenario: 11_vs_11_kaggle\n" 44 | ] 45 | }, 46 | { 47 | "ename": "KeyboardInterrupt", 48 | "evalue": "", 49 | "output_type": 
"error", 50 | "traceback": [ 51 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 52 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 53 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"kaggle_simulations/agent/main.py\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"kaggle_simulations/agent/main.py\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"human\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwidth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m400\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m300\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 54 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/kaggle_environments/core.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, agents)\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mstart\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mperf_counter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 230\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mperf_counter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mstart\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfiguration\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunTimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 231\u001b[0;31m \u001b[0mactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrunner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mact\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 232\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msteps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 55 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/kaggle_environments/core.py\u001b[0m in \u001b[0;36mact\u001b[0;34m(none_action)\u001b[0m\n\u001b[1;32m 634\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpool\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mact_agent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mact_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 635\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 636\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mact_agent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mact_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 637\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0;31m# results is a list of tuples where the first element is an agent action and the second is the agent log\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 56 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/kaggle_environments/core.py\u001b[0m in \u001b[0;36mact_agent\u001b[0;34m(args)\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnone_action\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 98\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mact\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"observation\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 57 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/kaggle_environments/agent.py\u001b[0m in \u001b[0;36mact\u001b[0;34m(self, observation)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0mstart\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mperf_counter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprint_exc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merr_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 58 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/kaggle_environments/agent.py\u001b[0m in \u001b[0;36mcallable_agent\u001b[0;34m(observation, configuration)\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 123\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0magent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;31m 
\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 124\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 59 | "\u001b[0;32m\u001b[0m in \u001b[0;36magent\u001b[0;34m(obs)\u001b[0m\n", 60 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 720\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 721\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 723\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 724\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 61 | "\u001b[0;32m\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, state_dict)\u001b[0m\n", 62 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 723\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 724\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 725\u001b[0;31m self._forward_hooks.values()):\n\u001b[0m\u001b[1;32m 726\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 727\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 63 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "from kaggle_environments import make\n", 69 | "env = make(\"football\", debug=True, configuration={\"save_video\": True, \"scenario_name\": \"11_vs_11_kaggle\", \n", 70 | " \"debug\":True,\"running_in_notebook\": True})\n", 71 | "\n", 72 | "\n", 73 | "env.run([\"kaggle_simulations/agent/main.py\", \"kaggle_simulations/agent/main.py\"])\n", 74 | "env.render(mode=\"human\", width=400, height=300)" 75 | ] 76 | } 77 | ], 78 | "metadata": { 79 | "kernelspec": { 80 | "display_name": "Python 3", 81 | "language": "python", 82 | "name": "python3" 83 | }, 84 | "language_info": { 85 | "codemirror_mode": { 86 | "name": "ipython", 
87 | "version": 3 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python", 93 | "pygments_lexer": "ipython3", 94 | "version": "3.7.0" 95 | } 96 | }, 97 | "nbformat": 4, 98 | "nbformat_minor": 4 99 | } 100 | --------------------------------------------------------------------------------