├── .gitignore
├── LICENSE
├── README.md
├── actor.py
├── algos
│   └── ppo.py
├── data
│   └── images
│       ├── system.PNG
│       ├── system2.PNG
│       └── trained_result.png
├── encoders
│   ├── encoder_basic.py
│   └── encoder_highpass.py
├── evaluator.py
├── kaggle_simulations
│   └── agent
│       ├── main.py
│       └── model_133997184.tar
├── learner.py
├── models
│   ├── conv1d.py
│   ├── conv1d_larger.py
│   ├── simple_attention.py
│   ├── team_fc.py
│   └── team_pooling.py
├── requirements.txt
├── rewarders
│   ├── rewarder_basic.py
│   └── rewarder_highpass.py
├── train.py
└── view_match.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | *.swp
3 | *.pyc
4 | *.pkl
5 |
6 | logs/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 seungeunrho
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Google Research Football Competition - liveinparis team
2 |
3 | * The exact code used by the team "liveinparis" at [the kaggle football competition](https://www.kaggle.com/c/google-football)
4 |
5 | * Implementation of self-play RL from scratch with distributed actors
6 |
7 | * The final version of the agent ranked [6th/1141](https://www.kaggle.com/c/google-football/leaderboard) (gold prize)
8 |
9 | * You can find all the training details [here](https://www.kaggle.com/c/google-football/discussion/201376)
10 |
11 |
12 | ## Dependencies
13 | 1. [google-research football](https://github.com/google-research/football)
14 | 2. PyTorch
15 | 3. tensorboardX
16 | 4. kaggle_environments
17 |
18 | ## Usage
19 | ```bash
20 | python3 train.py
21 | # You can find args and hyper-parameters at the "arg_dict" in train.py.
22 | ```
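
The full `arg_dict` lives in `train.py` and is not reproduced in this README. As a rough orientation only, the snippet below lists keys that `actor.py`, `learner.py`, and `algos/ppo.py` read from it; the values shown are illustrative (partly taken from `kaggle_simulations/agent/main.py`), not the tuned training configuration:

```python
# Illustrative subset of the hyper-parameters (see train.py for the authoritative dict).
arg_dict = {
    "env": "11_vs_11_kaggle",       # gfootball scenario (self-play environment)
    "model": "conv1d",              # models/conv1d.py
    "algorithm": "ppo",             # algos/ppo.py
    "encoder": "encoder_basic",     # encoders/encoder_basic.py
    "rewarder": "rewarder_basic",   # rewarders/rewarder_basic.py
    "lstm_size": 256,
    "learning_rate": 0.0002,
    "gamma": 0.992,
    "lmbda": 0.96,
    "k_epoch": 3,
    "rollout_len": 30,              # transitions per rollout
    "batch_size": 32,               # rollouts per mini-batch
    # ... plus buffer_size, eps_clip, grad_clip, entropy_coef, move_entropy_coef,
    # latest_ratio, latest_n_model, summary_game_window, model_save_interval,
    # print_mode, env_evaluation, log_dir, etc.
}
```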
23 |
24 | ## Training curves (vs. rule-based AI)
25 | ![trained_result](data/images/trained_result.png)
26 | (x-axis: number of episodes)
27 | 1. Orange curve: vs. easy-level AI
28 | 2. Blue curve: vs. medium-level AI
29 |
30 | ## Learning system
31 | ![system](data/images/system.PNG)
32 | ![system2](data/images/system2.PNG)
33 | Each actor runs the simulation and sends rollouts (transition tuples of horizon length 30) to the central learner, which updates the agent with the provided rollouts. Since we chose an on-policy update algorithm, we used a trick to ensure strict on-policyness (the behavior policy and the learning policy are identical): each actor pauses its simulation while the learner is updating the policy, and resumes only after receiving the newest model from the learner.
34 | We used one actor per CPU core. The final version of the agent was trained with 30 CPU cores and 1 GPU for 370 hours (CPU: AMD Ryzen Threadripper 2950X, GPU: RTX 2080). This corresponds to roughly 450,000 episodes and 133M mini-batch updates (each mini-batch consists of 32 rollouts, and each rollout of 30 state transitions).
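
Below is a minimal, self-contained sketch of that pause-and-resume handshake, simplified from `actor.py` and `learner.py` (the real processes step the football environment and run the PPO update; here both are replaced by dummy work, and plain `multiprocessing` stands in for `torch.multiprocessing`, which the repo uses so model parameters can be shared between processes):

```python
# Sketch of the on-policy synchronization trick (not the actual training code).
# While the learner holds a token in signal_queue, actors pause instead of producing data,
# so every rollout is generated by the newest policy. Runs until interrupted.
import time
import multiprocessing as mp

def actor(data_queue, signal_queue):
    while True:
        while signal_queue.qsize() > 0:   # learner is updating: pause the simulation
            time.sleep(0.02)
        data_queue.put("rollout")         # stands in for 30 env transitions
        time.sleep(0.01)

def learner(data_queue, signal_queue, batch_size=32):
    while True:
        if data_queue.qsize() >= batch_size:
            signal_queue.put(1)           # ask actors to stop stepping
            for _ in range(batch_size):   # drain one mini-batch of rollouts
                data_queue.get()
            time.sleep(0.1)               # stands in for the PPO update
            signal_queue.get()            # actors resume with the new weights
        else:
            time.sleep(0.1)

if __name__ == "__main__":
    data_q, signal_q = mp.Queue(), mp.Queue()
    procs = [mp.Process(target=actor, args=(data_q, signal_q)) for _ in range(2)]
    procs.append(mp.Process(target=learner, args=(data_q, signal_q)))
    for p in procs:
        p.start()
    for p in procs:
        p.join()
```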
35 |
36 |
37 |
--------------------------------------------------------------------------------
/actor.py:
--------------------------------------------------------------------------------
1 | import gfootball.env as football_env
2 | import time, pprint, importlib, random, os
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import torch.optim as optim
7 | from torch.distributions import Categorical
8 | import torch.multiprocessing as mp
9 | from os import listdir
10 | from os.path import isfile, join
11 | import numpy as np
12 |
13 | from datetime import datetime, timedelta
14 |
15 |
16 | def state_to_tensor(state_dict, h_in):
17 | player_state = torch.from_numpy(state_dict["player"]).float().unsqueeze(0).unsqueeze(0)
18 | ball_state = torch.from_numpy(state_dict["ball"]).float().unsqueeze(0).unsqueeze(0)
19 | left_team_state = torch.from_numpy(state_dict["left_team"]).float().unsqueeze(0).unsqueeze(0)
20 | left_closest_state = torch.from_numpy(state_dict["left_closest"]).float().unsqueeze(0).unsqueeze(0)
21 | right_team_state = torch.from_numpy(state_dict["right_team"]).float().unsqueeze(0).unsqueeze(0)
22 | right_closest_state = torch.from_numpy(state_dict["right_closest"]).float().unsqueeze(0).unsqueeze(0)
23 | avail = torch.from_numpy(state_dict["avail"]).float().unsqueeze(0).unsqueeze(0)
24 |
25 | state_dict_tensor = {
26 | "player" : player_state,
27 | "ball" : ball_state,
28 | "left_team" : left_team_state,
29 | "left_closest" : left_closest_state,
30 | "right_team" : right_team_state,
31 | "right_closest" : right_closest_state,
32 | "avail" : avail,
33 | "hidden" : h_in
34 | }
35 | return state_dict_tensor
36 |
37 |
38 | def get_action(a_prob, m_prob):
39 |
40 | a = Categorical(a_prob).sample().item()
41 | m, need_m = 0, 0
42 | prob_selected_a = a_prob[0][0][a].item()
43 | prob_selected_m = 0
44 | if a==0:  # a == 0 : no-op (gfootball action 0, idle)
45 | real_action = a
46 | prob = prob_selected_a
47 | elif a==1:  # a == 1 : move, so also sample one of the 8 directions from the move head
48 | m = Categorical(m_prob).sample().item()
49 | need_m = 1
50 | real_action = m + 1  # gfootball actions 1-8 are the movement directions
51 | prob_selected_m = m_prob[0][0][m].item()
52 | prob = prob_selected_a * prob_selected_m
53 | else:  # a >= 2 : remaining actions (long_pass .. release_dribble) map to gfootball actions 9-18
54 | real_action = a + 7
55 | prob = prob_selected_a
56 |
57 | assert prob != 0, 'prob 0 ERROR!!!! a : {}, m:{} {}, {}'.format(a,m,prob_selected_a,prob_selected_m)
58 |
59 | return real_action, a, m, need_m, prob, prob_selected_a, prob_selected_m
60 |
61 | def actor(actor_num, center_model, data_queue, signal_queue, summary_queue, arg_dict):
62 | os.environ['OPENBLAS_NUM_THREADS'] = '1'
63 | print("Actor process {} started".format(actor_num))
64 | fe_module = importlib.import_module("encoders." + arg_dict["encoder"])
65 | rewarder = importlib.import_module("rewarders." + arg_dict["rewarder"])
66 | imported_model = importlib.import_module("models." + arg_dict["model"])
67 |
68 | fe = fe_module.FeatureEncoder()
69 | model = imported_model.Model(arg_dict)
70 | model.load_state_dict(center_model.state_dict())
71 |
72 | env = football_env.create_environment(env_name=arg_dict["env"], representation="raw", stacked=False, logdir='/tmp/football', \
73 | write_goal_dumps=False, write_full_episode_dumps=False, render=False)
74 | n_epi = 0
75 | rollout = []
76 | while True: # episode loop
77 | env.reset()
78 | done = False
79 | steps, score, tot_reward, win = 0, 0, 0, 0
80 | n_epi += 1
81 | h_out = (torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float),
82 | torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float))
83 |
84 | loop_t, forward_t, wait_t = 0.0, 0.0, 0.0
85 | obs = env.observation()
86 |
87 | while not done: # step loop
88 | init_t = time.time()
89 |
90 | is_stopped = False
91 | while signal_queue.qsize() > 0:
92 | time.sleep(0.02)
93 | is_stopped = True
94 | if is_stopped:
95 | model.load_state_dict(center_model.state_dict())
96 | wait_t += time.time() - init_t
97 |
98 | h_in = h_out
99 | state_dict = fe.encode(obs[0])
100 | state_dict_tensor = state_to_tensor(state_dict, h_in)
101 |
102 | t1 = time.time()
103 | with torch.no_grad():
104 | a_prob, m_prob, _, h_out = model(state_dict_tensor)
105 | forward_t += time.time()-t1
106 | real_action, a, m, need_m, prob, prob_selected_a, prob_selected_m = get_action(a_prob, m_prob)
107 |
108 | prev_obs = obs
109 | obs, rew, done, info = env.step(real_action)
110 | fin_r = rewarder.calc_reward(rew, prev_obs[0], obs[0])
111 | state_prime_dict = fe.encode(obs[0])
112 |
113 | (h1_in, h2_in) = h_in
114 | (h1_out, h2_out) = h_out
115 | state_dict["hidden"] = (h1_in.numpy(), h2_in.numpy())
116 | state_prime_dict["hidden"] = (h1_out.numpy(), h2_out.numpy())
117 | transition = (state_dict, a, m, fin_r, state_prime_dict, prob, done, need_m)
118 | rollout.append(transition)
119 | if len(rollout) == arg_dict["rollout_len"]:
120 | data_queue.put(rollout)
121 | rollout = []
122 | model.load_state_dict(center_model.state_dict())
123 |
124 | steps += 1
125 | score += rew
126 | tot_reward += fin_r
127 |
128 | if arg_dict['print_mode']:
129 | print_status(steps,a,m,prob_selected_a,prob_selected_m,prev_obs,obs,fin_r,tot_reward)
130 | loop_t += time.time()-init_t
131 |
132 | if done:
133 | if score > 0:
134 | win = 1
135 | print("score",score,"total reward",tot_reward)
136 | summary_data = (win, score, tot_reward, steps, 0, loop_t/steps, forward_t/steps, wait_t/steps)
137 | summary_queue.put(summary_data)
138 |
139 | def select_opponent(arg_dict):
140 | onlyfiles_lst = [f for f in listdir(arg_dict["log_dir"]) if isfile(join(arg_dict["log_dir"], f))]
141 | model_num_lst = []
142 | for file_name in onlyfiles_lst:
143 | if file_name[:6] == "model_":
144 | model_num = file_name[6:]
145 | model_num = model_num[:-4]
146 | model_num_lst.append(int(model_num))
147 | model_num_lst.sort()
148 |
149 | coin = random.random()
150 | if coin < arg_dict["latest_ratio"]:  # with probability latest_ratio, sample among the newest checkpoints
151 | if len(model_num_lst) > arg_dict["latest_n_model"]:
152 | opp_model_num = random.randint(len(model_num_lst)-arg_dict["latest_n_model"],len(model_num_lst)-1)
153 | else:
154 | opp_model_num = len(model_num_lst)-1
155 | else:
156 | opp_model_num = random.randint(0,len(model_num_lst)-1)
157 |
158 | model_name = "/model_"+str(model_num_lst[opp_model_num])+".tar"
159 | opp_model_path = arg_dict["log_dir"] + model_name
160 | return opp_model_num, opp_model_path
161 |
162 |
163 | def actor_self(actor_num, center_model, data_queue, signal_queue, summary_queue, arg_dict):
164 | print("Actor process {} started".format(actor_num))
165 | cpu_device = torch.device('cpu')
166 | fe_module = importlib.import_module("encoders." + arg_dict["encoder"])
167 | rewarder = importlib.import_module("rewarders." + arg_dict["rewarder"])
168 | imported_model = importlib.import_module("models." + arg_dict["model"])
169 |
170 | fe = fe_module.FeatureEncoder()
171 | model = imported_model.Model(arg_dict)
172 | model.load_state_dict(center_model.state_dict())
173 | opp_model = imported_model.Model(arg_dict)
174 |
175 | env = football_env.create_environment(env_name=arg_dict["env"], number_of_right_players_agent_controls=1, representation="raw", \
176 | stacked=False, logdir='/tmp/football', write_goal_dumps=False, write_full_episode_dumps=False, \
177 | render=False)
178 |
179 | n_epi = 0
180 | rollout = []
181 | while True: # episode loop
182 | opp_model_num, opp_model_path = select_opponent(arg_dict)
183 | checkpoint = torch.load(opp_model_path, map_location=cpu_device)
184 | opp_model.load_state_dict(checkpoint['model_state_dict'])
185 | print("Current Opponent model Num:{}, Path:{} successfully loaded".format(opp_model_num, opp_model_path))
186 | del checkpoint
187 |
188 | env.reset()
189 | done = False
190 | steps, score, tot_reward, win = 0, 0, 0, 0
191 | n_epi += 1
192 | h_out = (torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float),
193 | torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float))
194 | opp_h_out = (torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float),
195 | torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float))
196 |
197 | loop_t, forward_t, wait_t = 0.0, 0.0, 0.0
198 | [obs, opp_obs] = env.observation()
199 |
200 | while not done: # step loop
201 | init_t = time.time()
202 | is_stopped = False
203 | while signal_queue.qsize() > 0:
204 | time.sleep(0.02)
205 | is_stopped = True
206 | if is_stopped:
207 | model.load_state_dict(center_model.state_dict())
208 | wait_t += time.time() - init_t
209 |
210 | h_in = h_out
211 | opp_h_in = opp_h_out
212 | state_dict = fe.encode(obs)
213 | state_dict_tensor = state_to_tensor(state_dict, h_in)
214 | opp_state_dict = fe.encode(opp_obs)
215 | opp_state_dict_tensor = state_to_tensor(opp_state_dict, opp_h_in)
216 |
217 | t1 = time.time()
218 | with torch.no_grad():
219 | a_prob, m_prob, _, h_out = model(state_dict_tensor)
220 | opp_a_prob, opp_m_prob, _, opp_h_out = opp_model(opp_state_dict_tensor)
221 | forward_t += time.time()-t1
222 |
223 | real_action, a, m, need_m, prob, prob_selected_a, prob_selected_m = get_action(a_prob, m_prob)
224 | opp_real_action, _, _, _, _, _, _ = get_action(opp_a_prob, opp_m_prob)
225 |
226 | prev_obs = obs
227 | [obs, opp_obs], [rew, _], done, info = env.step([real_action, opp_real_action])
228 | fin_r = rewarder.calc_reward(rew, prev_obs, obs)
229 | state_prime_dict = fe.encode(obs)
230 |
231 | (h1_in, h2_in) = h_in
232 | (h1_out, h2_out) = h_out
233 | state_dict["hidden"] = (h1_in.numpy(), h2_in.numpy())
234 | state_prime_dict["hidden"] = (h1_out.numpy(), h2_out.numpy())
235 | transition = (state_dict, a, m, fin_r, state_prime_dict, prob, done, need_m)
236 | rollout.append(transition)
237 | if len(rollout) == arg_dict["rollout_len"]:
238 | data_queue.put(rollout)
239 | rollout = []
240 | model.load_state_dict(center_model.state_dict())
241 |
242 | steps += 1
243 | score += rew
244 | tot_reward += fin_r
245 |
246 | if arg_dict['print_mode']:
247 | print_status(steps,a,m,prob_selected_a,prob_selected_m,prev_obs,obs,fin_r,tot_reward)
248 |
249 | loop_t += time.time()-init_t
250 |
251 | if done:
252 | if score > 0:
253 | win = 1
254 | print("score {}, total reward {:.2f}, opp num:{}, opp:{} ".format(score,tot_reward,opp_model_num, opp_model_path))
255 | summary_data = (win, score, tot_reward, steps, str(opp_model_num), loop_t/steps, forward_t/steps, wait_t/steps)
256 | summary_queue.put(summary_data)
257 |
258 |
--------------------------------------------------------------------------------
/algos/ppo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from torch.distributions import Categorical
6 | import torch.multiprocessing as mp
7 | import numpy as np
8 |
9 |
10 | class Algo():
11 | def __init__(self, arg_dict, device=None):
12 | self.gamma = arg_dict["gamma"]
13 | self.K_epoch = arg_dict["k_epoch"]
14 | self.lmbda = arg_dict["lmbda"]
15 | self.eps_clip = arg_dict["eps_clip"]
16 | self.entropy_coef = arg_dict["entropy_coef"]
17 | self.grad_clip = arg_dict["grad_clip"]
18 |
19 | def train(self, model, data):
20 | tot_loss_lst = []
21 | pi_loss_lst = []
22 | entropy_lst = []
23 | move_entropy_lst = []
24 | v_loss_lst = []
25 |
26 | # to calculate fixed advantages before update
27 | data_with_adv = []
28 | for mini_batch in data:
29 | s, a, m, r, s_prime, done_mask, prob, need_move = mini_batch
30 | with torch.no_grad():
31 | pi, pi_move, v, _ = model(s)
32 | pi_prime, pi_m_prime, v_prime, _ = model(s_prime)
33 |
34 | td_target = r + self.gamma * v_prime * done_mask
35 | delta = td_target - v # [horizon * batch_size * 1]
36 | delta = delta.detach().cpu().numpy()
37 |
38 | advantage_lst = []
39 | advantage = np.array([0])
40 | for delta_t in delta[::-1]:
41 | advantage = self.gamma * self.lmbda * advantage + delta_t
42 | advantage_lst.append(advantage)
43 | advantage_lst.reverse()
44 | advantage = torch.tensor(advantage_lst, dtype=torch.float, device=model.device)
45 |
46 | data_with_adv.append((s, a, m, r, s_prime, done_mask, prob, need_move, td_target, advantage))
47 |
48 | for i in range(self.K_epoch):
49 | for mini_batch in data_with_adv:
50 | s, a, m, r, s_prime, done_mask, prob, need_move, td_target, advantage = mini_batch
51 | pi, pi_move, v, _ = model(s)
52 | pi_prime, pi_m_prime, v_prime, _ = model(s_prime)
53 |
54 | pi_a = pi.gather(2,a)
55 | pi_m = pi_move.gather(2,m)
56 | pi_am = pi_a*(1-need_move + need_move*pi_m)
57 | ratio = torch.exp(torch.log(pi_am) - torch.log(prob)) # a/b == exp(log(a)-log(b))
58 |
59 | surr1 = ratio * advantage
60 | surr2 = torch.clamp(ratio, 1-self.eps_clip, 1+self.eps_clip) * advantage
61 | entropy = -torch.log(pi_am)
62 | move_entropy = -need_move*torch.log(pi_m)
63 |
64 | surr_loss = -torch.min(surr1, surr2)
65 | v_loss = F.smooth_l1_loss(v, td_target.detach())
66 | entropy_loss = -1*self.entropy_coef*entropy
67 | loss = surr_loss + v_loss + entropy_loss.mean()
68 | loss = loss.mean()
69 |
70 | model.optimizer.zero_grad()
71 | loss.backward()
72 | nn.utils.clip_grad_norm_(model.parameters(), self.grad_clip)
73 | model.optimizer.step()
74 |
75 | tot_loss_lst.append(loss.item())
76 | pi_loss_lst.append(surr_loss.mean().item())
77 | v_loss_lst.append(v_loss.item())
78 | entropy_lst.append(entropy.mean().item())
79 | n_need_move = torch.sum(need_move).item()
80 | if n_need_move == 0:
81 | move_entropy_lst.append(0)
82 | else:
83 | move_entropy_lst.append((torch.sum(move_entropy)/n_need_move).item())
84 | return np.mean(tot_loss_lst), np.mean(pi_loss_lst), np.mean(v_loss_lst), np.mean(entropy_lst), np.mean(move_entropy_lst)
85 |
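Reading aid (not part of the original file): the backward loop over `delta` in `train()` above implements the standard GAE recursion. Restated standalone for a single trajectory, with default `gamma`/`lmbda` values taken from `kaggle_simulations/agent/main.py` for illustration:

```python
# Illustrative restatement of the advantage loop in Algo.train (not used by the repo).
# delta_t = r_t + gamma * V(s_{t+1}) * done_mask_t - V(s_t)   (computed above as `delta`)
# A_t     = delta_t + gamma * lmbda * A_{t+1}                  (the reversed loop above)
import numpy as np

def gae_1d(delta, gamma=0.992, lmbda=0.96):
    """delta: 1-D array of TD errors for one trajectory, ordered t = 0 .. T-1."""
    advantage = np.zeros_like(delta, dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(delta))):
        running = delta[t] + gamma * lmbda * running
        advantage[t] = running
    return advantage
```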
--------------------------------------------------------------------------------
/data/images/system.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seungeunrho/football-paris/20d9ec464edce9153839b66a60dcb02874ea15ee/data/images/system.PNG
--------------------------------------------------------------------------------
/data/images/system2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seungeunrho/football-paris/20d9ec464edce9153839b66a60dcb02874ea15ee/data/images/system2.PNG
--------------------------------------------------------------------------------
/data/images/trained_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seungeunrho/football-paris/20d9ec464edce9153839b66a60dcb02874ea15ee/data/images/trained_result.png
--------------------------------------------------------------------------------
/encoders/encoder_basic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class FeatureEncoder:
4 | def __init__(self):
5 | self.active = -1
6 | self.player_pos_x, self.player_pos_y = 0, 0
7 |
8 | def get_feature_dims(self):
9 | dims = {
10 | 'player':29,
11 | 'ball':18,
12 | 'left_team':7,
13 | 'left_team_closest':7,
14 | 'right_team':7,
15 | 'right_team_closest':7,
16 | }
17 | return dims
18 |
19 | def encode(self, obs):
20 | player_num = obs['active']
21 |
22 | player_pos_x, player_pos_y = obs['left_team'][player_num]
23 | player_direction = np.array(obs['left_team_direction'][player_num])
24 | player_speed = np.linalg.norm(player_direction)
25 | player_role = obs['left_team_roles'][player_num]
26 | player_role_onehot = self._encode_role_onehot(player_role)
27 | player_tired = obs['left_team_tired_factor'][player_num]
28 | is_dribbling = obs['sticky_actions'][9]
29 | is_sprinting = obs['sticky_actions'][8]
30 |
31 | ball_x, ball_y, ball_z = obs['ball']
32 | ball_x_relative = ball_x - player_pos_x
33 | ball_y_relative = ball_y - player_pos_y
34 | ball_x_speed, ball_y_speed, _ = obs['ball_direction']
35 | ball_distance = np.linalg.norm([ball_x_relative, ball_y_relative])
36 | ball_speed = np.linalg.norm([ball_x_speed, ball_y_speed])
37 | ball_owned = 0.0
38 | if obs['ball_owned_team'] == -1:
39 | ball_owned = 0.0
40 | else:
41 | ball_owned = 1.0
42 | ball_owned_by_us = 0.0
43 | if obs['ball_owned_team'] == 0:
44 | ball_owned_by_us = 1.0
45 | elif obs['ball_owned_team'] == 1:
46 | ball_owned_by_us = 0.0
47 | else:
48 | ball_owned_by_us = 0.0
49 |
50 | ball_which_zone = self._encode_ball_which_zone(ball_x, ball_y)
51 |
52 | if ball_distance > 0.03:
53 | ball_far = 1.0
54 | else:
55 | ball_far = 0.0
56 |
57 | avail = self._get_avail(obs, ball_distance)
58 | player_state = np.concatenate((avail[2:], obs['left_team'][player_num], player_direction*100, [player_speed*100],
59 | player_role_onehot, [ball_far, player_tired, is_dribbling, is_sprinting]))
60 |
61 |
62 | ball_state = np.concatenate((np.array(obs['ball']),
63 | np.array(ball_which_zone),
64 | np.array([ball_x_relative, ball_y_relative]),
65 | np.array(obs['ball_direction'])*20,
66 | np.array([ball_speed*20, ball_distance, ball_owned, ball_owned_by_us])))
67 |
68 |
69 | obs_left_team = np.delete(obs['left_team'], player_num, axis=0)
70 | obs_left_team_direction = np.delete(obs['left_team_direction'], player_num, axis=0)
71 | left_team_relative = obs_left_team
72 | left_team_distance = np.linalg.norm(left_team_relative - obs['left_team'][player_num], axis=1, keepdims=True)
73 | left_team_speed = np.linalg.norm(obs_left_team_direction, axis=1, keepdims=True)
74 | left_team_tired = np.delete(obs['left_team_tired_factor'], player_num, axis=0).reshape(-1,1)
75 | left_team_state = np.concatenate((left_team_relative*2, obs_left_team_direction*100, left_team_speed*100, \
76 | left_team_distance*2, left_team_tired), axis=1)
77 | left_closest_idx = np.argmin(left_team_distance)
78 | left_closest_state = left_team_state[left_closest_idx]
79 |
80 |
81 | obs_right_team = np.array(obs['right_team'])
82 | obs_right_team_direction = np.array(obs['right_team_direction'])
83 | right_team_distance = np.linalg.norm(obs_right_team - obs['left_team'][player_num], axis=1, keepdims=True)
84 | right_team_speed = np.linalg.norm(obs_right_team_direction, axis=1, keepdims=True)
85 | right_team_tired = np.array(obs['right_team_tired_factor']).reshape(-1,1)
86 | right_team_state = np.concatenate((obs_right_team*2, obs_right_team_direction*100, right_team_speed*100, \
87 | right_team_distance*2, right_team_tired), axis=1)
88 | right_closest_idx = np.argmin(right_team_distance)
89 | right_closest_state = right_team_state[right_closest_idx]
90 |
91 | state_dict = {"player": player_state,
92 | "ball": ball_state,
93 | "left_team" : left_team_state,
94 | "left_closest" : left_closest_state,
95 | "right_team" : right_team_state,
96 | "right_closest" : right_closest_state,
97 | "avail" : avail}
98 |
99 | return state_dict
100 |
101 | def _get_avail(self, obs, ball_distance):
102 | avail = [1,1,1,1,1,1,1,1,1,1,1,1]
103 | NO_OP, MOVE, LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT, SPRINT, RELEASE_MOVE, \
104 | RELEASE_SPRINT, SLIDE, DRIBBLE, RELEASE_DRIBBLE = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
105 |
106 | if obs['ball_owned_team'] == 1: # opponents owning ball
107 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0
108 | elif obs['ball_owned_team'] == -1 and ball_distance > 0.03 and obs['game_mode'] == 0: # Ground ball and far from me
109 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0
110 | else: # my team owning ball
111 | avail[SLIDE] = 0
112 |
113 | # Dealing with sticky actions
114 | sticky_actions = obs['sticky_actions']
115 | if sticky_actions[8] == 0: # sprinting
116 | avail[RELEASE_SPRINT] = 0
117 |
118 | if sticky_actions[9] == 1: # dribbling
119 | avail[SLIDE] = 0
120 | else:
121 | avail[RELEASE_DRIBBLE] = 0
122 |
123 | if np.sum(sticky_actions[:8]) == 0:
124 | avail[RELEASE_MOVE] = 0
125 |
126 |
127 | # if too far, no shot
128 | ball_x, ball_y, _ = obs['ball']
129 | if ball_x < 0.64 or ball_y < -0.27 or 0.27 < ball_y:
130 | avail[SHOT] = 0
131 | elif (0.64 <= ball_x and ball_x<=1.0) and (-0.27<=ball_y and ball_y<=0.27):
132 | avail[HIGH_PASS], avail[LONG_PASS] = 0, 0
133 |
134 |
135 | if obs['game_mode'] == 2 and ball_x < -0.7: # Our GoalKick
136 | avail = [1,0,0,0,0,0,0,0,0,0,0,0]
137 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1
138 | return np.array(avail)
139 |
140 | elif obs['game_mode'] == 4 and ball_x > 0.9: # Our CornerKick
141 | avail = [1,0,0,0,0,0,0,0,0,0,0,0]
142 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1
143 | return np.array(avail)
144 |
145 | elif obs['game_mode'] == 6 and ball_x > 0.6: # Our PenaltyKick
146 | avail = [1,0,0,0,0,0,0,0,0,0,0,0]
147 | avail[SHOT] = 1
148 | return np.array(avail)
149 |
150 | return np.array(avail)
151 |
152 | def _encode_ball_which_zone(self, ball_x, ball_y):
153 | MIDDLE_X, PENALTY_X, END_X = 0.2, 0.64, 1.0
154 | PENALTY_Y, END_Y = 0.27, 0.42
155 | if (-END_X <= ball_x and ball_x < -PENALTY_X)and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
156 | return [1.0,0,0,0,0,0]
157 | elif (-END_X <= ball_x and ball_x < -MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
158 | return [0,1.0,0,0,0,0]
159 | elif (-MIDDLE_X <= ball_x and ball_x <= MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
160 | return [0,0,1.0,0,0,0]
161 | elif (PENALTY_X < ball_x and ball_x <=END_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
162 | return [0,0,0,1.0,0,0]
163 | elif (MIDDLE_X < ball_x and ball_x <=END_X) and (-END_Y < ball_y and ball_y < END_Y):
164 | return [0,0,0,0,1.0,0]
165 | else:
166 | return [0,0,0,0,0,1.0]
167 |
168 |
169 | def _encode_role_onehot(self, role_num):
170 | result = [0,0,0,0,0,0,0,0,0,0]
171 | result[role_num] = 1.0
172 | return np.array(result)
--------------------------------------------------------------------------------
/encoders/encoder_highpass.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class FeatureEncoder:
4 | def __init__(self):
5 | self.active = -1
6 | self.player_pos_x, self.player_pos_y = 0, 0
7 |
8 | def get_feature_dims(self):
9 | dims = {
10 | 'player':29,
11 | 'ball':18,
12 | 'left_team':7,
13 | 'left_team_closest':7,
14 | 'right_team':7,
15 | 'right_team_closest':7,
16 | }
17 | return dims
18 |
19 | def encode(self, obs):
20 | player_num = obs['active']
21 |
22 | player_pos_x, player_pos_y = obs['left_team'][player_num]
23 | player_direction = np.array(obs['left_team_direction'][player_num])
24 | player_speed = np.linalg.norm(player_direction)
25 | player_role = obs['left_team_roles'][player_num]
26 | player_role_onehot = self._encode_role_onehot(player_role)
27 | player_tired = obs['left_team_tired_factor'][player_num]
28 | is_dribbling = obs['sticky_actions'][9]
29 | is_sprinting = obs['sticky_actions'][8]
30 |
31 | ball_x, ball_y, ball_z = obs['ball']
32 | ball_x_relative = ball_x - player_pos_x
33 | ball_y_relative = ball_y - player_pos_y
34 | ball_x_speed, ball_y_speed, _ = obs['ball_direction']
35 | ball_distance = np.linalg.norm([ball_x_relative, ball_y_relative])
36 | ball_speed = np.linalg.norm([ball_x_speed, ball_y_speed])
37 | ball_owned = 0.0
38 | if obs['ball_owned_team'] == -1:
39 | ball_owned = 0.0
40 | else:
41 | ball_owned = 1.0
42 | ball_owned_by_us = 0.0
43 | if obs['ball_owned_team'] == 0:
44 | ball_owned_by_us = 1.0
45 | elif obs['ball_owned_team'] == 1:
46 | ball_owned_by_us = 0.0
47 | else:
48 | ball_owned_by_us = 0.0
49 |
50 | ball_which_zone = self._encode_ball_which_zone(ball_x, ball_y)
51 |
52 | if ball_distance > 0.03:
53 | ball_far = 1.0
54 | else:
55 | ball_far = 0.0
56 |
57 | avail = self._get_avail(obs, ball_distance)
58 | player_state = np.concatenate((avail[2:], obs['left_team'][player_num], player_direction*100, [player_speed*100],
59 | player_role_onehot, [ball_far, player_tired, is_dribbling, is_sprinting]))
60 |
61 |
62 | ball_state = np.concatenate((np.array(obs['ball']),
63 | np.array(ball_which_zone),
64 | np.array([ball_x_relative, ball_y_relative]),
65 | np.array(obs['ball_direction'])*20,
66 | np.array([ball_speed*20, ball_distance, ball_owned, ball_owned_by_us])))
67 |
68 |
69 | obs_left_team = np.delete(obs['left_team'], player_num, axis=0)
70 | obs_left_team_direction = np.delete(obs['left_team_direction'], player_num, axis=0)
71 | left_team_relative = obs_left_team
72 | left_team_distance = np.linalg.norm(left_team_relative - obs['left_team'][player_num], axis=1, keepdims=True)
73 | left_team_speed = np.linalg.norm(obs_left_team_direction, axis=1, keepdims=True)
74 | left_team_tired = np.delete(obs['left_team_tired_factor'], player_num, axis=0).reshape(-1,1)
75 | left_team_state = np.concatenate((left_team_relative*2, obs_left_team_direction*100, left_team_speed*100, \
76 | left_team_distance*2, left_team_tired), axis=1)
77 | left_closest_idx = np.argmin(left_team_distance)
78 | left_closest_state = left_team_state[left_closest_idx]
79 |
80 |
81 | obs_right_team = np.array(obs['right_team'])
82 | obs_right_team_direction = np.array(obs['right_team_direction'])
83 | right_team_distance = np.linalg.norm(obs_right_team - obs['left_team'][player_num], axis=1, keepdims=True)
84 | right_team_speed = np.linalg.norm(obs_right_team_direction, axis=1, keepdims=True)
85 | right_team_tired = np.array(obs['right_team_tired_factor']).reshape(-1,1)
86 | right_team_state = np.concatenate((obs_right_team*2, obs_right_team_direction*100, right_team_speed*100, \
87 | right_team_distance*2, right_team_tired), axis=1)
88 | right_closest_idx = np.argmin(right_team_distance)
89 | right_closest_state = right_team_state[right_closest_idx]
90 |
91 | state_dict = {"player": player_state,
92 | "ball": ball_state,
93 | "left_team" : left_team_state,
94 | "left_closest" : left_closest_state,
95 | "right_team" : right_team_state,
96 | "right_closest" : right_closest_state,
97 | "avail" : avail}
98 |
99 | return state_dict
100 |
101 | def _get_avail(self, obs, ball_distance):
102 | avail = [1,1,1,1,1,1,1,1,1,1,1,1]
103 | NO_OP, MOVE, LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT, SPRINT, RELEASE_MOVE, \
104 | RELEASE_SPRINT, SLIDE, DRIBBLE, RELEASE_DRIBBLE = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
105 |
106 | ball_x, ball_y, _ = obs['ball']
107 | if obs['ball_owned_team'] == 1: # opponents owning ball
108 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0
109 | elif obs['ball_owned_team'] == -1 and ball_distance > 0.03 and obs['game_mode'] == 0: # Ground ball and far from me
110 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0
111 | else: # my team owning ball
112 | avail[SLIDE] = 0
113 | if ball_x > 0.85 and (ball_y < -0.34 or ball_y > 0.34): # when the ball is near the opponent corner
114 | avail[LONG_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0
115 |
116 | # Dealing with sticky actions
117 | sticky_actions = obs['sticky_actions']
118 | if sticky_actions[8] == 0: # sprinting
119 | avail[RELEASE_SPRINT] = 0
120 |
121 | if sticky_actions[9] == 1: # dribbling
122 | avail[SLIDE] = 0
123 | else:
124 | avail[RELEASE_DRIBBLE] = 0
125 |
126 | if np.sum(sticky_actions[:8]) == 0:
127 | avail[RELEASE_MOVE] = 0
128 |
129 | if ball_x < 0.64 or ball_y < -0.27 or 0.27 < ball_y: # if too far, no shot
130 | avail[SHOT] = 0
131 | elif (0.64 <= ball_x and ball_x<=1.0) and (-0.27<=ball_y and ball_y<=0.27): # In the penalty area, no pass
132 | avail[HIGH_PASS], avail[LONG_PASS] = 0, 0
133 |
134 |
135 | if obs['game_mode'] == 2 and ball_x < -0.7: # Our GoalKick
136 | avail = [1,0,0,0,0,0,0,0,0,0,0,0]
137 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1
138 | return np.array(avail)
139 |
140 | elif obs['game_mode'] == 4 and ball_x > 0.9: # Our CornerKick
141 | avail = [1,0,0,0,0,0,0,0,0,0,0,0]
142 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1
143 | return np.array(avail)
144 |
145 | elif obs['game_mode'] == 6 and ball_x > 0.6: # Our PenaltyKick
146 | avail = [1,0,0,0,0,0,0,0,0,0,0,0]
147 | avail[SHOT] = 1
148 | return np.array(avail)
149 |
150 | return np.array(avail)
151 |
152 | def _encode_ball_which_zone(self, ball_x, ball_y):
153 | MIDDLE_X, PENALTY_X, END_X = 0.2, 0.64, 1.0
154 | PENALTY_Y, END_Y = 0.27, 0.42
155 | if (-END_X <= ball_x and ball_x < -PENALTY_X)and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
156 | return [1.0,0,0,0,0,0]
157 | elif (-END_X <= ball_x and ball_x < -MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
158 | return [0,1.0,0,0,0,0]
159 | elif (-MIDDLE_X <= ball_x and ball_x <= MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
160 | return [0,0,1.0,0,0,0]
161 | elif (PENALTY_X < ball_x and ball_x <=END_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
162 | return [0,0,0,1.0,0,0]
163 | elif (MIDDLE_X < ball_x and ball_x <=END_X) and (-END_Y < ball_y and ball_y < END_Y):
164 | return [0,0,0,0,1.0,0]
165 | else:
166 | return [0,0,0,0,0,1.0]
167 |
168 |
169 | def _encode_role_onehot(self, role_num):
170 | result = [0,0,0,0,0,0,0,0,0,0]
171 | result[role_num] = 1.0
172 | return np.array(result)
--------------------------------------------------------------------------------
/evaluator.py:
--------------------------------------------------------------------------------
1 | import gfootball.env as football_env
2 | import time, pprint, importlib, random
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.distributions import Categorical
9 | import torch.multiprocessing as mp
10 | from os import listdir
11 | from os.path import isfile, join
12 | from datetime import datetime, timedelta
13 |
14 | def state_to_tensor(state_dict, h_in):
15 | player_state = torch.from_numpy(state_dict["player"]).float().unsqueeze(0).unsqueeze(0)
16 | ball_state = torch.from_numpy(state_dict["ball"]).float().unsqueeze(0).unsqueeze(0)
17 | left_team_state = torch.from_numpy(state_dict["left_team"]).float().unsqueeze(0).unsqueeze(0)
18 | left_closest_state = torch.from_numpy(state_dict["left_closest"]).float().unsqueeze(0).unsqueeze(0)
19 | right_team_state = torch.from_numpy(state_dict["right_team"]).float().unsqueeze(0).unsqueeze(0)
20 | right_closest_state = torch.from_numpy(state_dict["right_closest"]).float().unsqueeze(0).unsqueeze(0)
21 | avail = torch.from_numpy(state_dict["avail"]).float().unsqueeze(0).unsqueeze(0)
22 |
23 | state_dict_tensor = {
24 | "player" : player_state,
25 | "ball" : ball_state,
26 | "left_team" : left_team_state,
27 | "left_closest" : left_closest_state,
28 | "right_team" : right_team_state,
29 | "right_closest" : right_closest_state,
30 | "avail" : avail,
31 | "hidden" : h_in
32 | }
33 | return state_dict_tensor
34 |
35 | def get_action(a_prob, m_prob):
36 | a = Categorical(a_prob).sample().item()
37 | m, need_m = 0, 0
38 | prob_selected_a = a_prob[0][0][a].item()
39 | prob_selected_m = 0
40 | if a==0:
41 | real_action = a
42 | prob = prob_selected_a
43 | elif a==1:
44 | m = Categorical(m_prob).sample().item()
45 | need_m = 1
46 | real_action = m + 1
47 | prob_selected_m = m_prob[0][0][m].item()
48 | prob = prob_selected_a* prob_selected_m
49 | else:
50 | real_action = a + 7
51 | prob = prob_selected_a
52 |
53 | assert prob != 0, 'prob 0 ERROR!!!! a : {}, m:{} {}, {}'.format(a,m,prob_selected_a,prob_selected_m)
54 |
55 | return real_action, a, m, need_m, prob, prob_selected_a, prob_selected_m
56 |
57 | def evaluator(center_model, signal_queue, summary_queue, arg_dict):
58 | print("Evaluator process started")
59 | fe_module = importlib.import_module("encoders." + arg_dict["encoder"])
60 | rewarder = importlib.import_module("rewarders." + arg_dict["rewarder"])
61 | imported_model = importlib.import_module("models." + arg_dict["model"])
62 |
63 | fe = fe_module.FeatureEncoder()
64 | model = center_model
65 |
66 | env = football_env.create_environment(env_name=arg_dict["env_evaluation"], representation="raw", stacked=False, logdir='/tmp/football', \
67 | write_goal_dumps=False, write_full_episode_dumps=False, render=False)
68 | n_epi = 0
69 | while True: # episode loop
70 | env.reset()
71 | done = False
72 | steps, score, tot_reward, win = 0, 0, 0, 0
73 | n_epi += 1
74 | h_out = (torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float),
75 | torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float))
76 |
77 | loop_t, forward_t, wait_t = 0.0, 0.0, 0.0
78 | obs = env.observation()
79 |
80 | while not done: # step loop
81 | init_t = time.time()
82 | is_stopped = False
83 | while signal_queue.qsize() > 0:
84 | time.sleep(0.02)
85 | is_stopped = True
86 | if is_stopped:
87 | #model.load_state_dict(center_model.state_dict())
88 | pass
89 | wait_t += time.time() - init_t
90 |
91 | h_in = h_out
92 | state_dict = fe.encode(obs[0])
93 | state_dict_tensor = state_to_tensor(state_dict, h_in)
94 |
95 | t1 = time.time()
96 | with torch.no_grad():
97 | a_prob, m_prob, _, h_out = model(state_dict_tensor)
98 | forward_t += time.time()-t1
99 | real_action, a, m, need_m, prob, prob_selected_a, prob_selected_m = get_action(a_prob, m_prob)
100 |
101 | prev_obs = obs
102 | obs, rew, done, info = env.step(real_action)
103 | fin_r = rewarder.calc_reward(rew, prev_obs[0], obs[0])
104 | state_prime_dict = fe.encode(obs[0])
105 |
106 | (h1_in, h2_in) = h_in
107 | (h1_out, h2_out) = h_out
108 | state_dict["hidden"] = (h1_in.numpy(), h2_in.numpy())
109 | state_prime_dict["hidden"] = (h1_out.numpy(), h2_out.numpy())
110 | transition = (state_dict, a, m, fin_r, state_prime_dict, prob, done, need_m)
111 |
112 | steps += 1
113 | score += rew
114 | tot_reward += fin_r
115 |
116 | if arg_dict['print_mode']:
117 | print_status(steps,a,m,prob_selected_a,prob_selected_m,prev_obs,obs,fin_r,tot_reward)
118 |
119 | loop_t += time.time()-init_t
120 |
121 | if done:
122 | if score > 0:
123 | win = 1
124 | print("score",score,"total reward",tot_reward)
125 | summary_data = (win, score, tot_reward, steps, arg_dict['env_evaluation'], loop_t/steps, forward_t/steps, wait_t/steps)
126 | summary_queue.put(summary_data)
127 |
128 |
--------------------------------------------------------------------------------
/kaggle_simulations/agent/main.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | from torch.distributions import Categorical
6 | import numpy as np
7 | import time, os
8 |
9 | class PPO(nn.Module):
10 | def __init__(self, arg_dict, device=None):
11 | super(PPO, self).__init__()
12 | self.device=None
13 | if device:
14 | self.device = device
15 |
16 | self.arg_dict = arg_dict
17 |
18 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],64)
19 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],64)
20 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],48)
21 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],48)
22 | self.fc_left_closest = nn.Linear(arg_dict["feature_dims"]["left_team_closest"],48)
23 | self.fc_right_closest = nn.Linear(arg_dict["feature_dims"]["right_team_closest"],48)
24 |
25 | self.conv1d_left = nn.Conv1d(48, 36, 1, stride=1)
26 | self.conv1d_right = nn.Conv1d(48, 36, 1, stride=1)
27 | self.fc_left2 = nn.Linear(36*10,96)
28 | self.fc_right2 = nn.Linear(36*11,96)
29 | self.fc_cat = nn.Linear(96+96+64+64+48+48,arg_dict["lstm_size"])
30 |
31 | self.norm_player = nn.LayerNorm(64)
32 | self.norm_ball = nn.LayerNorm(64)
33 | self.norm_left = nn.LayerNorm(48)
34 | self.norm_left2 = nn.LayerNorm(96)
35 | self.norm_left_closest = nn.LayerNorm(48)
36 | self.norm_right = nn.LayerNorm(48)
37 | self.norm_right2 = nn.LayerNorm(96)
38 | self.norm_right_closest = nn.LayerNorm(48)
39 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"])
40 |
41 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"])
42 |
43 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 164)
44 | self.fc_pi_a2 = nn.Linear(164, 12)
45 | self.norm_pi_a1 = nn.LayerNorm(164)
46 |
47 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 164)
48 | self.fc_pi_m2 = nn.Linear(164, 8)
49 | self.norm_pi_m1 = nn.LayerNorm(164)
50 |
51 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 164)
52 | self.norm_v1 = nn.LayerNorm(164)
53 | self.fc_v2 = nn.Linear(164, 1, bias=False)
54 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"])
55 |
56 | self.gamma = arg_dict["gamma"]
57 | self.K_epoch = arg_dict["k_epoch"]
58 | self.lmbda = arg_dict["lmbda"]
59 | self.eps_clip = 0.2
60 | self.entropy_coef = arg_dict["entropy_coef"]
61 | self.move_entropy_coef = arg_dict["move_entropy_coef"]
62 |
63 | def forward(self, state_dict):
64 | player_state = state_dict["player"]
65 | ball_state = state_dict["ball"]
66 | left_team_state = state_dict["left_team"]
67 | left_closest_state = state_dict["left_closest"]
68 | right_team_state = state_dict["right_team"]
69 | right_closest_state = state_dict["right_closest"]
70 | avail = state_dict["avail"]
71 |
72 | player_embed = self.norm_player(self.fc_player(player_state))
73 | ball_embed = self.norm_ball(self.fc_ball(ball_state))
74 | left_team_embed = self.norm_left(self.fc_left(left_team_state)) # horizon, batch, n, dim
75 | left_closest_embed = self.norm_left_closest(self.fc_left_closest(left_closest_state))
76 | right_team_embed = self.norm_right(self.fc_right(right_team_state))
77 | right_closest_embed = self.norm_right_closest(self.fc_right_closest(right_closest_state))
78 |
79 | [horizon, batch_size, n_player, dim] = left_team_embed.size()
80 | left_team_embed = left_team_embed.view(horizon*batch_size, n_player, dim).permute(0,2,1) # horizon * batch, dim1, n
81 | left_team_embed = F.relu(self.conv1d_left(left_team_embed)).permute(0,2,1) # horizon * batch, n, dim2
82 | left_team_embed = left_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1) # horizon, batch, n * dim2
83 | left_team_embed = F.relu(self.norm_left2(self.fc_left2(left_team_embed)))
84 |
85 | right_team_embed = right_team_embed.view(horizon*batch_size, n_player+1, dim).permute(0,2,1) # horizon * batch, dim1, n
86 | right_team_embed = F.relu(self.conv1d_right(right_team_embed)).permute(0,2,1) # horizon * batch, n * dim2
87 | right_team_embed = right_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1)
88 | right_team_embed = F.relu(self.norm_right2(self.fc_right2(right_team_embed)))
89 |
90 | cat = torch.cat([player_embed, ball_embed, left_team_embed, right_team_embed, left_closest_embed, right_closest_embed], 2)
91 | cat = F.relu(self.norm_cat(self.fc_cat(cat)))
92 | h_in = state_dict["hidden"]
93 | out, h_out = self.lstm(cat, h_in)
94 |
95 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out)))
96 | a_out = self.fc_pi_a2(a_out)
97 | logit = a_out + (avail-1)*1e7
98 | prob = F.softmax(logit, dim=2)
99 |
100 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out)))
101 | prob_m = self.fc_pi_m2(prob_m)
102 | prob_m = F.softmax(prob_m, dim=2)
103 |
104 | v = F.relu(self.norm_v1(self.fc_v1(out)))
105 | v = self.fc_v2(v)
106 |
107 | return prob, prob_m, v, h_out
108 |
109 |
110 |
111 | class FeatureEncoder:
112 | def __init__(self):
113 | self.active = -1
114 | self.player_pos_x, self.player_pos_y = 0, 0
115 |
116 | def get_feature_dims(self):
117 | dims = {
118 | 'player':29,
119 | 'ball':18,
120 | 'left_team':7,
121 | 'left_team_closest':7,
122 | 'right_team':7,
123 | 'right_team_closest':7,
124 | }
125 | return dims
126 |
127 | def encode(self, obs):
128 | player_num = obs['active']
129 |
130 | player_pos_x, player_pos_y = obs['left_team'][player_num]
131 | player_direction = np.array(obs['left_team_direction'][player_num])
132 | player_speed = np.linalg.norm(player_direction)
133 | player_role = obs['left_team_roles'][player_num]
134 | player_role_onehot = self._encode_role_onehot(player_role)
135 | player_tired = obs['left_team_tired_factor'][player_num]
136 | is_dribbling = obs['sticky_actions'][9]
137 | is_sprinting = obs['sticky_actions'][8]
138 |
139 | ball_x, ball_y, ball_z = obs['ball']
140 | ball_x_relative = ball_x - player_pos_x
141 | ball_y_relative = ball_y - player_pos_y
142 | ball_x_speed, ball_y_speed, _ = obs['ball_direction']
143 | ball_distance = np.linalg.norm([ball_x_relative, ball_y_relative])
144 | ball_speed = np.linalg.norm([ball_x_speed, ball_y_speed])
145 | ball_owned = 0.0
146 | if obs['ball_owned_team'] == -1:
147 | ball_owned = 0.0
148 | else:
149 | ball_owned = 1.0
150 | ball_owned_by_us = 0.0
151 | if obs['ball_owned_team'] == 0:
152 | ball_owned_by_us = 1.0
153 | elif obs['ball_owned_team'] == 1:
154 | ball_owned_by_us = 0.0
155 | else:
156 | ball_owned_by_us = 0.0
157 |
158 | ball_which_zone = self._encode_ball_which_zone(ball_x, ball_y)
159 |
160 | if ball_distance > 0.03:
161 | ball_far = 1.0
162 | else:
163 | ball_far = 0.0
164 |
165 | avail = self._get_avail(obs, ball_distance)
166 | player_state = np.concatenate((avail[2:], obs['left_team'][player_num], player_direction*100, [player_speed*100],
167 | player_role_onehot, [ball_far, player_tired, is_dribbling, is_sprinting]))
168 |
169 |
170 | ball_state = np.concatenate((np.array(obs['ball']),
171 | np.array(ball_which_zone),
172 | np.array([ball_x_relative, ball_y_relative]),
173 | np.array(obs['ball_direction'])*20,
174 | np.array([ball_speed*20, ball_distance, ball_owned, ball_owned_by_us])))
175 |
176 |
177 | obs_left_team = np.delete(obs['left_team'], player_num, axis=0)
178 | obs_left_team_direction = np.delete(obs['left_team_direction'], player_num, axis=0)
179 | left_team_relative = obs_left_team
180 | left_team_distance = np.linalg.norm(left_team_relative - obs['left_team'][player_num], axis=1, keepdims=True)
181 | left_team_speed = np.linalg.norm(obs_left_team_direction, axis=1, keepdims=True)
182 | left_team_tired = np.delete(obs['left_team_tired_factor'], player_num, axis=0).reshape(-1,1)
183 | left_team_state = np.concatenate((left_team_relative*2, obs_left_team_direction*100, left_team_speed*100, \
184 | left_team_distance*2, left_team_tired), axis=1)
185 | left_closest_idx = np.argmin(left_team_distance)
186 | left_closest_state = left_team_state[left_closest_idx]
187 |
188 |
189 | obs_right_team = np.array(obs['right_team'])
190 | obs_right_team_direction = np.array(obs['right_team_direction'])
191 | right_team_distance = np.linalg.norm(obs_right_team - obs['left_team'][player_num], axis=1, keepdims=True)
192 | right_team_speed = np.linalg.norm(obs_right_team_direction, axis=1, keepdims=True)
193 | right_team_tired = np.array(obs['right_team_tired_factor']).reshape(-1,1)
194 | right_team_state = np.concatenate((obs_right_team*2, obs_right_team_direction*100, right_team_speed*100, \
195 | right_team_distance*2, right_team_tired), axis=1)
196 | right_closest_idx = np.argmin(right_team_distance)
197 | right_closest_state = right_team_state[right_closest_idx]
198 |
199 |
200 |
201 | state_dict = {"player": player_state,
202 | "ball": ball_state,
203 | "left_team" : left_team_state,
204 | "left_closest" : left_closest_state,
205 | "right_team" : right_team_state,
206 | "right_closest" : right_closest_state,
207 | "avail" : avail}
208 |
209 | return state_dict
210 |
211 | def _get_avail(self, obs, ball_distance):
212 | avail = [1,1,1,1,1,1,1,1,1,1,1,1]
213 | NO_OP, MOVE, LONG_PASS, HIGH_PASS, SHORT_PASS, SHOT, SPRINT, RELEASE_MOVE, \
214 | RELEASE_SPRINT, SLIDE, DRIBBLE, RELEASE_DRIBBLE = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
215 |
216 | ball_x, ball_y, _ = obs['ball']
217 | # When opponents owning ball ...
218 | if obs['ball_owned_team'] == 1: # opponents owning ball
219 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0
220 | elif obs['ball_owned_team'] == -1 and ball_distance > 0.03 and obs['game_mode'] == 0: # GR ball and far from me
221 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0, 0
222 | else:
223 | avail[SLIDE] = 0
224 | if ball_x > 0.85 and (ball_y < -0.34 or ball_y > 0.34):
225 | avail[LONG_PASS], avail[SHORT_PASS], avail[SHOT], avail[DRIBBLE] = 0, 0, 0, 0
226 |
227 |
228 | # Dealing with sticky actions
229 | sticky_actions = obs['sticky_actions']
230 | if sticky_actions[8] == 0: # sprinting
231 | avail[RELEASE_SPRINT] = 0
232 |
233 | if sticky_actions[9] == 1: # dribbling
234 | avail[SLIDE] = 0
235 | else:
236 | avail[RELEASE_DRIBBLE] = 0
237 |
238 | if np.sum(sticky_actions[:8]) == 0:
239 | avail[RELEASE_MOVE] = 0
240 |
241 |
242 | # if too far, no shot
243 |
244 | if ball_x < 0.64 or ball_y < -0.27 or 0.27 < ball_y:
245 | avail[SHOT] = 0
246 | elif (0.64 <= ball_x and ball_x<=1.0) and (-0.27<=ball_y and ball_y<=0.27):
247 | avail[HIGH_PASS], avail[LONG_PASS] = 0, 0
248 |
249 |
250 | if obs['game_mode'] == 2 and ball_x < -0.7: # Our GoalKick
251 | avail = [1,0,0,0,0,0,0,0,0,0,0,0]
252 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1
253 | return np.array(avail)
254 |
255 | elif obs['game_mode'] == 4 and ball_x > 0.9: # Our CornerKick
256 | avail = [1,0,0,0,0,0,0,0,0,0,0,0]
257 | avail[LONG_PASS], avail[HIGH_PASS], avail[SHORT_PASS] = 1, 1, 1
258 | return np.array(avail)
259 |
260 | elif obs['game_mode'] == 6 and ball_x > 0.6: # Our PenaltyKick
261 | avail = [1,0,0,0,0,0,0,0,0,0,0,0]
262 | avail[SHOT] = 1
263 | return np.array(avail)
264 |
265 | return np.array(avail)
266 |
267 | def _encode_ball_which_zone(self, ball_x, ball_y):
268 | MIDDLE_X, PENALTY_X, END_X = 0.2, 0.64, 1.0
269 | PENALTY_Y, END_Y = 0.27, 0.42
270 | if (-END_X <= ball_x and ball_x < -PENALTY_X)and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
271 | return [1.0,0,0,0,0,0]
272 | elif (-END_X <= ball_x and ball_x < -MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
273 | return [0,1.0,0,0,0,0]
274 | elif (-MIDDLE_X <= ball_x and ball_x <= MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
275 | return [0,0,1.0,0,0,0]
276 | elif (PENALTY_X < ball_x and ball_x <=END_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
277 | return [0,0,0,1.0,0,0]
278 | elif (MIDDLE_X < ball_x and ball_x <=END_X) and (-END_Y < ball_y and ball_y < END_Y):
279 | return [0,0,0,0,1.0,0]
280 | else:
281 | return [0,0,0,0,0,1.0]
282 |
283 |
284 | def _encode_role_onehot(self, role_num):
285 | result = [0,0,0,0,0,0,0,0,0,0]
286 | result[role_num] = 1.0
287 | return np.array(result)
288 |
289 |
290 |
291 | def state_to_tensor(state_dict, h_in):
292 | player_state = torch.from_numpy(state_dict["player"]).float().unsqueeze(0).unsqueeze(0)
293 | ball_state = torch.from_numpy(state_dict["ball"]).float().unsqueeze(0).unsqueeze(0)
294 | left_team_state = torch.from_numpy(state_dict["left_team"]).float().unsqueeze(0).unsqueeze(0)
295 | left_closest_state = torch.from_numpy(state_dict["left_closest"]).float().unsqueeze(0).unsqueeze(0)
296 | right_team_state = torch.from_numpy(state_dict["right_team"]).float().unsqueeze(0).unsqueeze(0)
297 | right_closest_state = torch.from_numpy(state_dict["right_closest"]).float().unsqueeze(0).unsqueeze(0)
298 | avail = torch.from_numpy(state_dict["avail"]).float().unsqueeze(0).unsqueeze(0)
299 |
300 | state_dict_tensor = {
301 | "player" : player_state,
302 | "ball" : ball_state,
303 | "left_team" : left_team_state,
304 | "left_closest" : left_closest_state,
305 | "right_team" : right_team_state,
306 | "right_closest" : right_closest_state,
307 | "avail" : avail,
308 | "hidden" : h_in
309 | }
310 | return state_dict_tensor
311 |
312 |
313 |
314 | fe = FeatureEncoder()
315 |
316 | arg_dict = {
317 | "lstm_size" : 256,
318 | "learning_rate" : 0.0002,
319 | "gamma" : 0.992,
320 | "lmbda" : 0.96,
321 | "entropy_coef" : 0.0,
322 | "move_entropy_coef" : 0.0,
323 | "trained_model_path" : "kaggle_simulations/agent/model_133997184.tar",
324 | "k_epoch" : 3,
325 |
326 | "arg_max" : True
327 |
328 | }
329 | arg_dict["feature_dims"] = fe.get_feature_dims()
330 | model = PPO(arg_dict)
331 | cpu_device = torch.device('cpu')
332 | checkpoint = torch.load(arg_dict["trained_model_path"], map_location=cpu_device)
333 | model.load_state_dict(checkpoint['model_state_dict'])
334 |
335 |
336 | hidden = (torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float),
337 | torch.zeros([1, 1, arg_dict["lstm_size"]], dtype=torch.float))
338 | steps = 0
339 |
340 |
341 | def agent(obs):
342 | global model
343 | global fe
344 | global hidden
345 | global steps
346 |
347 | steps +=1
348 |
349 | obs = obs['players_raw'][0]
350 | state_dict = fe.encode(obs)
351 | state_dict_tensor = state_to_tensor(state_dict, hidden)
352 | with torch.no_grad():
353 | a_prob, m_prob, _, hidden = model(state_dict_tensor)
354 |
355 | if arg_dict["arg_max"]:
356 | a = torch.argmax(a_prob).item()
357 | else:
358 | a = Categorical(a_prob).sample().item()
359 |
360 | real_action = 0
361 | if a==0:
362 | real_action = int(a)
363 | elif a==1:
364 | if arg_dict["arg_max"]:
365 | m = torch.argmax(m_prob).item()
366 | else:
367 | m = Categorical(m_prob).sample().item()
368 | real_action = int(m + 1)
369 | else:
370 | real_action = int(a + 7)
371 |
372 | return [real_action]
373 |
--------------------------------------------------------------------------------
/kaggle_simulations/agent/model_133997184.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seungeunrho/football-paris/20d9ec464edce9153839b66a60dcb02874ea15ee/kaggle_simulations/agent/model_133997184.tar
--------------------------------------------------------------------------------
/learner.py:
--------------------------------------------------------------------------------
1 | import gfootball.env as football_env
2 | import time, pprint, importlib
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.distributions import Categorical
9 | import torch.multiprocessing as mp
10 | from tensorboardX import SummaryWriter
11 |
12 | def write_summary(writer, arg_dict, summary_queue, n_game, loss_lst, pi_loss_lst, v_loss_lst, \
13 | entropy_lst, move_entropy_lst, optimization_step, self_play_board, win_evaluation, score_evaluation):
14 | win, score, tot_reward, game_len = [], [], [], []
15 | loop_t, forward_t, wait_t = [], [], []
16 |
17 | for i in range(arg_dict["summary_game_window"]):
18 | game_data = summary_queue.get()
19 | a,b,c,d,opp_num,t1,t2,t3 = game_data
20 | if arg_dict["env"] == "11_vs_11_kaggle":
21 | if opp_num in self_play_board:
22 | self_play_board[opp_num].append(a)
23 | else:
24 | self_play_board[opp_num] = [a]
25 |
26 | if 'env_evaluation' in arg_dict and opp_num==arg_dict['env_evaluation']:
27 | win_evaluation.append(a)
28 | score_evaluation.append(b)
29 | else:
30 | win.append(a)
31 | score.append(b)
32 | tot_reward.append(c)
33 | game_len.append(d)
34 | loop_t.append(t1)
35 | forward_t.append(t2)
36 | wait_t.append(t3)
37 |
38 | writer.add_scalar('game/win_rate', float(np.mean(win)), n_game)
39 | writer.add_scalar('game/score', float(np.mean(score)), n_game)
40 | writer.add_scalar('game/reward', float(np.mean(tot_reward)), n_game)
41 | writer.add_scalar('game/game_len', float(np.mean(game_len)), n_game)
42 | writer.add_scalar('train/step', float(optimization_step), n_game)
43 | writer.add_scalar('time/loop', float(np.mean(loop_t)), n_game)
44 | writer.add_scalar('time/forward', float(np.mean(forward_t)), n_game)
45 | writer.add_scalar('time/wait', float(np.mean(wait_t)), n_game)
46 | writer.add_scalar('train/loss', np.mean(loss_lst), n_game)
47 | writer.add_scalar('train/pi_loss', np.mean(pi_loss_lst), n_game)
48 | writer.add_scalar('train/v_loss', np.mean(v_loss_lst), n_game)
49 | writer.add_scalar('train/entropy', np.mean(entropy_lst), n_game)
50 | writer.add_scalar('train/move_entropy', np.mean(move_entropy_lst), n_game)
51 |
52 | mini_window = max(1, int(arg_dict['summary_game_window']/3))
53 | if len(win_evaluation)>=mini_window:
54 | writer.add_scalar('game/win_rate_evaluation', float(np.mean(win_evaluation)), n_game)
55 | writer.add_scalar('game/score_evaluation', float(np.mean(score_evaluation)), n_game)
56 | win_evaluation, score_evaluation = [], []
57 |
58 | for opp_num in self_play_board:
59 | if len(self_play_board[opp_num]) >= mini_window:
60 | label = 'self_play/'+opp_num
61 | writer.add_scalar(label, np.mean(self_play_board[opp_num][:mini_window]), n_game)
62 | self_play_board[opp_num] = self_play_board[opp_num][mini_window:]
63 |
64 | return win_evaluation, score_evaluation
65 |
66 | def save_model(model, arg_dict, optimization_step, last_saved_step):
67 | if optimization_step >= last_saved_step + arg_dict["model_save_interval"]:
68 | model_dict = {
69 | 'optimization_step': optimization_step,
70 | 'model_state_dict': model.state_dict(),
71 | 'optimizer_state_dict': model.optimizer.state_dict(),
72 | }
73 | path = arg_dict["log_dir"]+"/model_"+str(optimization_step)+".tar"
74 | torch.save(model_dict, path)
75 | print("Model saved :", path)
76 | return optimization_step
77 | else:
78 | return last_saved_step
79 |
80 | def get_data(queue, arg_dict, model):
81 | data = []
82 | for i in range(arg_dict["buffer_size"]):
83 | mini_batch_np = []
84 | for j in range(arg_dict["batch_size"]):
85 | rollout = queue.get()
86 | mini_batch_np.append(rollout)
87 | mini_batch = model.make_batch(mini_batch_np)
88 | data.append(mini_batch)
89 | return data
90 |
91 | def learner(center_model, queue, signal_queue, summary_queue, arg_dict):
92 | print("Learner process started")
93 | imported_model = importlib.import_module("models." + arg_dict["model"])
94 | imported_algo = importlib.import_module("algos." + arg_dict["algorithm"])
95 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
96 | model = imported_model.Model(arg_dict, device)
97 | model.load_state_dict(center_model.state_dict())
98 | model.optimizer.load_state_dict(center_model.optimizer.state_dict())
99 | algo = imported_algo.Algo(arg_dict)
100 |
101 | for state in model.optimizer.state.values():
102 | for k, v in state.items():
103 | if isinstance(v, torch.Tensor):
104 |                 state[k] = v.to(device)  # move optimizer state to the training device (works on CPU-only machines too)
105 | model.to(device)
106 |
107 | writer = SummaryWriter(logdir=arg_dict["log_dir"])
108 | optimization_step = 0
109 | if "optimization_step" in arg_dict:
110 | optimization_step = arg_dict["optimization_step"]
111 | last_saved_step = optimization_step
112 | n_game = 0
113 | loss_lst, pi_loss_lst, v_loss_lst, entropy_lst, move_entropy_lst = [], [], [], [], []
114 | self_play_board = {}
115 |
116 | win_evaluation, score_evaluation = [], []
117 |
118 | while True:
119 | if queue.qsize() > arg_dict["batch_size"]*arg_dict["buffer_size"]:
120 | last_saved_step = save_model(model, arg_dict, optimization_step, last_saved_step)
121 |
122 | signal_queue.put(1)
123 | data = get_data(queue, arg_dict, model)
124 | loss, pi_loss, v_loss, entropy, move_entropy = algo.train(model, data)
125 | optimization_step += arg_dict["batch_size"]*arg_dict["buffer_size"]*arg_dict["k_epoch"]
126 | print("step :", optimization_step, "loss", loss, "data_q", queue.qsize(), "summary_q", summary_queue.qsize())
127 |
128 | loss_lst.append(loss)
129 | pi_loss_lst.append(pi_loss)
130 | v_loss_lst.append(v_loss)
131 | entropy_lst.append(entropy)
132 | move_entropy_lst.append(move_entropy)
133 | center_model.load_state_dict(model.state_dict())
134 |
135 | if queue.qsize() > arg_dict["batch_size"]*arg_dict["buffer_size"]:
136 | print("warning. data remaining. queue size : ", queue.qsize())
137 |
138 | if summary_queue.qsize() > arg_dict["summary_game_window"]:
139 | win_evaluation, score_evaluation = write_summary(writer, arg_dict, summary_queue, n_game, loss_lst, pi_loss_lst,
140 | v_loss_lst, entropy_lst, move_entropy_lst, optimization_step,
141 | self_play_board, win_evaluation, score_evaluation)
142 | loss_lst, pi_loss_lst, v_loss_lst, entropy_lst, move_entropy_lst = [], [], [], [], []
143 | n_game += arg_dict["summary_game_window"]
144 |
145 | _ = signal_queue.get()
146 |
147 | else:
148 | time.sleep(0.1)
149 |
--------------------------------------------------------------------------------
/models/conv1d.py:
--------------------------------------------------------------------------------
1 | import time
2 | import pprint
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.distributions import Categorical
9 |
10 | class Model(nn.Module):
11 | def __init__(self, arg_dict, device=None):
12 | super(Model, self).__init__()
13 | self.device=None
14 | if device:
15 | self.device = device
16 |
17 | self.arg_dict = arg_dict
18 |
19 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],64)
20 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],64)
21 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],48)
22 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],48)
23 | self.fc_left_closest = nn.Linear(arg_dict["feature_dims"]["left_team_closest"],48)
24 | self.fc_right_closest = nn.Linear(arg_dict["feature_dims"]["right_team_closest"],48)
25 |
26 | self.conv1d_left = nn.Conv1d(48, 36, 1, stride=1)
27 | self.conv1d_right = nn.Conv1d(48, 36, 1, stride=1)
28 | self.fc_left2 = nn.Linear(36*10,96)
29 | self.fc_right2 = nn.Linear(36*11,96)
30 | self.fc_cat = nn.Linear(96+96+64+64+48+48,arg_dict["lstm_size"])
31 |
32 | self.norm_player = nn.LayerNorm(64)
33 | self.norm_ball = nn.LayerNorm(64)
34 | self.norm_left = nn.LayerNorm(48)
35 | self.norm_left2 = nn.LayerNorm(96)
36 | self.norm_left_closest = nn.LayerNorm(48)
37 | self.norm_right = nn.LayerNorm(48)
38 | self.norm_right2 = nn.LayerNorm(96)
39 | self.norm_right_closest = nn.LayerNorm(48)
40 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"])
41 |
42 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"])
43 |
44 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 164)
45 | self.fc_pi_a2 = nn.Linear(164, 12)
46 | self.norm_pi_a1 = nn.LayerNorm(164)
47 |
48 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 164)
49 | self.fc_pi_m2 = nn.Linear(164, 8)
50 | self.norm_pi_m1 = nn.LayerNorm(164)
51 |
52 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 164)
53 | self.norm_v1 = nn.LayerNorm(164)
54 | self.fc_v2 = nn.Linear(164, 1, bias=False)
55 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"])
56 |
57 | def forward(self, state_dict):
58 | player_state = state_dict["player"]
59 | ball_state = state_dict["ball"]
60 | left_team_state = state_dict["left_team"]
61 | left_closest_state = state_dict["left_closest"]
62 | right_team_state = state_dict["right_team"]
63 | right_closest_state = state_dict["right_closest"]
64 | avail = state_dict["avail"]
65 |
66 | player_embed = self.norm_player(self.fc_player(player_state))
67 | ball_embed = self.norm_ball(self.fc_ball(ball_state))
68 | left_team_embed = self.norm_left(self.fc_left(left_team_state)) # horizon, batch, n, dim
69 | left_closest_embed = self.norm_left_closest(self.fc_left_closest(left_closest_state))
70 | right_team_embed = self.norm_right(self.fc_right(right_team_state))
71 | right_closest_embed = self.norm_right_closest(self.fc_right_closest(right_closest_state))
72 |
73 | [horizon, batch_size, n_player, dim] = left_team_embed.size()
74 | left_team_embed = left_team_embed.view(horizon*batch_size, n_player, dim).permute(0,2,1) # horizon * batch, dim1, n
75 | left_team_embed = F.relu(self.conv1d_left(left_team_embed)).permute(0,2,1) # horizon * batch, n, dim2
76 | left_team_embed = left_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1) # horizon, batch, n * dim2
77 | left_team_embed = F.relu(self.norm_left2(self.fc_left2(left_team_embed)))
78 |
79 | right_team_embed = right_team_embed.view(horizon*batch_size, n_player+1, dim).permute(0,2,1) # horizon * batch, dim1, n
80 |         right_team_embed = F.relu(self.conv1d_right(right_team_embed)).permute(0,2,1) # horizon * batch, n, dim2
81 | right_team_embed = right_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1)
82 | right_team_embed = F.relu(self.norm_right2(self.fc_right2(right_team_embed)))
83 |
84 | cat = torch.cat([player_embed, ball_embed, left_team_embed, right_team_embed, left_closest_embed, right_closest_embed], 2)
85 | cat = F.relu(self.norm_cat(self.fc_cat(cat)))
86 | h_in = state_dict["hidden"]
87 | out, h_out = self.lstm(cat, h_in)
88 |
89 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out)))
90 | a_out = self.fc_pi_a2(a_out)
91 | logit = a_out + (avail-1)*1e7
92 | prob = F.softmax(logit, dim=2)
93 |
94 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out)))
95 | prob_m = self.fc_pi_m2(prob_m)
96 | prob_m = F.softmax(prob_m, dim=2)
97 |
98 | v = F.relu(self.norm_v1(self.fc_v1(out)))
99 | v = self.fc_v2(v)
100 |
101 | return prob, prob_m, v, h_out
102 |
103 | def make_batch(self, data):
104 | # data = [tr1, tr2, ..., tr10] * batch_size
105 | s_player_batch, s_ball_batch, s_left_batch, s_left_closest_batch, s_right_batch, s_right_closest_batch, avail_batch = [],[],[],[],[],[],[]
106 | s_player_prime_batch, s_ball_prime_batch, s_left_prime_batch, s_left_closest_prime_batch, \
107 | s_right_prime_batch, s_right_closest_prime_batch, avail_prime_batch = [],[],[],[],[],[],[]
108 | h1_in_batch, h2_in_batch, h1_out_batch, h2_out_batch = [], [], [], []
109 | a_batch, m_batch, r_batch, prob_batch, done_batch, need_move_batch = [], [], [], [], [], []
110 |
111 | for rollout in data:
112 | s_player_lst, s_ball_lst, s_left_lst, s_left_closest_lst, s_right_lst, s_right_closest_lst, avail_lst = [], [], [], [], [], [], []
113 | s_player_prime_lst, s_ball_prime_lst, s_left_prime_lst, s_left_closest_prime_lst, \
114 | s_right_prime_lst, s_right_closest_prime_lst, avail_prime_lst = [], [], [], [], [], [], []
115 | h1_in_lst, h2_in_lst, h1_out_lst, h2_out_lst = [], [], [], []
116 | a_lst, m_lst, r_lst, prob_lst, done_lst, need_move_lst = [], [], [], [], [], []
117 |
118 | for transition in rollout:
119 | s, a, m, r, s_prime, prob, done, need_move = transition
120 |
121 | s_player_lst.append(s["player"])
122 | s_ball_lst.append(s["ball"])
123 | s_left_lst.append(s["left_team"])
124 | s_left_closest_lst.append(s["left_closest"])
125 | s_right_lst.append(s["right_team"])
126 | s_right_closest_lst.append(s["right_closest"])
127 | avail_lst.append(s["avail"])
128 | h1_in, h2_in = s["hidden"]
129 | h1_in_lst.append(h1_in)
130 | h2_in_lst.append(h2_in)
131 |
132 | s_player_prime_lst.append(s_prime["player"])
133 | s_ball_prime_lst.append(s_prime["ball"])
134 | s_left_prime_lst.append(s_prime["left_team"])
135 | s_left_closest_prime_lst.append(s_prime["left_closest"])
136 | s_right_prime_lst.append(s_prime["right_team"])
137 | s_right_closest_prime_lst.append(s_prime["right_closest"])
138 | avail_prime_lst.append(s_prime["avail"])
139 | h1_out, h2_out = s_prime["hidden"]
140 | h1_out_lst.append(h1_out)
141 | h2_out_lst.append(h2_out)
142 |
143 | a_lst.append([a])
144 | m_lst.append([m])
145 | r_lst.append([r])
146 | prob_lst.append([prob])
147 | done_mask = 0 if done else 1
148 | done_lst.append([done_mask])
149 |                 need_move_lst.append([need_move])
150 |
151 | s_player_batch.append(s_player_lst)
152 | s_ball_batch.append(s_ball_lst)
153 | s_left_batch.append(s_left_lst)
154 | s_left_closest_batch.append(s_left_closest_lst)
155 | s_right_batch.append(s_right_lst)
156 | s_right_closest_batch.append(s_right_closest_lst)
157 | avail_batch.append(avail_lst)
158 | h1_in_batch.append(h1_in_lst[0])
159 | h2_in_batch.append(h2_in_lst[0])
160 |
161 | s_player_prime_batch.append(s_player_prime_lst)
162 | s_ball_prime_batch.append(s_ball_prime_lst)
163 | s_left_prime_batch.append(s_left_prime_lst)
164 | s_left_closest_prime_batch.append(s_left_closest_prime_lst)
165 | s_right_prime_batch.append(s_right_prime_lst)
166 | s_right_closest_prime_batch.append(s_right_closest_prime_lst)
167 | avail_prime_batch.append(avail_prime_lst)
168 | h1_out_batch.append(h1_out_lst[0])
169 | h2_out_batch.append(h2_out_lst[0])
170 |
171 | a_batch.append(a_lst)
172 | m_batch.append(m_lst)
173 | r_batch.append(r_lst)
174 | prob_batch.append(prob_lst)
175 | done_batch.append(done_lst)
176 | need_move_batch.append(need_move_lst)
177 |
178 |
179 | s = {
180 | "player": torch.tensor(s_player_batch, dtype=torch.float, device=self.device).permute(1,0,2),
181 | "ball": torch.tensor(s_ball_batch, dtype=torch.float, device=self.device).permute(1,0,2),
182 | "left_team": torch.tensor(s_left_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
183 | "left_closest": torch.tensor(s_left_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
184 | "right_team": torch.tensor(s_right_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
185 | "right_closest": torch.tensor(s_right_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
186 | "avail": torch.tensor(avail_batch, dtype=torch.float, device=self.device).permute(1,0,2),
187 | "hidden" : (torch.tensor(h1_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
188 | torch.tensor(h2_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
189 | }
190 |
191 | s_prime = {
192 | "player": torch.tensor(s_player_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
193 | "ball": torch.tensor(s_ball_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
194 | "left_team": torch.tensor(s_left_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
195 | "left_closest": torch.tensor(s_left_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
196 | "right_team": torch.tensor(s_right_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
197 | "right_closest": torch.tensor(s_right_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
198 | "avail": torch.tensor(avail_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
199 | "hidden" : (torch.tensor(h1_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
200 | torch.tensor(h2_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
201 | }
202 |
203 | a,m,r,done_mask,prob,need_move = torch.tensor(a_batch, device=self.device).permute(1,0,2), \
204 | torch.tensor(m_batch, device=self.device).permute(1,0,2), \
205 | torch.tensor(r_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
206 | torch.tensor(done_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
207 | torch.tensor(prob_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
208 | torch.tensor(need_move_batch, dtype=torch.float, device=self.device).permute(1,0,2)
209 |
210 | return s, a, m, r, s_prime, done_mask, prob, need_move
211 |
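
Note: the policy head masks unavailable actions before the softmax. `avail` is a 0/1 vector, so `(avail - 1) * 1e7` leaves available logits untouched and pushes forbidden ones to a huge negative value, giving them effectively zero probability. A tiny self-contained illustration with toy numbers (2-D here instead of the model's (horizon, batch, action) layout):

```python
import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 1.0, 0.5, -0.3]])
avail = torch.tensor([[1.0, 0.0, 1.0, 1.0]])   # action 1 is not available
masked = logits + (avail - 1) * 1e7
print(F.softmax(masked, dim=1))                # action 1 gets ~0 probability
```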
--------------------------------------------------------------------------------
/models/conv1d_larger.py:
--------------------------------------------------------------------------------
1 | import time
2 | import pprint
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.distributions import Categorical
9 |
10 | class Model(nn.Module):
11 | def __init__(self, arg_dict, device=None):
12 | super(Model, self).__init__()
13 | self.device=None
14 | if device:
15 | self.device = device
16 |
17 | self.arg_dict = arg_dict
18 |
19 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],64)
20 | self.fc_player2 = nn.Linear(64,64)
21 |
22 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],64)
23 | self.fc_ball2 = nn.Linear(64,64)
24 |
25 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],48)
26 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],48)
27 | self.fc_left_closest = nn.Linear(arg_dict["feature_dims"]["left_team_closest"],48)
28 | self.fc_left_closest2 = nn.Linear(48,48)
29 |
30 | self.fc_right_closest = nn.Linear(arg_dict["feature_dims"]["right_team_closest"],48)
31 | self.fc_right_closest2 = nn.Linear(48,48)
32 |
33 |
34 | self.conv1d_left = nn.Conv1d(48, 36, 1, stride=1)
35 | self.conv1d_left2 = nn.Conv1d(36, 36, 1, stride=1)
36 | self.conv1d_right = nn.Conv1d(48, 36, 1, stride=1)
37 | self.conv1d_right2 = nn.Conv1d(36, 36, 1, stride=1)
38 | self.fc_left2 = nn.Linear(36*10,96)
39 | self.fc_right2 = nn.Linear(36*11,96)
40 | self.fc_cat = nn.Linear(96+96+64+64+48+48,arg_dict["lstm_size"])
41 |
42 | self.norm_player = nn.LayerNorm(64)
43 | self.norm_player2 = nn.LayerNorm(64)
44 | self.norm_ball = nn.LayerNorm(64)
45 | self.norm_ball2 = nn.LayerNorm(64)
46 | self.norm_left = nn.LayerNorm(48)
47 | self.norm_left2 = nn.LayerNorm(96)
48 | self.norm_left_closest = nn.LayerNorm(48)
49 | self.norm_left_closest2 = nn.LayerNorm(48)
50 | self.norm_right = nn.LayerNorm(48)
51 | self.norm_right2 = nn.LayerNorm(96)
52 | self.norm_right_closest = nn.LayerNorm(48)
53 | self.norm_right_closest2 = nn.LayerNorm(48)
54 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"])
55 |
56 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"])
57 |
58 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 164)
59 | self.fc_pi_a2 = nn.Linear(164, 12)
60 | self.norm_pi_a1 = nn.LayerNorm(164)
61 |
62 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 164)
63 | self.fc_pi_m2 = nn.Linear(164, 8)
64 | self.norm_pi_m1 = nn.LayerNorm(164)
65 |
66 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 164)
67 | self.norm_v1 = nn.LayerNorm(164)
68 | self.fc_v2 = nn.Linear(164, 1, bias=False)
69 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"])
70 |
71 | def forward(self, state_dict):
72 | player_state = state_dict["player"]
73 | ball_state = state_dict["ball"]
74 | left_team_state = state_dict["left_team"]
75 | left_closest_state = state_dict["left_closest"]
76 | right_team_state = state_dict["right_team"]
77 | right_closest_state = state_dict["right_closest"]
78 | avail = state_dict["avail"]
79 |
80 | player_embed = F.relu(self.norm_player(self.fc_player(player_state)))
81 | player_embed = self.norm_player2(self.fc_player2(player_embed))
82 | ball_embed = F.relu(self.norm_ball(self.fc_ball(ball_state)))
83 | ball_embed = self.norm_ball2(self.fc_ball2(ball_embed))
84 |
85 | left_team_embed = self.norm_left(self.fc_left(left_team_state)) # horizon, batch, n, dim
86 | left_closest_embed = F.relu(self.norm_left_closest(self.fc_left_closest(left_closest_state)))
87 | left_closest_embed = self.norm_left_closest2(self.fc_left_closest2(left_closest_embed))
88 |
89 | right_team_embed = self.norm_right(self.fc_right(right_team_state))
90 | right_closest_embed = F.relu(self.norm_right_closest(self.fc_right_closest(right_closest_state)))
91 | right_closest_embed = self.norm_right_closest2(self.fc_right_closest2(right_closest_embed))
92 |
93 | [horizon, batch_size, n_player, dim] = left_team_embed.size()
94 | left_team_embed = left_team_embed.view(horizon*batch_size, n_player, dim).permute(0,2,1) # horizon * batch, dim1, n
95 |         left_team_embed = F.relu(self.conv1d_left(left_team_embed)) # horizon * batch, dim2, n
96 | left_team_embed = F.relu(self.conv1d_left2(left_team_embed)).permute(0,2,1) # horizon * batch, n, dim2
97 | left_team_embed = left_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1) # horizon, batch, n * dim2
98 | left_team_embed = F.relu(self.norm_left2(self.fc_left2(left_team_embed)))
99 |
100 | right_team_embed = right_team_embed.view(horizon*batch_size, n_player+1, dim).permute(0,2,1) # horizon * batch, dim1, n
101 |         right_team_embed = F.relu(self.conv1d_right(right_team_embed)) # horizon * batch, dim2, n
102 |         right_team_embed = F.relu(self.conv1d_right2(right_team_embed)).permute(0,2,1) # horizon * batch, n, dim2
103 | right_team_embed = right_team_embed.reshape(horizon*batch_size, -1).view(horizon,batch_size,-1)
104 | right_team_embed = F.relu(self.norm_right2(self.fc_right2(right_team_embed)))
105 |
106 | cat = torch.cat([player_embed, ball_embed, left_team_embed, right_team_embed, left_closest_embed, right_closest_embed], 2)
107 | cat = F.relu(self.norm_cat(self.fc_cat(cat)))
108 | h_in = state_dict["hidden"]
109 | out, h_out = self.lstm(cat, h_in)
110 |
111 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out)))
112 | a_out = self.fc_pi_a2(a_out)
113 | logit = a_out + (avail-1)*1e7
114 | prob = F.softmax(logit, dim=2)
115 |
116 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out)))
117 | prob_m = self.fc_pi_m2(prob_m)
118 | prob_m = F.softmax(prob_m, dim=2)
119 |
120 | v = F.relu(self.norm_v1(self.fc_v1(out)))
121 | v = self.fc_v2(v)
122 |
123 | return prob, prob_m, v, h_out
124 |
125 | def make_batch(self, data):
126 | # data = [tr1, tr2, ..., tr10] * batch_size
127 | s_player_batch, s_ball_batch, s_left_batch, s_left_closest_batch, s_right_batch, s_right_closest_batch, avail_batch = [],[],[],[],[],[],[]
128 | s_player_prime_batch, s_ball_prime_batch, s_left_prime_batch, s_left_closest_prime_batch, \
129 | s_right_prime_batch, s_right_closest_prime_batch, avail_prime_batch = [],[],[],[],[],[],[]
130 | h1_in_batch, h2_in_batch, h1_out_batch, h2_out_batch = [], [], [], []
131 | a_batch, m_batch, r_batch, prob_batch, done_batch, need_move_batch = [], [], [], [], [], []
132 |
133 | for rollout in data:
134 | s_player_lst, s_ball_lst, s_left_lst, s_left_closest_lst, s_right_lst, s_right_closest_lst, avail_lst = [], [], [], [], [], [], []
135 | s_player_prime_lst, s_ball_prime_lst, s_left_prime_lst, s_left_closest_prime_lst, \
136 | s_right_prime_lst, s_right_closest_prime_lst, avail_prime_lst = [], [], [], [], [], [], []
137 | h1_in_lst, h2_in_lst, h1_out_lst, h2_out_lst = [], [], [], []
138 | a_lst, m_lst, r_lst, prob_lst, done_lst, need_move_lst = [], [], [], [], [], []
139 |
140 | for transition in rollout:
141 | s, a, m, r, s_prime, prob, done, need_move = transition
142 |
143 | s_player_lst.append(s["player"])
144 | s_ball_lst.append(s["ball"])
145 | s_left_lst.append(s["left_team"])
146 | s_left_closest_lst.append(s["left_closest"])
147 | s_right_lst.append(s["right_team"])
148 | s_right_closest_lst.append(s["right_closest"])
149 | avail_lst.append(s["avail"])
150 | h1_in, h2_in = s["hidden"]
151 | h1_in_lst.append(h1_in)
152 | h2_in_lst.append(h2_in)
153 |
154 | s_player_prime_lst.append(s_prime["player"])
155 | s_ball_prime_lst.append(s_prime["ball"])
156 | s_left_prime_lst.append(s_prime["left_team"])
157 | s_left_closest_prime_lst.append(s_prime["left_closest"])
158 | s_right_prime_lst.append(s_prime["right_team"])
159 | s_right_closest_prime_lst.append(s_prime["right_closest"])
160 | avail_prime_lst.append(s_prime["avail"])
161 | h1_out, h2_out = s_prime["hidden"]
162 | h1_out_lst.append(h1_out)
163 | h2_out_lst.append(h2_out)
164 |
165 | a_lst.append([a])
166 | m_lst.append([m])
167 | r_lst.append([r])
168 | prob_lst.append([prob])
169 | done_mask = 0 if done else 1
170 | done_lst.append([done_mask])
171 |                 need_move_lst.append([need_move])
172 |
173 | s_player_batch.append(s_player_lst)
174 | s_ball_batch.append(s_ball_lst)
175 | s_left_batch.append(s_left_lst)
176 | s_left_closest_batch.append(s_left_closest_lst)
177 | s_right_batch.append(s_right_lst)
178 | s_right_closest_batch.append(s_right_closest_lst)
179 | avail_batch.append(avail_lst)
180 | h1_in_batch.append(h1_in_lst[0])
181 | h2_in_batch.append(h2_in_lst[0])
182 |
183 | s_player_prime_batch.append(s_player_prime_lst)
184 | s_ball_prime_batch.append(s_ball_prime_lst)
185 | s_left_prime_batch.append(s_left_prime_lst)
186 | s_left_closest_prime_batch.append(s_left_closest_prime_lst)
187 | s_right_prime_batch.append(s_right_prime_lst)
188 | s_right_closest_prime_batch.append(s_right_closest_prime_lst)
189 | avail_prime_batch.append(avail_prime_lst)
190 | h1_out_batch.append(h1_out_lst[0])
191 | h2_out_batch.append(h2_out_lst[0])
192 |
193 | a_batch.append(a_lst)
194 | m_batch.append(m_lst)
195 | r_batch.append(r_lst)
196 | prob_batch.append(prob_lst)
197 | done_batch.append(done_lst)
198 | need_move_batch.append(need_move_lst)
199 |
200 |
201 | s = {
202 | "player": torch.tensor(s_player_batch, dtype=torch.float, device=self.device).permute(1,0,2),
203 | "ball": torch.tensor(s_ball_batch, dtype=torch.float, device=self.device).permute(1,0,2),
204 | "left_team": torch.tensor(s_left_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
205 | "left_closest": torch.tensor(s_left_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
206 | "right_team": torch.tensor(s_right_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
207 | "right_closest": torch.tensor(s_right_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
208 | "avail": torch.tensor(avail_batch, dtype=torch.float, device=self.device).permute(1,0,2),
209 | "hidden" : (torch.tensor(h1_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
210 | torch.tensor(h2_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
211 | }
212 |
213 | s_prime = {
214 | "player": torch.tensor(s_player_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
215 | "ball": torch.tensor(s_ball_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
216 | "left_team": torch.tensor(s_left_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
217 | "left_closest": torch.tensor(s_left_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
218 | "right_team": torch.tensor(s_right_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
219 | "right_closest": torch.tensor(s_right_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
220 | "avail": torch.tensor(avail_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
221 | "hidden" : (torch.tensor(h1_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
222 | torch.tensor(h2_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
223 | }
224 |
225 | a,m,r,done_mask,prob,need_move = torch.tensor(a_batch, device=self.device).permute(1,0,2), \
226 | torch.tensor(m_batch, device=self.device).permute(1,0,2), \
227 | torch.tensor(r_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
228 | torch.tensor(done_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
229 | torch.tensor(prob_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
230 | torch.tensor(need_move_batch, dtype=torch.float, device=self.device).permute(1,0,2)
231 |
232 | return s, a, m, r, s_prime, done_mask, prob, need_move
233 |
234 |
235 |
236 |
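
Note: `make_batch` above (shared in structure across the model variants) permutes every tensor to a time-major (horizon, batch, ...) layout and stacks the initial hidden states as (1, batch, lstm_size), because `nn.LSTM` defaults to `batch_first=False`. A small shape check with assumed sizes (horizon 30, batch 4, and 256 standing in for `arg_dict["lstm_size"]`):

```python
import torch
import torch.nn as nn

lstm = nn.LSTM(256, 256)
x = torch.randn(30, 4, 256)                               # (horizon, batch, lstm_size)
h0 = (torch.zeros(1, 4, 256), torch.zeros(1, 4, 256))     # (num_layers, batch, lstm_size)
out, h_out = lstm(x, h0)
print(out.shape)                                          # torch.Size([30, 4, 256])
```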
--------------------------------------------------------------------------------
/models/simple_attention.py:
--------------------------------------------------------------------------------
1 | import time
2 | import pprint
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.distributions import Categorical
9 |
10 | class Model(nn.Module):
11 | def __init__(self, arg_dict, device=None):
12 | super(Model, self).__init__()
13 | self.device=None
14 | if device:
15 | self.device = device
16 |
17 | self.arg_dict = arg_dict
18 |
19 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],128)
20 | self.fc_player2 = nn.Linear(128,128)
21 |
22 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],96)
23 | self.fc_ball2 = nn.Linear(96,96)
24 |
25 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],128)
26 | self.fc_left2 = nn.Linear(128,128)
27 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],128)
28 | self.fc_right2 = nn.Linear(128,128)
29 |
30 | self.fc_player_left_q = nn.Linear(128,64)
31 | self.fc_left_k = nn.Linear(128,64)
32 | self.fc_player_right_q = nn.Linear(128,64)
33 | self.fc_right_k = nn.Linear(128,64)
34 |
35 |
36 | self.fc_cat = nn.Linear(128+96+128+128,arg_dict["lstm_size"])
37 |
38 | self.norm_player = nn.LayerNorm(128)
39 | self.norm_player2 = nn.LayerNorm(128)
40 | self.norm_ball = nn.LayerNorm(96)
41 | self.norm_ball2 = nn.LayerNorm(96)
42 | self.norm_left = nn.LayerNorm(128)
43 | self.norm_left2 = nn.LayerNorm(128)
44 | self.norm_right = nn.LayerNorm(128)
45 | self.norm_right2 = nn.LayerNorm(128)
46 |
47 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"])
48 |
49 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"])
50 |
51 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 164)
52 | self.fc_pi_a2 = nn.Linear(164, 12)
53 | self.norm_pi_a1 = nn.LayerNorm(164)
54 |
55 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 164)
56 | self.fc_pi_m2 = nn.Linear(164, 8)
57 | self.norm_pi_m1 = nn.LayerNorm(164)
58 |
59 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 164)
60 | self.norm_v1 = nn.LayerNorm(164)
61 | self.fc_v2 = nn.Linear(164, 1, bias=False)
62 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"])
63 |
64 | def forward(self, state_dict):
65 | player_state = state_dict["player"]
66 | ball_state = state_dict["ball"]
67 | left_team_state = state_dict["left_team"]
68 | left_closest_state = state_dict["left_closest"]
69 | right_team_state = state_dict["right_team"]
70 | right_closest_state = state_dict["right_closest"]
71 | avail = state_dict["avail"]
72 |
73 | player_embed = F.relu(self.norm_player(self.fc_player(player_state)))
74 | player_embed = self.norm_player2(self.fc_player2(player_embed))
75 | ball_embed = F.relu(self.norm_ball(self.fc_ball(ball_state)))
76 | ball_embed = self.norm_ball2(self.fc_ball2(ball_embed))
77 |
78 | left_team_embed = F.relu(self.norm_left(self.fc_left(left_team_state))) # horizon, batch, n, dim
79 | left_team_embed = F.relu(self.norm_left2(self.fc_left2(left_team_embed))) # horizon, batch, n, dim
80 |
81 | right_team_embed = F.relu(self.norm_right(self.fc_right(right_team_state)))
82 | right_team_embed = F.relu(self.norm_right2(self.fc_right2(right_team_embed)))
83 |
84 | player_left_q = self.fc_player_left_q(player_embed) # horizon, batch, dim
85 | left_team_k = self.fc_left_k(left_team_embed) # horizon, batch, n, dim
86 | [horizon, batch_size, n_player, f_dim] = left_team_k.size()
87 | player_left_q = player_left_q.view(horizon*batch_size, 1, f_dim) # horizon*batch, 1, dim1
88 | left_team_k = left_team_k.view(horizon*batch_size, n_player, f_dim).permute(0,2,1) # horizon*batch, dim1, n
89 | attention = F.softmax(torch.bmm(player_left_q, left_team_k)/8, dim=2) # horizon*batch, 1 , n
90 | attention = attention.view(horizon, batch_size, -1).unsqueeze(3) # horizon, batch, n, 1
91 | left_team = left_team_embed*attention # horizon, batch, n, dim
92 | left_team = left_team.permute(0,1,3,2)
93 | left_team = torch.sum(left_team, axis=3)
94 |
95 | player_right_q = self.fc_player_right_q(player_embed) # horizon, batch, dim
96 | right_team_k = self.fc_right_k(right_team_embed) # horizon, batch, n, dim
97 | [horizon, batch_size, n_player, f_dim] = right_team_k.size()
98 | player_right_q = player_right_q.view(horizon*batch_size, 1, f_dim) # horizon*batch, 1, dim1
99 | right_team_k = right_team_k.view(horizon*batch_size, n_player, f_dim).permute(0,2,1) # horizon*batch, dim1, n
100 | attention = F.softmax(torch.bmm(player_right_q, right_team_k)/8, dim=2) # horizon*batch, 1 , n
101 | attention = attention.view(horizon, batch_size, -1).unsqueeze(3) # horizon, batch, n, 1
102 | right_team = right_team_embed*attention # horizon, batch, n, dim
103 | right_team = right_team.permute(0,1,3,2)
104 | right_team = torch.sum(right_team, axis=3)
105 |
106 |
107 | cat = torch.cat([player_embed, ball_embed, left_team, right_team], 2)
108 | cat = F.relu(self.norm_cat(self.fc_cat(cat)))
109 | h_in = state_dict["hidden"]
110 | out, h_out = self.lstm(cat, h_in)
111 |
112 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out)))
113 | a_out = self.fc_pi_a2(a_out)
114 | logit = a_out + (avail-1)*1e7
115 | prob = F.softmax(logit, dim=2)
116 |
117 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out)))
118 | prob_m = self.fc_pi_m2(prob_m)
119 | prob_m = F.softmax(prob_m, dim=2)
120 |
121 | v = F.relu(self.norm_v1(self.fc_v1(out)))
122 | v = self.fc_v2(v)
123 |
124 | return prob, prob_m, v, h_out
125 |
126 | def make_batch(self, data):
127 | # data = [tr1, tr2, ..., tr10] * batch_size
128 | s_player_batch, s_ball_batch, s_left_batch, s_left_closest_batch, s_right_batch, s_right_closest_batch, avail_batch = [],[],[],[],[],[],[]
129 | s_player_prime_batch, s_ball_prime_batch, s_left_prime_batch, s_left_closest_prime_batch, \
130 | s_right_prime_batch, s_right_closest_prime_batch, avail_prime_batch = [],[],[],[],[],[],[]
131 | h1_in_batch, h2_in_batch, h1_out_batch, h2_out_batch = [], [], [], []
132 | a_batch, m_batch, r_batch, prob_batch, done_batch, need_move_batch = [], [], [], [], [], []
133 |
134 | for rollout in data:
135 | s_player_lst, s_ball_lst, s_left_lst, s_left_closest_lst, s_right_lst, s_right_closest_lst, avail_lst = [], [], [], [], [], [], []
136 | s_player_prime_lst, s_ball_prime_lst, s_left_prime_lst, s_left_closest_prime_lst, \
137 | s_right_prime_lst, s_right_closest_prime_lst, avail_prime_lst = [], [], [], [], [], [], []
138 | h1_in_lst, h2_in_lst, h1_out_lst, h2_out_lst = [], [], [], []
139 | a_lst, m_lst, r_lst, prob_lst, done_lst, need_move_lst = [], [], [], [], [], []
140 |
141 | for transition in rollout:
142 | s, a, m, r, s_prime, prob, done, need_move = transition
143 |
144 | s_player_lst.append(s["player"])
145 | s_ball_lst.append(s["ball"])
146 | s_left_lst.append(s["left_team"])
147 | s_left_closest_lst.append(s["left_closest"])
148 | s_right_lst.append(s["right_team"])
149 | s_right_closest_lst.append(s["right_closest"])
150 | avail_lst.append(s["avail"])
151 | h1_in, h2_in = s["hidden"]
152 | h1_in_lst.append(h1_in)
153 | h2_in_lst.append(h2_in)
154 |
155 | s_player_prime_lst.append(s_prime["player"])
156 | s_ball_prime_lst.append(s_prime["ball"])
157 | s_left_prime_lst.append(s_prime["left_team"])
158 | s_left_closest_prime_lst.append(s_prime["left_closest"])
159 | s_right_prime_lst.append(s_prime["right_team"])
160 | s_right_closest_prime_lst.append(s_prime["right_closest"])
161 | avail_prime_lst.append(s_prime["avail"])
162 | h1_out, h2_out = s_prime["hidden"]
163 | h1_out_lst.append(h1_out)
164 | h2_out_lst.append(h2_out)
165 |
166 | a_lst.append([a])
167 | m_lst.append([m])
168 | r_lst.append([r])
169 | prob_lst.append([prob])
170 | done_mask = 0 if done else 1
171 | done_lst.append([done_mask])
172 |                 need_move_lst.append([need_move])
173 |
174 | s_player_batch.append(s_player_lst)
175 | s_ball_batch.append(s_ball_lst)
176 | s_left_batch.append(s_left_lst)
177 | s_left_closest_batch.append(s_left_closest_lst)
178 | s_right_batch.append(s_right_lst)
179 | s_right_closest_batch.append(s_right_closest_lst)
180 | avail_batch.append(avail_lst)
181 | h1_in_batch.append(h1_in_lst[0])
182 | h2_in_batch.append(h2_in_lst[0])
183 |
184 | s_player_prime_batch.append(s_player_prime_lst)
185 | s_ball_prime_batch.append(s_ball_prime_lst)
186 | s_left_prime_batch.append(s_left_prime_lst)
187 | s_left_closest_prime_batch.append(s_left_closest_prime_lst)
188 | s_right_prime_batch.append(s_right_prime_lst)
189 | s_right_closest_prime_batch.append(s_right_closest_prime_lst)
190 | avail_prime_batch.append(avail_prime_lst)
191 | h1_out_batch.append(h1_out_lst[0])
192 | h2_out_batch.append(h2_out_lst[0])
193 |
194 | a_batch.append(a_lst)
195 | m_batch.append(m_lst)
196 | r_batch.append(r_lst)
197 | prob_batch.append(prob_lst)
198 | done_batch.append(done_lst)
199 | need_move_batch.append(need_move_lst)
200 |
201 |
202 | s = {
203 | "player": torch.tensor(s_player_batch, dtype=torch.float, device=self.device).permute(1,0,2),
204 | "ball": torch.tensor(s_ball_batch, dtype=torch.float, device=self.device).permute(1,0,2),
205 | "left_team": torch.tensor(s_left_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
206 | "left_closest": torch.tensor(s_left_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
207 | "right_team": torch.tensor(s_right_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
208 | "right_closest": torch.tensor(s_right_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
209 | "avail": torch.tensor(avail_batch, dtype=torch.float, device=self.device).permute(1,0,2),
210 | "hidden" : (torch.tensor(h1_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
211 | torch.tensor(h2_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
212 | }
213 |
214 | s_prime = {
215 | "player": torch.tensor(s_player_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
216 | "ball": torch.tensor(s_ball_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
217 | "left_team": torch.tensor(s_left_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
218 | "left_closest": torch.tensor(s_left_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
219 | "right_team": torch.tensor(s_right_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
220 | "right_closest": torch.tensor(s_right_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
221 | "avail": torch.tensor(avail_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
222 | "hidden" : (torch.tensor(h1_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
223 | torch.tensor(h2_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
224 | }
225 |
226 | a,m,r,done_mask,prob,need_move = torch.tensor(a_batch, device=self.device).permute(1,0,2), \
227 | torch.tensor(m_batch, device=self.device).permute(1,0,2), \
228 | torch.tensor(r_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
229 | torch.tensor(done_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
230 | torch.tensor(prob_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
231 | torch.tensor(need_move_batch, dtype=torch.float, device=self.device).permute(1,0,2)
232 |
233 | return s, a, m, r, s_prime, done_mask, prob, need_move
234 |
235 |
236 |
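
Note: this model scores each team member against the controlled player with single-query scaled dot-product attention: the player embedding is projected to a 64-dim query, each team member to a 64-dim key, the dot products are divided by 8 (= sqrt(64)) and softmaxed, and the team representation is the attention-weighted sum over players. A stripped-down illustration (for brevity the keys double as values here, whereas the model weights the full team embeddings):

```python
import torch
import torch.nn.functional as F

n, dim = 10, 64                                        # 10 teammates, 64-dim queries/keys
query = torch.randn(1, 1, dim)                         # the controlled player
keys = torch.randn(1, n, dim)                          # one key per teammate
scores = torch.bmm(query, keys.permute(0, 2, 1)) / 8   # scaled dot products, (1, 1, n)
weights = F.softmax(scores, dim=2)                     # attention over players
pooled = torch.bmm(weights, keys)                      # weighted team summary
print(pooled.shape)                                    # torch.Size([1, 1, 64])
```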
--------------------------------------------------------------------------------
/models/team_fc.py:
--------------------------------------------------------------------------------
1 | import time
2 | import pprint
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.distributions import Categorical
9 |
10 | class Model(nn.Module):
11 | def __init__(self, arg_dict, device=None):
12 | super(Model, self).__init__()
13 | self.device=None
14 | if device:
15 | self.device = device
16 |
17 | self.arg_dict = arg_dict
18 |
19 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],64)
20 | self.norm_player = nn.LayerNorm(64)
21 | self.fc_player2 = nn.Linear(64,64)
22 | self.norm_player2 = nn.LayerNorm(64)
23 |
24 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],64)
25 | self.norm_ball = nn.LayerNorm(64)
26 | self.fc_ball2 = nn.Linear(64,64)
27 | self.norm_ball2 = nn.LayerNorm(64)
28 |
29 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],64)
30 | self.norm_left = nn.LayerNorm(64)
31 | self.fc_left2 = nn.Linear(64,48)
32 | self.norm_left2 = nn.LayerNorm(48)
33 | self.fc_left_tot = nn.Linear(480, 96)
34 | self.norm_left_tot = nn.LayerNorm(96)
35 |
36 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],64)
37 | self.norm_right = nn.LayerNorm(64)
38 | self.fc_right2 = nn.Linear(64,48)
39 | self.norm_right2 = nn.LayerNorm(48)
40 | self.fc_right_tot = nn.Linear(48*11, 96)
41 | self.norm_right_tot = nn.LayerNorm(96)
42 |
43 | self.fc_left_closest = nn.Linear(arg_dict["feature_dims"]["left_team_closest"],64)
44 | self.norm_left_closest = nn.LayerNorm(64)
45 | self.fc_left_closest2 = nn.Linear(64,64)
46 | self.norm_left_closest2 = nn.LayerNorm(64)
47 |
48 | self.fc_right_closest = nn.Linear(arg_dict["feature_dims"]["right_team_closest"],64)
49 | self.norm_right_closest = nn.LayerNorm(64)
50 | self.fc_right_closest2 = nn.Linear(64,64)
51 | self.norm_right_closest2 = nn.LayerNorm(64)
52 |
53 | self.fc_cat = nn.Linear(96+96+64+64+64+64,arg_dict["lstm_size"])
54 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"])
55 |
56 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"])
57 |
58 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 164)
59 | self.fc_pi_a2 = nn.Linear(164, 12)
60 | self.norm_pi_a1 = nn.LayerNorm(164)
61 |
62 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 164)
63 | self.fc_pi_m2 = nn.Linear(164, 8)
64 | self.norm_pi_m1 = nn.LayerNorm(164)
65 |
66 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 164)
67 | self.norm_v1 = nn.LayerNorm(164)
68 | self.fc_v2 = nn.Linear(164, 1, bias=False)
69 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"])
70 |
71 | def forward(self, state_dict):
72 | player_state = state_dict["player"]
73 | ball_state = state_dict["ball"]
74 | left_team_state = state_dict["left_team"]
75 | left_closest_state = state_dict["left_closest"]
76 | right_team_state = state_dict["right_team"]
77 | right_closest_state = state_dict["right_closest"]
78 | avail = state_dict["avail"]
79 |
80 | player_embed = F.relu(self.norm_player(self.fc_player(player_state)))
81 | player_embed = F.relu(self.norm_player2(self.fc_player2(player_embed)))
82 | ball_embed = F.relu(self.norm_ball(self.fc_ball(ball_state)))
83 | ball_embed = F.relu(self.norm_ball2(self.fc_ball2(ball_embed)))
84 |
85 | left_team_embed = F.relu(self.norm_left(self.fc_left(left_team_state))) # horizon, batch, n, dim
86 | left_team_embed = F.relu(self.norm_left2(self.fc_left2(left_team_embed))) # horizon, batch, n, dim
87 | right_team_embed = self.norm_right(self.fc_right(right_team_state))
88 | right_team_embed = self.norm_right2(self.fc_right2(right_team_embed))
89 |
90 | [horizon, batch_size, n_player, dim] = left_team_embed.size()
91 | left_team_embed = left_team_embed.view(horizon, batch_size, -1)
92 | left_team_embed = F.relu(self.norm_left_tot(self.fc_left_tot(left_team_embed)))
93 | right_team_embed = right_team_embed.view(horizon, batch_size, -1)
94 | right_team_embed = F.relu(self.norm_right_tot(self.fc_right_tot(right_team_embed)))
95 |
96 |
97 |
98 | left_closest_embed = F.relu(self.norm_left_closest(self.fc_left_closest(left_closest_state)))
99 | left_closest_embed = F.relu(self.norm_left_closest2(self.fc_left_closest2(left_closest_embed)))
100 | right_closest_embed = F.relu(self.norm_right_closest(self.fc_right_closest(right_closest_state)))
101 | right_closest_embed = F.relu(self.norm_right_closest2(self.fc_right_closest2(right_closest_embed)))
102 |
103 | cat = torch.cat([player_embed, ball_embed, left_team_embed, right_team_embed, left_closest_embed, right_closest_embed], 2)
104 | cat = F.relu(self.norm_cat(self.fc_cat(cat)))
105 | h_in = state_dict["hidden"]
106 | out, h_out = self.lstm(cat, h_in)
107 |
108 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out)))
109 | a_out = self.fc_pi_a2(a_out)
110 | logit = a_out + (avail-1)*1e7
111 | prob = F.softmax(logit, dim=2)
112 |
113 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out)))
114 | prob_m = self.fc_pi_m2(prob_m)
115 | prob_m = F.softmax(prob_m, dim=2)
116 |
117 | v = F.relu(self.norm_v1(self.fc_v1(out)))
118 | v = self.fc_v2(v)
119 |
120 | return prob, prob_m, v, h_out
121 |
122 | def make_batch(self, data):
123 | # data = [tr1, tr2, ..., tr10] * batch_size
124 | s_player_batch, s_ball_batch, s_left_batch, s_left_closest_batch, s_right_batch, s_right_closest_batch, avail_batch = [],[],[],[],[],[],[]
125 | s_player_prime_batch, s_ball_prime_batch, s_left_prime_batch, s_left_closest_prime_batch, \
126 | s_right_prime_batch, s_right_closest_prime_batch, avail_prime_batch = [],[],[],[],[],[],[]
127 | h1_in_batch, h2_in_batch, h1_out_batch, h2_out_batch = [], [], [], []
128 | a_batch, m_batch, r_batch, prob_batch, done_batch, need_move_batch = [], [], [], [], [], []
129 |
130 | for rollout in data:
131 | s_player_lst, s_ball_lst, s_left_lst, s_left_closest_lst, s_right_lst, s_right_closest_lst, avail_lst = [], [], [], [], [], [], []
132 | s_player_prime_lst, s_ball_prime_lst, s_left_prime_lst, s_left_closest_prime_lst, \
133 | s_right_prime_lst, s_right_closest_prime_lst, avail_prime_lst = [], [], [], [], [], [], []
134 | h1_in_lst, h2_in_lst, h1_out_lst, h2_out_lst = [], [], [], []
135 | a_lst, m_lst, r_lst, prob_lst, done_lst, need_move_lst = [], [], [], [], [], []
136 |
137 | for transition in rollout:
138 | s, a, m, r, s_prime, prob, done, need_move = transition
139 |
140 | s_player_lst.append(s["player"])
141 | s_ball_lst.append(s["ball"])
142 | s_left_lst.append(s["left_team"])
143 | s_left_closest_lst.append(s["left_closest"])
144 | s_right_lst.append(s["right_team"])
145 | s_right_closest_lst.append(s["right_closest"])
146 | avail_lst.append(s["avail"])
147 | h1_in, h2_in = s["hidden"]
148 | h1_in_lst.append(h1_in)
149 | h2_in_lst.append(h2_in)
150 |
151 | s_player_prime_lst.append(s_prime["player"])
152 | s_ball_prime_lst.append(s_prime["ball"])
153 | s_left_prime_lst.append(s_prime["left_team"])
154 | s_left_closest_prime_lst.append(s_prime["left_closest"])
155 | s_right_prime_lst.append(s_prime["right_team"])
156 | s_right_closest_prime_lst.append(s_prime["right_closest"])
157 | avail_prime_lst.append(s_prime["avail"])
158 | h1_out, h2_out = s_prime["hidden"]
159 | h1_out_lst.append(h1_out)
160 | h2_out_lst.append(h2_out)
161 |
162 | a_lst.append([a])
163 | m_lst.append([m])
164 | r_lst.append([r])
165 | prob_lst.append([prob])
166 | done_mask = 0 if done else 1
167 | done_lst.append([done_mask])
168 |                 need_move_lst.append([need_move])
169 |
170 | s_player_batch.append(s_player_lst)
171 | s_ball_batch.append(s_ball_lst)
172 | s_left_batch.append(s_left_lst)
173 | s_left_closest_batch.append(s_left_closest_lst)
174 | s_right_batch.append(s_right_lst)
175 | s_right_closest_batch.append(s_right_closest_lst)
176 | avail_batch.append(avail_lst)
177 | h1_in_batch.append(h1_in_lst[0])
178 | h2_in_batch.append(h2_in_lst[0])
179 |
180 | s_player_prime_batch.append(s_player_prime_lst)
181 | s_ball_prime_batch.append(s_ball_prime_lst)
182 | s_left_prime_batch.append(s_left_prime_lst)
183 | s_left_closest_prime_batch.append(s_left_closest_prime_lst)
184 | s_right_prime_batch.append(s_right_prime_lst)
185 | s_right_closest_prime_batch.append(s_right_closest_prime_lst)
186 | avail_prime_batch.append(avail_prime_lst)
187 | h1_out_batch.append(h1_out_lst[0])
188 | h2_out_batch.append(h2_out_lst[0])
189 |
190 | a_batch.append(a_lst)
191 | m_batch.append(m_lst)
192 | r_batch.append(r_lst)
193 | prob_batch.append(prob_lst)
194 | done_batch.append(done_lst)
195 | need_move_batch.append(need_move_lst)
196 |
197 |
198 | s = {
199 | "player": torch.tensor(s_player_batch, dtype=torch.float, device=self.device).permute(1,0,2),
200 | "ball": torch.tensor(s_ball_batch, dtype=torch.float, device=self.device).permute(1,0,2),
201 | "left_team": torch.tensor(s_left_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
202 | "left_closest": torch.tensor(s_left_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
203 | "right_team": torch.tensor(s_right_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
204 | "right_closest": torch.tensor(s_right_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
205 | "avail": torch.tensor(avail_batch, dtype=torch.float, device=self.device).permute(1,0,2),
206 | "hidden" : (torch.tensor(h1_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
207 | torch.tensor(h2_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
208 | }
209 |
210 | s_prime = {
211 | "player": torch.tensor(s_player_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
212 | "ball": torch.tensor(s_ball_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
213 | "left_team": torch.tensor(s_left_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
214 | "left_closest": torch.tensor(s_left_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
215 | "right_team": torch.tensor(s_right_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
216 | "right_closest": torch.tensor(s_right_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
217 | "avail": torch.tensor(avail_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
218 | "hidden" : (torch.tensor(h1_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
219 | torch.tensor(h2_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
220 | }
221 |
222 | a,m,r,done_mask,prob,need_move = torch.tensor(a_batch, device=self.device).permute(1,0,2), \
223 | torch.tensor(m_batch, device=self.device).permute(1,0,2), \
224 | torch.tensor(r_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
225 | torch.tensor(done_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
226 | torch.tensor(prob_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
227 | torch.tensor(need_move_batch, dtype=torch.float, device=self.device).permute(1,0,2)
228 |
229 | return s, a, m, r, s_prime, done_mask, prob, need_move
230 |
231 |
232 |
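
Note: unlike the pooling and attention variants, this model flattens the per-player embeddings into one long vector (48 features x 10 teammates for `fc_left_tot`, x 11 opponents for `fc_right_tot`), so every player occupies a fixed slot and the representation is order-sensitive. A minimal illustration of the flatten-then-Linear step with assumed sizes:

```python
import torch
import torch.nn as nn

horizon, batch, n, dim = 30, 2, 10, 48         # assumed rollout and batch sizes
team = torch.randn(horizon, batch, n, dim)     # per-player embeddings
fc_tot = nn.Linear(n * dim, 96)                # mirrors fc_left_tot (480 -> 96)
out = fc_tot(team.view(horizon, batch, -1))    # players folded into the feature axis
print(out.shape)                               # torch.Size([30, 2, 96])
```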
--------------------------------------------------------------------------------
/models/team_pooling.py:
--------------------------------------------------------------------------------
1 | import time
2 | import pprint
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.distributions import Categorical
9 |
10 | class Model(nn.Module):
11 | def __init__(self, arg_dict, device=None):
12 | super(Model, self).__init__()
13 |         self.device = None
14 |         if device:
15 |             self.device = device
16 | self.fc_player = nn.Linear(arg_dict["feature_dims"]["player"],64)
17 | self.fc_ball = nn.Linear(arg_dict["feature_dims"]["ball"],64)
18 | self.fc_left = nn.Linear(arg_dict["feature_dims"]["left_team"],64)
19 | self.fc_right = nn.Linear(arg_dict["feature_dims"]["right_team"],64)
20 | self.fc_left_closest = nn.Linear(arg_dict["feature_dims"]["left_team_closest"],32)
21 | self.fc_right_closest = nn.Linear(arg_dict["feature_dims"]["right_team_closest"],32)
22 | self.fc_cat = nn.Linear(256+64,arg_dict["lstm_size"])
23 | self.norm_player = nn.LayerNorm(64)
24 | self.norm_ball = nn.LayerNorm(64)
25 | self.norm_left = nn.LayerNorm(64)
26 | self.norm_left_closest = nn.LayerNorm(32)
27 | self.norm_right = nn.LayerNorm(64)
28 | self.norm_right_closest = nn.LayerNorm(32)
29 | self.norm_cat = nn.LayerNorm(arg_dict["lstm_size"])
30 |
31 | self.lstm = nn.LSTM(arg_dict["lstm_size"],arg_dict["lstm_size"])
32 |
33 | self.fc_pi_a1 = nn.Linear(arg_dict["lstm_size"], 128)
34 | self.fc_pi_a2 = nn.Linear(128, 12)
35 | self.norm_pi_a1 = nn.LayerNorm(128)
36 |
37 | self.fc_pi_m1 = nn.Linear(arg_dict["lstm_size"], 128)
38 | self.fc_pi_m2 = nn.Linear(128, 8)
39 | self.norm_pi_m1 = nn.LayerNorm(128)
40 |
41 | self.fc_v1 = nn.Linear(arg_dict["lstm_size"], 128)
42 | self.norm_v1 = nn.LayerNorm(128)
43 | self.fc_v2 = nn.Linear(128, 1, bias=False)
44 | self.pool = nn.AdaptiveAvgPool2d((1,None))
45 | self.optimizer = optim.Adam(self.parameters(), lr=arg_dict["learning_rate"])
46 |
47 |
48 | def forward(self, state_dict):
49 | player_state = state_dict["player"]
50 | ball_state = state_dict["ball"]
51 | left_team_state = state_dict["left_team"]
52 | left_closest_state = state_dict["left_closest"]
53 | right_team_state = state_dict["right_team"]
54 | right_closest_state = state_dict["right_closest"]
55 | avail = state_dict["avail"]
56 |
57 | player_embed = self.norm_player(self.fc_player(player_state))
58 | ball_embed = self.norm_ball(self.fc_ball(ball_state))
59 | left_team_embed = self.norm_left(self.fc_left(left_team_state))
60 | left_closest_embed = self.norm_left_closest(self.fc_left_closest(left_closest_state))
61 | right_team_embed = self.norm_right(self.fc_right(right_team_state))
62 | right_closest_embed = self.norm_right_closest(self.fc_right_closest(right_closest_state))
63 |
64 | left_team_embed = self.pool(left_team_embed).squeeze(2)
65 | right_team_embed = self.pool(right_team_embed).squeeze(2)
66 |
67 | cat = torch.cat([player_embed, ball_embed, left_team_embed, right_team_embed, left_closest_embed, right_closest_embed], 2)
68 | cat = F.relu(self.norm_cat(self.fc_cat(cat)))
69 | h_in = state_dict["hidden"]
70 | out, h_out = self.lstm(cat, h_in)
71 |
72 | a_out = F.relu(self.norm_pi_a1(self.fc_pi_a1(out)))
73 | a_out = self.fc_pi_a2(a_out)
74 | logit = a_out + (avail-1)*1e8
75 | prob = F.softmax(logit, dim=2)
76 |
77 | prob_m = F.relu(self.norm_pi_m1(self.fc_pi_m1(out)))
78 | prob_m = self.fc_pi_m2(prob_m)
79 | prob_m = F.softmax(prob_m, dim=2)
80 |
81 | v = F.relu(self.norm_v1(self.fc_v1(out)))
82 | v = self.fc_v2(v)
83 |
84 | return prob, prob_m, v, h_out
85 |
86 | def make_batch(self, data):
87 | # data = [tr1, tr2, ..., tr10] * batch_size
88 | s_player_batch, s_ball_batch, s_left_batch, s_left_closest_batch, s_right_batch, s_right_closest_batch, avail_batch = [],[],[],[],[],[],[]
89 | s_player_prime_batch, s_ball_prime_batch, s_left_prime_batch, s_left_closest_prime_batch, \
90 | s_right_prime_batch, s_right_closest_prime_batch, avail_prime_batch = [],[],[],[],[],[],[]
91 | h1_in_batch, h2_in_batch, h1_out_batch, h2_out_batch = [], [], [], []
92 | a_batch, m_batch, r_batch, prob_batch, done_batch, need_move_batch = [], [], [], [], [], []
93 |
94 | for rollout in data:
95 | s_player_lst, s_ball_lst, s_left_lst, s_left_closest_lst, s_right_lst, s_right_closest_lst, avail_lst = [], [], [], [], [], [], []
96 | s_player_prime_lst, s_ball_prime_lst, s_left_prime_lst, s_left_closest_prime_lst, \
97 | s_right_prime_lst, s_right_closest_prime_lst, avail_prime_lst = [], [], [], [], [], [], []
98 | h1_in_lst, h2_in_lst, h1_out_lst, h2_out_lst = [], [], [], []
99 | a_lst, m_lst, r_lst, prob_lst, done_lst, need_move_lst = [], [], [], [], [], []
100 |
101 | for transition in rollout:
102 | s, a, m, r, s_prime, prob, done, need_move = transition
103 |
104 | s_player_lst.append(s["player"])
105 | s_ball_lst.append(s["ball"])
106 | s_left_lst.append(s["left_team"])
107 | s_left_closest_lst.append(s["left_closest"])
108 | s_right_lst.append(s["right_team"])
109 | s_right_closest_lst.append(s["right_closest"])
110 | avail_lst.append(s["avail"])
111 | h1_in, h2_in = s["hidden"]
112 | h1_in_lst.append(h1_in)
113 | h2_in_lst.append(h2_in)
114 |
115 | s_player_prime_lst.append(s_prime["player"])
116 | s_ball_prime_lst.append(s_prime["ball"])
117 | s_left_prime_lst.append(s_prime["left_team"])
118 | s_left_closest_prime_lst.append(s_prime["left_closest"])
119 | s_right_prime_lst.append(s_prime["right_team"])
120 | s_right_closest_prime_lst.append(s_prime["right_closest"])
121 | avail_prime_lst.append(s_prime["avail"])
122 | h1_out, h2_out = s_prime["hidden"]
123 | h1_out_lst.append(h1_out)
124 | h2_out_lst.append(h2_out)
125 |
126 | a_lst.append([a])
127 | m_lst.append([m])
128 | r_lst.append([r])
129 | prob_lst.append([prob])
130 | done_mask = 0 if done else 1
131 | done_lst.append([done_mask])
132 |                 need_move_lst.append([need_move])
133 |
134 | s_player_batch.append(s_player_lst)
135 | s_ball_batch.append(s_ball_lst)
136 | s_left_batch.append(s_left_lst)
137 | s_left_closest_batch.append(s_left_closest_lst)
138 | s_right_batch.append(s_right_lst)
139 | s_right_closest_batch.append(s_right_closest_lst)
140 | avail_batch.append(avail_lst)
141 | h1_in_batch.append(h1_in_lst[0])
142 | h2_in_batch.append(h2_in_lst[0])
143 |
144 | s_player_prime_batch.append(s_player_prime_lst)
145 | s_ball_prime_batch.append(s_ball_prime_lst)
146 | s_left_prime_batch.append(s_left_prime_lst)
147 | s_left_closest_prime_batch.append(s_left_closest_prime_lst)
148 | s_right_prime_batch.append(s_right_prime_lst)
149 | s_right_closest_prime_batch.append(s_right_closest_prime_lst)
150 | avail_prime_batch.append(avail_prime_lst)
151 | h1_out_batch.append(h1_out_lst[0])
152 | h2_out_batch.append(h2_out_lst[0])
153 |
154 | a_batch.append(a_lst)
155 | m_batch.append(m_lst)
156 | r_batch.append(r_lst)
157 | prob_batch.append(prob_lst)
158 | done_batch.append(done_lst)
159 | need_move_batch.append(need_move_lst)
160 |
161 | s = {
162 | "player": torch.tensor(s_player_batch, dtype=torch.float, device=self.device).permute(1,0,2),
163 | "ball": torch.tensor(s_ball_batch, dtype=torch.float, device=self.device).permute(1,0,2),
164 | "left_team": torch.tensor(s_left_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
165 | "left_closest": torch.tensor(s_left_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
166 | "right_team": torch.tensor(s_right_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
167 | "right_closest": torch.tensor(s_right_closest_batch, dtype=torch.float, device=self.device).permute(1,0,2),
168 | "avail": torch.tensor(avail_batch, dtype=torch.float, device=self.device).permute(1,0,2),
169 | "hidden" : (torch.tensor(h1_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
170 | torch.tensor(h2_in_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
171 | }
172 |
173 | s_prime = {
174 | "player": torch.tensor(s_player_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
175 | "ball": torch.tensor(s_ball_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
176 | "left_team": torch.tensor(s_left_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
177 | "left_closest": torch.tensor(s_left_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
178 | "right_team": torch.tensor(s_right_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2,3),
179 | "right_closest": torch.tensor(s_right_closest_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
180 | "avail": torch.tensor(avail_prime_batch, dtype=torch.float, device=self.device).permute(1,0,2),
181 | "hidden" : (torch.tensor(h1_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2),
182 | torch.tensor(h2_out_batch, dtype=torch.float, device=self.device).squeeze(1).permute(1,0,2))
183 | }
184 |
185 | a,m,r,done_mask,prob,need_move = torch.tensor(a_batch, device=self.device).permute(1,0,2), \
186 | torch.tensor(m_batch, device=self.device).permute(1,0,2), \
187 | torch.tensor(r_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
188 | torch.tensor(done_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
189 | torch.tensor(prob_batch, dtype=torch.float, device=self.device).permute(1,0,2), \
190 | torch.tensor(need_move_batch, dtype=torch.float, device=self.device).permute(1,0,2)
191 |
192 |
193 | return s, a, m, r, s_prime, done_mask, prob, need_move
194 |
195 |
--------------------------------------------------------------------------------
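The batching logic above builds, for every feature, a nested list indexed first by rollout and then by time step, and converts it to a time-major tensor with `.permute(1,0,2)`; for the LSTM hidden state only the first step of each rollout is stored (`h1_in_lst[0]`, `h2_in_lst[0]`), presumably so the learner can re-unroll the recurrent model over the whole chunk. A minimal sketch of the resulting shapes, assuming the default `rollout_len`/`batch_size` from train.py and a made-up feature dimension:

```python
import torch

# Hypothetical sizes; rollout_len and batch_size match the defaults in
# train.py's arg_dict, the feature dimension is made up for illustration.
batch_size, rollout_len, feat_dim = 32, 30, 29

# The mini-batch is first collected as nested Python lists:
# outer index = rollout in the batch, inner index = time step.
player_batch = [[[0.0] * feat_dim for _ in range(rollout_len)]
                for _ in range(batch_size)]

# make_batch converts each list to a tensor and permutes it to a time-major
# layout, (rollout_len, batch_size, feat_dim), i.e. the sequence-first shape
# that torch.nn.LSTM consumes with its default batch_first=False.
player = torch.tensor(player_batch, dtype=torch.float).permute(1, 0, 2)
print(player.shape)  # torch.Size([30, 32, 29])
```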
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | tensorboardX
3 | matplotlib
4 | kaggle-environments
5 | visdom==0.1.8.9
6 |
--------------------------------------------------------------------------------
/rewarders/rewarder_basic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def calc_reward(rew, prev_obs, obs):
4 | ball_x, ball_y, ball_z = obs['ball']
5 | MIDDLE_X, PENALTY_X, END_X = 0.2, 0.64, 1.0
6 | PENALTY_Y, END_Y = 0.27, 0.42
7 |
8 | ball_position_r = 0.0
9 | if (-END_X <= ball_x and ball_x < -PENALTY_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
10 | ball_position_r = -2.0
11 | elif (-END_X <= ball_x and ball_x < -MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
12 | ball_position_r = -1.0
13 | elif (-MIDDLE_X <= ball_x and ball_x <= MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
14 | ball_position_r = 0.0
15 | elif (PENALTY_X < ball_x and ball_x <= END_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
16 | ball_position_r = 2.0
17 | elif (MIDDLE_X < ball_x and ball_x <= END_X) and (-END_Y < ball_y and ball_y < END_Y):
18 | ball_position_r = 1.0
19 | else:
20 | ball_position_r = 0.0
21 |
22 | left_yellow = np.sum(obs["left_team_yellow_card"]) - np.sum(prev_obs["left_team_yellow_card"])
23 | right_yellow = np.sum(obs["right_team_yellow_card"]) - np.sum(prev_obs["right_team_yellow_card"])
24 | yellow_r = right_yellow - left_yellow
25 |
26 |
27 | win_reward = 0.0
28 | if obs['steps_left'] == 0:
29 | [my_score, opponent_score] = obs['score']
30 | if my_score > opponent_score:
31 | win_reward = 1.0
32 |
33 | reward = 5.0*win_reward + 5.0*rew + 0.003*ball_position_r + yellow_r
34 |
35 |
36 | return reward
--------------------------------------------------------------------------------
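As a quick sanity check of the shaping terms above, here is a hedged usage sketch of `calc_reward` with hand-made observations; only the keys the function actually reads are filled in, and all values are invented for illustration (it assumes the repo root is on `PYTHONPATH`):

```python
import numpy as np
from rewarders.rewarder_basic import calc_reward

# Minimal fake observations containing only the keys calc_reward reads.
prev_obs = {
    "ball": [0.0, 0.0, 0.1],
    "left_team_yellow_card": np.zeros(11),
    "right_team_yellow_card": np.zeros(11),
    "steps_left": 100,
    "score": [0, 0],
}
obs = dict(prev_obs, ball=[0.7, 0.0, 0.1])  # ball moved into the opponent penalty-box zone

# rew is the raw environment reward (+1 on scoring); here no goal was scored.
r = calc_reward(0.0, prev_obs, obs)
print(r)  # 0.006 = 0.003 * ball_position_r, with ball_position_r = 2.0
```

With the ball inside the opponent penalty-box zone and no goal, cards, or match end, only the 0.003-weighted position term contributes.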
/rewarders/rewarder_highpass.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def calc_reward(rew, prev_obs, obs):
4 | ball_x, ball_y, ball_z = obs['ball']
5 | MIDDLE_X, PENALTY_X, END_X = 0.2, 0.64, 1.0
6 | PENALTY_Y, END_Y = 0.27, 0.42
7 |
8 | ball_position_r = 0.0
9 | if (-END_X <= ball_x and ball_x < -PENALTY_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
10 | ball_position_r = -1.0
11 | elif (-END_X <= ball_x and ball_x < -MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
12 | ball_position_r = -1.0
13 | elif (-MIDDLE_X <= ball_x and ball_x <= MIDDLE_X) and (-END_Y < ball_y and ball_y < END_Y):
14 | ball_position_r = 0.0
15 | elif (PENALTY_X < ball_x and ball_x <= END_X) and (-PENALTY_Y < ball_y and ball_y < PENALTY_Y):
16 | ball_position_r = 1.0
17 | elif (MIDDLE_X < ball_x and ball_x <= END_X) and (-END_Y < ball_y and ball_y < END_Y):
18 | ball_position_r = 1.0
19 | else:
20 | ball_position_r = 0.0
21 |
22 | left_yellow = np.sum(obs["left_team_yellow_card"]) - np.sum(prev_obs["left_team_yellow_card"])
23 | right_yellow = np.sum(obs["right_team_yellow_card"]) - np.sum(prev_obs["right_team_yellow_card"])
24 | yellow_r = right_yellow - left_yellow
25 |
26 |
27 | win_reward = 0.0
28 | if obs['steps_left'] == 0:
29 | [my_score, opponent_score] = obs['score']
30 | if my_score > opponent_score:
31 | win_reward = 1.0
32 |
33 | reward = 5.0*win_reward + 5.0*rew + 0.003*ball_position_r + yellow_r
34 |
35 |
36 | return reward
--------------------------------------------------------------------------------
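`rewarder_highpass` differs from `rewarder_basic` only in the ball-position weights: ±1.0 in both attacking/defending zones instead of ±2.0 inside the penalty-box zones. Which rewarder a run uses is selected by `arg_dict["rewarder"]` in train.py; below is a hypothetical sketch of the dynamic import, mirroring the importlib pattern train.py uses for the encoder (the actual call presumably lives in actor.py, which is outside this excerpt):

```python
import importlib

# Hypothetical snippet; mirrors how train.py loads the feature encoder.
arg_dict = {"rewarder": "rewarder_basic"}  # or "rewarder_highpass"

rewarder = importlib.import_module("rewarders." + arg_dict["rewarder"])
calc_reward = rewarder.calc_reward  # the actor would call this once per environment step
```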
/train.py:
--------------------------------------------------------------------------------
1 | import gfootball.env as football_env
2 | import time, pprint, json, os, importlib, shutil
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.distributions import Categorical
9 | import torch.multiprocessing as mp
10 | from tensorboardX import SummaryWriter
11 |
12 | from actor import *
13 | from learner import *
14 | from evaluator import evaluator
15 | from datetime import datetime, timedelta
16 |
17 |
18 | def save_args(arg_dict):
19 | os.makedirs(arg_dict["log_dir"])
20 | args_info = json.dumps(arg_dict, indent=4)
21 | f = open(arg_dict["log_dir"]+"/args.json","w")
22 | f.write(args_info)
23 | f.close()
24 |
25 | def copy_models(dir_src, dir_dst): # src: source, dst: destination
26 | # retrieve list of models
27 | l_cands = [f for f in os.listdir(dir_src) if os.path.isfile(os.path.join(dir_src, f)) and 'model_' in f]
28 | l_cands = sorted(l_cands, key=lambda x: int(x.split('_')[-1].split('.')[0]))
29 |
30 | print(f"models to be copied: {l_cands}")
31 | for m in l_cands:
32 | shutil.copyfile(os.path.join(dir_src, m), os.path.join(dir_dst, m))
33 | print(f"{len(l_cands)} models copied to the given directory")
34 |
35 | def main(arg_dict):
36 | os.environ['OPENBLAS_NUM_THREADS'] = '1'
37 | cur_time = datetime.now() + timedelta(hours = 9)
38 | arg_dict["log_dir"] = "logs/" + cur_time.strftime("[%m-%d]%H.%M.%S")
39 | save_args(arg_dict)
40 | if arg_dict["trained_model_path"] and 'kaggle' in arg_dict['env']:
41 | copy_models(os.path.dirname(arg_dict['trained_model_path']), arg_dict['log_dir'])
42 |
43 | np.set_printoptions(precision=3)
44 | np.set_printoptions(suppress=True)
45 | pp = pprint.PrettyPrinter(indent=4)
46 | torch.set_num_threads(1)
47 |
48 | fe = importlib.import_module("encoders." + arg_dict["encoder"])
49 | fe = fe.FeatureEncoder()
50 | arg_dict["feature_dims"] = fe.get_feature_dims()
51 |
52 | model = importlib.import_module("models." + arg_dict["model"])
53 | cpu_device = torch.device('cpu')
54 | center_model = model.Model(arg_dict)
55 |
56 | if arg_dict["trained_model_path"]:
57 | checkpoint = torch.load(arg_dict["trained_model_path"], map_location=cpu_device)
58 | optimization_step = checkpoint['optimization_step']
59 | center_model.load_state_dict(checkpoint['model_state_dict'])
60 | center_model.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
61 | arg_dict["optimization_step"] = optimization_step
62 | print("Trained model", arg_dict["trained_model_path"], "successfully loaded")
63 | else:
64 | optimization_step = 0
65 |
66 | model_dict = {
67 | 'optimization_step': optimization_step,
68 | 'model_state_dict': center_model.state_dict(),
69 | 'optimizer_state_dict': center_model.optimizer.state_dict(),
70 | }
71 |
72 | path = arg_dict["log_dir"]+f"/model_{optimization_step}.tar"
73 | torch.save(model_dict, path)
74 |
75 | center_model.share_memory()
76 | data_queue = mp.Queue()
77 | signal_queue = mp.Queue()
78 | summary_queue = mp.Queue()
79 |
80 | processes = []
81 | p = mp.Process(target=learner, args=(center_model, data_queue, signal_queue, summary_queue, arg_dict))
82 | p.start()
83 | processes.append(p)
84 | for rank in range(arg_dict["num_processes"]):
85 | if arg_dict["env"] == "11_vs_11_kaggle":
86 | p = mp.Process(target=actor_self, args=(rank, center_model, data_queue, signal_queue, summary_queue, arg_dict))
87 | else:
88 | p = mp.Process(target=actor, args=(rank, center_model, data_queue, signal_queue, summary_queue, arg_dict))
89 | p.start()
90 | processes.append(p)
91 |
92 | if "env_evaluation" in arg_dict:
93 | p = mp.Process(target=evaluator, args=(center_model, signal_queue, summary_queue, arg_dict))
94 | p.start()
95 | processes.append(p)
96 |
97 | for p in processes:
98 | p.join()
99 |
100 |
101 | if __name__ == '__main__':
102 |
103 | arg_dict = {
104 | "env": "11_vs_11_kaggle",
105 | # "11_vs_11_kaggle" : environment used for self-play training
106 | # "11_vs_11_stochastic" : environment used for training against a fixed opponent (rule-based AI)
107 | "num_processes": 30, # should be less than the number of CPU cores on your workstation.
108 | "batch_size": 32,
109 | "buffer_size": 6,
110 | "rollout_len": 30,
111 |
112 | "lstm_size" : 256,
113 | "k_epoch" : 3,
114 | "learning_rate" : 0.0001,
115 | "gamma" : 0.993,
116 | "lmbda" : 0.96,
117 | "entropy_coef" : 0.0001,
118 | "grad_clip" : 3.0,
119 | "eps_clip" : 0.1,
120 |
121 | "summary_game_window" : 10,
122 | "model_save_interval" : 300000, # number of gradient updates between model saves
123 |
124 | "trained_model_path" : None, # use when you want to continue training from a given model.
125 | "latest_ratio" : 0.5, # works only for self-play training.
126 | "latest_n_model" : 10, # works only for self-play training.
127 | "print_mode" : False,
128 |
129 | "encoder" : "encoder_basic",
130 | "rewarder" : "rewarder_basic",
131 | "model" : "conv1d",
132 | "algorithm" : "ppo",
133 |
134 | "env_evaluation" : '11_vs_11_hard_stochastic' # for evaluation of the self-play trained agent (like a validation set in supervised learning)
135 | }
136 |
137 | main(arg_dict)
138 |
139 |
140 |
141 |
142 |
--------------------------------------------------------------------------------
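The `arg_dict` at the bottom of train.py is the single place a run is configured. As a hedged sketch (the checkpoint path below is made up), resuming self-play training from a saved model only requires pointing `"trained_model_path"` at one of the `model_*.tar` files the learner writes; because the env name contains "kaggle", `copy_models()` then carries the older checkpoints into the new log directory, presumably so they can be sampled as self-play opponents via `latest_ratio`/`latest_n_model`:

```python
# Hypothetical override of the defaults in train.py's __main__ block; the
# checkpoint path is made up for illustration.
arg_dict["trained_model_path"] = "logs/[10-30]12.00.00/model_300000.tar"
arg_dict["env"] = "11_vs_11_kaggle"   # self-play; older models are copied into the new log dir
arg_dict["num_processes"] = 8         # keep below the number of CPU cores

main(arg_dict)
```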
/view_match.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "Collecting kaggle-environments\n",
13 | " Downloading kaggle_environments-1.3.14-py2.py3-none-any.whl (100 kB)\n",
14 | "\u001b[K |████████████████████████████████| 100 kB 4.0 MB/s ta 0:00:011\n",
15 | "\u001b[?25hRequirement already satisfied, skipping upgrade: jsonschema>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from kaggle-environments) (3.2.0)\n",
16 | "Requirement already satisfied, skipping upgrade: six>=1.11.0 in /usr/lib/python3/dist-packages (from jsonschema>=3.0.1->kaggle-environments) (1.11.0)\n",
17 | "Requirement already satisfied, skipping upgrade: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kaggle-environments) (20.2.0)\n",
18 | "Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kaggle-environments) (2.0.0)\n",
19 | "Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kaggle-environments) (50.3.2)\n",
20 | "Requirement already satisfied, skipping upgrade: pyrsistent>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from jsonschema>=3.0.1->kaggle-environments) (0.17.3)\n",
21 | "Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->jsonschema>=3.0.1->kaggle-environments) (3.3.1)\n",
22 | "Installing collected packages: kaggle-environments\n",
23 | "Successfully installed kaggle-environments-1.3.14\n"
24 | ]
25 | }
26 | ],
27 | "source": [
28 | "# !pip3 install kaggle-environments -U"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "metadata": {
35 | "collapsed": true
36 | },
37 | "outputs": [
38 | {
39 | "name": "stdout",
40 | "output_type": "stream",
41 | "text": [
42 | "Staring a new environment 59f217e9-9c8a-4b1c-a052-6d20491863b4: with scenario: 11_vs_11_kaggle\n",
43 | "Resetting environment 59f217e9-9c8a-4b1c-a052-6d20491863b4: with scenario: 11_vs_11_kaggle\n"
44 | ]
45 | },
46 | {
47 | "ename": "KeyboardInterrupt",
48 | "evalue": "",
49 | "output_type": "error",
50 | "traceback": [
51 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
52 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
53 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"kaggle_simulations/agent/main.py\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"kaggle_simulations/agent/main.py\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"human\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwidth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m400\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m300\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
54 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/kaggle_environments/core.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, agents)\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mstart\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mperf_counter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 230\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mperf_counter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mstart\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfiguration\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunTimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 231\u001b[0;31m \u001b[0mactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrunner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mact\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 232\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msteps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
55 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/kaggle_environments/core.py\u001b[0m in \u001b[0;36mact\u001b[0;34m(none_action)\u001b[0m\n\u001b[1;32m 634\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpool\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mact_agent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mact_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 635\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 636\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mact_agent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mact_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 637\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0;31m# results is a list of tuples where the first element is an agent action and the second is the agent log\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
56 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/kaggle_environments/core.py\u001b[0m in \u001b[0;36mact_agent\u001b[0;34m(args)\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnone_action\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 98\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mact\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"observation\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
57 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/kaggle_environments/agent.py\u001b[0m in \u001b[0;36mact\u001b[0;34m(self, observation)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0mstart\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mperf_counter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0maction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprint_exc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merr_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
58 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/kaggle_environments/agent.py\u001b[0m in \u001b[0;36mcallable_agent\u001b[0;34m(observation, configuration)\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 123\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0magent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 124\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
59 | "\u001b[0;32m\u001b[0m in \u001b[0;36magent\u001b[0;34m(obs)\u001b[0m\n",
60 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 720\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 721\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 723\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 724\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
61 | "\u001b[0;32m\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, state_dict)\u001b[0m\n",
62 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 723\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 724\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 725\u001b[0;31m self._forward_hooks.values()):\n\u001b[0m\u001b[1;32m 726\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 727\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
63 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
64 | ]
65 | }
66 | ],
67 | "source": [
68 | "from kaggle_environments import make\n",
69 | "env = make(\"football\", debug=True, configuration={\"save_video\": True, \"scenario_name\": \"11_vs_11_kaggle\", \n",
70 | " \"debug\":True,\"running_in_notebook\": True})\n",
71 | "\n",
72 | "\n",
73 | "env.run([\"kaggle_simulations/agent/main.py\", \"kaggle_simulations/agent/main.py\"])\n",
74 | "env.render(mode=\"human\", width=400, height=300)"
75 | ]
76 | }
77 | ],
78 | "metadata": {
79 | "kernelspec": {
80 | "display_name": "Python 3",
81 | "language": "python",
82 | "name": "python3"
83 | },
84 | "language_info": {
85 | "codemirror_mode": {
86 | "name": "ipython",
87 | "version": 3
88 | },
89 | "file_extension": ".py",
90 | "mimetype": "text/x-python",
91 | "name": "python",
92 | "nbconvert_exporter": "python",
93 | "pygments_lexer": "ipython3",
94 | "version": "3.7.0"
95 | }
96 | },
97 | "nbformat": 4,
98 | "nbformat_minor": 4
99 | }
100 |
--------------------------------------------------------------------------------