├── .gitignore ├── setup.py ├── gym_soccer ├── envs │ ├── __init__.py │ ├── soccer_against_keeper.py │ ├── soccer_empty_goal.py │ └── soccer_env.py └── __init__.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyc 3 | .idea 4 | /logs 5 | *~ 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='gym_soccer', 4 | version='0.0.1', 5 | install_requires=['gym>=0.2.3', 6 | 'hfo_py>=0.2'] 7 | ) 8 | -------------------------------------------------------------------------------- /gym_soccer/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_soccer.envs.soccer_env import SoccerEnv 2 | from gym_soccer.envs.soccer_empty_goal import SoccerEmptyGoalEnv 3 | from gym_soccer.envs.soccer_against_keeper import SoccerAgainstKeeperEnv 4 | -------------------------------------------------------------------------------- /gym_soccer/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from gym.envs.registration import register 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | register( 7 | id='Soccer-v0', 8 | entry_point='gym_soccer.envs:SoccerEnv', 9 | timestep_limit=1000, 10 | reward_threshold=1.0, 11 | nondeterministic = True, 12 | ) 13 | 14 | register( 15 | id='SoccerEmptyGoal-v0', 16 | entry_point='gym_soccer.envs:SoccerEmptyGoalEnv', 17 | timestep_limit=1000, 18 | reward_threshold=10.0, 19 | nondeterministic = True, 20 | ) 21 | 22 | register( 23 | id='SoccerAgainstKeeper-v0', 24 | entry_point='gym.envs:SoccerAgainstKeeperEnv', 25 | timestep_limit=1000, 26 | reward_threshold=8.0, 27 | nondeterministic = True, 28 | ) 29 | -------------------------------------------------------------------------------- 
/gym_soccer/envs/soccer_against_keeper.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from gym_soccer.envs.soccer_empty_goal import SoccerEmptyGoalEnv 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | class SoccerAgainstKeeperEnv(SoccerEmptyGoalEnv): 7 | """ 8 | SoccerAgainstKeeper initializes the agent most of the way down the 9 | field with the ball and tasks it with scoring on a keeper. 10 | 11 | Rewards in this task are the same as SoccerEmptyGoal: reward 12 | is given for kicking the ball close to the goal and extra reward is 13 | given for scoring a goal. 14 | 15 | """ 16 | def __init__(self): 17 | super(SoccerAgainstKeeperEnv, self).__init__() 18 | 19 | def _configure_environment(self): 20 | super(SoccerAgainstKeeperEnv, self)._start_hfo_server(defense_npcs=1, 21 | offense_on_ball=1, 22 | ball_x_min=0.6) 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Status:** Archive (code is provided as-is, no updates expected) 2 | 3 | # gym-soccer 4 | 5 | The [Soccer environment](https://github.com/LARG/HFO) is a multiagent 6 | domain featuring continuous state and action spaces. Currently, 7 | several tasks are supported: 8 | 9 | ## Soccer 10 | 11 | The soccer task initializes a single offensive agent on the field and rewards +1 for scoring a goal and 0 otherwise. In order to score a goal, the agent will need to know how to approach the ball and kick towards the goal. The sparse nature of the goal reward makes this task very difficult to accomplish. 12 | 13 | ## SoccerEmptyGoal 14 | 15 | The SoccerEmptyGoal task features a more informative reward signal than the Soccer task. As before, the objective is to score a goal. However, SoccerEmptyGoal rewards the agent for approaching the ball and moving the ball towards the goal.
These frequent rewards make the task much more accessible. 16 | 17 | ## SoccerAgainstKeeper 18 | 19 | The objective of the SoccerAgainstKeeper task is to score against a goal keeper. The agent is rewarded for moving the ball towards the goal and for scoring a goal. The goal keeper uses a hand-coded policy developed by the Helios RoboCup team. The difficulty in this task is learning how to shoot around the goal keeper. 20 | 21 | # Installation 22 | 23 | ```bash 24 | cd gym-soccer 25 | pip install -e . 26 | ``` 27 | -------------------------------------------------------------------------------- /gym_soccer/envs/soccer_empty_goal.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | from gym_soccer.envs.soccer_env import SoccerEnv 4 | 5 | try: 6 | import hfo_py 7 | except ImportError as e: 8 | raise error.DependencyNotInstalled("{}. (HINT: you can install HFO dependencies with 'pip install gym[soccer].)'".format(e)) 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | class SoccerEmptyGoalEnv(SoccerEnv): 13 | """ 14 | SoccerEmptyGoal tasks the agent with approaching the ball, 15 | dribbling, and scoring a goal. Rewards are given as the agent nears 16 | the ball, kicks the ball towards the goal, and scores a goal. 17 | 18 | """ 19 | def __init__(self): 20 | super(SoccerEmptyGoalEnv, self).__init__() 21 | self.old_ball_prox = 0 22 | self.old_kickable = 0 23 | self.old_ball_dist_goal = 0 24 | self.got_kickable_reward = False 25 | self.first_step = True 26 | 27 | def _get_reward(self): 28 | """ 29 | Agent is rewarded for minimizing the distance between itself and 30 | the ball, minimizing the distance between the ball and the goal, 31 | and scoring a goal. 
32 | """ 33 | current_state = self.env.getState() 34 | ball_proximity = current_state[53] 35 | goal_proximity = current_state[15] 36 | ball_dist = 1.0 - ball_proximity 37 | goal_dist = 1.0 - goal_proximity 38 | kickable = current_state[12] 39 | ball_ang_sin_rad = current_state[51] 40 | ball_ang_cos_rad = current_state[52] 41 | ball_ang_rad = math.acos(ball_ang_cos_rad) 42 | if ball_ang_sin_rad < 0: 43 | ball_ang_rad *= -1. 44 | goal_ang_sin_rad = current_state[13] 45 | goal_ang_cos_rad = current_state[14] 46 | goal_ang_rad = math.acos(goal_ang_cos_rad) 47 | if goal_ang_sin_rad < 0: 48 | goal_ang_rad *= -1. 49 | alpha = max(ball_ang_rad, goal_ang_rad) - min(ball_ang_rad, goal_ang_rad) 50 | ball_dist_goal = math.sqrt(ball_dist*ball_dist + goal_dist*goal_dist - 51 | 2.*ball_dist*goal_dist*math.cos(alpha)) 52 | # Compute the difference in ball proximity from the last step 53 | if not self.first_step: 54 | ball_prox_delta = ball_proximity - self.old_ball_prox 55 | kickable_delta = kickable - self.old_kickable 56 | ball_dist_goal_delta = ball_dist_goal - self.old_ball_dist_goal 57 | self.old_ball_prox = ball_proximity 58 | self.old_kickable = kickable 59 | self.old_ball_dist_goal = ball_dist_goal 60 | 61 | reward = 0 62 | if not self.first_step: 63 | # Reward the agent for moving towards the ball 64 | reward += ball_prox_delta 65 | if kickable_delta > 0 and not self.got_kickable_reward: 66 | reward += 1. 
67 | self.got_kickable_reward = True 68 | # Reward the agent for kicking towards the goal 69 | reward += 0.6 * -ball_dist_goal_delta 70 | # Reward the agent for scoring 71 | if self.status == hfo_py.GOAL: 72 | reward += 5.0 73 | self.first_step = False 74 | return reward 75 | 76 | def _reset(self): # clear the shaping bookkeeping so a new episode starts with no stale deltas 77 | self.old_ball_prox = 0 78 | self.old_kickable = 0 79 | self.old_ball_dist_goal = 0 80 | self.got_kickable_reward = False 81 | self.first_step = True 82 | return super(SoccerEmptyGoalEnv, self)._reset() 83 | -------------------------------------------------------------------------------- /gym_soccer/envs/soccer_env.py: -------------------------------------------------------------------------------- 1 | import os, subprocess, time, signal 2 | import gym 3 | from gym import error, spaces 4 | from gym import utils 5 | from gym.utils import seeding 6 | 7 | try: 8 | import hfo_py 9 | except ImportError as e: 10 | raise error.DependencyNotInstalled("{}. (HINT: you can install HFO dependencies with 'pip install gym[soccer].)'".format(e)) 11 | 12 | import logging 13 | logger = logging.getLogger(__name__) 14 | 15 | class SoccerEnv(gym.Env, utils.EzPickle): 16 | metadata = {'render.modes': ['human']} 17 | 18 | def __init__(self): 19 | self.viewer = None 20 | self.server_process = None 21 | self.server_port = None 22 | self.hfo_path = hfo_py.get_hfo_path() 23 | self._configure_environment() # launches the HFO server; subclasses override to change the server config 24 | self.env = hfo_py.HFOEnvironment() 25 | self.env.connectToServer(config_dir=hfo_py.get_config_path()) 26 | self.observation_space = spaces.Box(low=-1, high=1, 27 | shape=(self.env.getStateSize())) # NOTE(review): (x) is an int, not a 1-tuple; the old gym Box tolerates this — revisit if gym is upgraded 28 | # Action space omits the Tackle/Catch actions, which are useful on defense 29 | self.action_space = spaces.Tuple((spaces.Discrete(3), 30 | spaces.Box(low=0, high=100, shape=1), 31 | spaces.Box(low=-180, high=180, shape=1), 32 | spaces.Box(low=-180, high=180, shape=1), 33 | spaces.Box(low=0, high=100, shape=1), 34 | spaces.Box(low=-180, high=180, shape=1))) 35 | self.status =
hfo_py.IN_GAME 36 | 37 | def __del__(self): 38 | self.env.act(hfo_py.QUIT) 39 | self.env.step() 40 | os.kill(self.server_process.pid, signal.SIGINT) 41 | if self.viewer is not None: 42 | os.kill(self.viewer.pid, signal.SIGKILL) 43 | 44 | def _configure_environment(self): 45 | """ 46 | Provides a chance for subclasses to override this method and supply 47 | a different server configuration. By default, we initialize one 48 | offense agent against no defenders. 49 | """ 50 | self._start_hfo_server() 51 | 52 | def _start_hfo_server(self, frames_per_trial=500, 53 | untouched_time=100, offense_agents=1, 54 | defense_agents=0, offense_npcs=0, 55 | defense_npcs=0, sync_mode=True, port=6000, 56 | offense_on_ball=0, fullstate=True, seed=-1, 57 | ball_x_min=0.0, ball_x_max=0.2, 58 | verbose=False, log_game=False, 59 | log_dir="log"): 60 | """ 61 | Starts the Half-Field-Offense server. 62 | frames_per_trial: Episodes end after this many steps. 63 | untouched_time: Episodes end if the ball is untouched for this many steps. 64 | offense_agents: Number of user-controlled offensive players. 65 | defense_agents: Number of user-controlled defenders. 66 | offense_npcs: Number of offensive bots. 67 | defense_npcs: Number of defense bots. 68 | sync_mode: Disabling sync mode runs server in real time (SLOW!). 69 | port: Port to start the server on. 70 | offense_on_ball: Player to give the ball to at beginning of episode. 71 | fullstate: Enable noise-free perception. 72 | seed: Seed the starting positions of the players and ball. 73 | ball_x_[min/max]: Initialize the ball this far downfield: [0,1] 74 | verbose: Verbose server messages. 75 | log_game: Enable game logging. Logs can be used for replay + visualization. 76 | log_dir: Directory to place game logs (*.rcg). 
77 | """ 78 | self.server_port = port 79 | cmd = self.hfo_path + \ 80 | " --headless --frames-per-trial %i --untouched-time %i --offense-agents %i"\ 81 | " --defense-agents %i --offense-npcs %i --defense-npcs %i"\ 82 | " --port %i --offense-on-ball %i --seed %i --ball-x-min %f"\ 83 | " --ball-x-max %f --log-dir %s"\ 84 | % (frames_per_trial, untouched_time, offense_agents, 85 | defense_agents, offense_npcs, defense_npcs, port, 86 | offense_on_ball, seed, ball_x_min, ball_x_max, 87 | log_dir) 88 | if not sync_mode: cmd += " --no-sync" 89 | if fullstate: cmd += " --fullstate" 90 | if verbose: cmd += " --verbose" 91 | if not log_game: cmd += " --no-logging" 92 | print('Starting server with command: %s' % cmd) 93 | self.server_process = subprocess.Popen(cmd.split(' '), shell=False) 94 | time.sleep(10) # Wait for server to startup before connecting a player 95 | 96 | def _start_viewer(self): 97 | """ 98 | Starts the SoccerWindow visualizer. Note the viewer may also be 99 | used with a *.rcg logfile to replay a game. See details at 100 | https://github.com/LARG/HFO/blob/master/doc/manual.pdf. 101 | """ 102 | cmd = hfo_py.get_viewer_path() +\ 103 | " --connect --port %d" % (self.server_port) 104 | self.viewer = subprocess.Popen(cmd.split(' '), shell=False) 105 | 106 | def _step(self, action): 107 | self._take_action(action) 108 | self.status = self.env.step() 109 | reward = self._get_reward() 110 | ob = self.env.getState() 111 | episode_over = self.status != hfo_py.IN_GAME 112 | return ob, reward, episode_over, {} 113 | 114 | def _take_action(self, action): 115 | """ Converts the action space into an HFO action. 
""" 116 | action_type = ACTION_LOOKUP[action[0]] 117 | if action_type == hfo_py.DASH: 118 | self.env.act(action_type, action[1], action[2]) 119 | elif action_type == hfo_py.TURN: 120 | self.env.act(action_type, action[3]) 121 | elif action_type == hfo_py.KICK: 122 | self.env.act(action_type, action[4], action[5]) 123 | else: 124 | print('Unrecognized action %d' % action_type) 125 | self.env.act(hfo_py.NOOP) 126 | 127 | def _get_reward(self): 128 | """ Reward is given for scoring a goal. """ 129 | if self.status == hfo_py.GOAL: 130 | return 1 131 | else: 132 | return 0 133 | 134 | def _reset(self): 135 | """ Repeats NO-OP action until a new episode begins. """ 136 | while self.status == hfo_py.IN_GAME: 137 | self.env.act(hfo_py.NOOP) 138 | self.status = self.env.step() 139 | while self.status != hfo_py.IN_GAME: 140 | self.env.act(hfo_py.NOOP) 141 | self.status = self.env.step() 142 | return self.env.getState() 143 | 144 | def _render(self, mode='human', close=False): 145 | """ Viewer only supports human mode currently. """ 146 | if close: 147 | if self.viewer is not None: 148 | os.kill(self.viewer.pid, signal.SIGKILL) 149 | else: 150 | if self.viewer is None: 151 | self._start_viewer() 152 | 153 | ACTION_LOOKUP = { 154 | 0 : hfo_py.DASH, 155 | 1 : hfo_py.TURN, 156 | 2 : hfo_py.KICK, 157 | 3 : hfo_py.TACKLE, # Used on defense to slide tackle the ball 158 | 4 : hfo_py.CATCH, # Used only by goalie to catch the ball 159 | } 160 | --------------------------------------------------------------------------------