├── .gitignore ├── setup.py ├── gym_soccer ├── envs │ ├── __init__.py │ ├── soccer_against_keeper.py │ ├── soccer_empty_goal.py │ └── soccer_env.py └── __init__.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyc 3 | .idea 4 | /logs 5 | *~ 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='gym_soccer', 4 | version='0.0.1', 5 | install_requires=['gym>=0.2.3', 6 | 'hfo_py>=0.2'] 7 | ) 8 | -------------------------------------------------------------------------------- /gym_soccer/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_soccer.envs.soccer_env import SoccerEnv 2 | from gym_soccer.envs.soccer_empty_goal import SoccerEmptyGoalEnv 3 | from gym_soccer.envs.soccer_against_keeper import SoccerAgainstKeeperEnv 4 | -------------------------------------------------------------------------------- /gym_soccer/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from gym.envs.registration import register 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | register( 7 | id='Soccer-v0', 8 | entry_point='gym_soccer.envs:SoccerEnv', 9 | timestep_limit=1000, 10 | reward_threshold=1.0, 11 | nondeterministic = True, 12 | ) 13 | 14 | register( 15 | id='SoccerEmptyGoal-v0', 16 | entry_point='gym_soccer.envs:SoccerEmptyGoalEnv', 17 | timestep_limit=1000, 18 | reward_threshold=10.0, 19 | nondeterministic = True, 20 | ) 21 | 22 | register( 23 | id='SoccerAgainstKeeper-v0', 24 | entry_point='gym.envs:SoccerAgainstKeeperEnv', 25 | timestep_limit=1000, 26 | reward_threshold=8.0, 27 | nondeterministic = True, 28 | ) 29 | -------------------------------------------------------------------------------- 
/gym_soccer/envs/soccer_against_keeper.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from gym_soccer.envs.soccer_empty_goal import SoccerEmptyGoalEnv 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | class SoccerAgainstKeeperEnv(SoccerEmptyGoalEnv): 7 | """ 8 | SoccerAgainstKeeper initializes the agent most of the way down the 9 | field with the ball and tasks it with scoring on a keeper. 10 | 11 | Rewards in this task are the same as SoccerEmptyGoal: reward 12 | is given for kicking the ball close to the goal and extra reward is 13 | given for scoring a goal. 14 | 15 | """ 16 | def __init__(self): 17 | super(SoccerAgainstKeeperEnv, self).__init__() 18 | 19 | def _configure_environment(self): 20 | super(SoccerAgainstKeeperEnv, self)._start_hfo_server(defense_npcs=1, 21 | offense_on_ball=1, 22 | ball_x_min=0.6) 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Status:** Archive (code is provided as-is, no updates expected) 2 | 3 | # gym-soccer 4 | 5 | The [Soccer environment](https://github.com/LARG/HFO) is a multiagent 6 | domain featuring continuous state and action spaces. Currently, 7 | several tasks are supported: 8 | 9 | ## Soccer 10 | 11 | The soccer task initializes a single offensive agent on the field and rewards +1 for scoring a goal and 0 otherwise. In order to score a goal, the agent will need to know how to approach the ball and kick towards the goal. The sparse nature of the goal reward makes this task very difficult to accomplish. 12 | 13 | ## SoccerEmptyGoal 14 | 15 | The SoccerEmptyGoal task features a more informative reward signal than the Soccer task. As before, the objective is to score a goal. However, SoccerEmptyGoal rewards the agent for approaching the ball and moving the ball towards the goal.
These frequent rewards make the task much more accessible. 16 | 17 | ## SoccerAgainstKeeper 18 | 19 | The objective of the SoccerAgainstKeeper task is to score against a goal keeper. The agent is rewarded for moving the ball towards the goal and for scoring a goal. The goal keeper uses a hand-coded policy developed by the Helios RoboCup team. The difficulty in this task is learning how to shoot around the goal keeper. 20 | 21 | # Installation 22 | 23 | ```bash 24 | cd gym-soccer 25 | pip install -e . 26 | ``` 27 | -------------------------------------------------------------------------------- /gym_soccer/envs/soccer_empty_goal.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | from gym_soccer.envs.soccer_env import SoccerEnv 4 | 5 | try: 6 | import hfo_py 7 | except ImportError as e: 8 | raise error.DependencyNotInstalled("{}. (HINT: you can install HFO dependencies with 'pip install gym[soccer].)'".format(e)) 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | class SoccerEmptyGoalEnv(SoccerEnv): 13 | """ 14 | SoccerEmptyGoal tasks the agent with approaching the ball, 15 | dribbling, and scoring a goal. Rewards are given as the agent nears 16 | the ball, kicks the ball towards the goal, and scores a goal. 17 | 18 | """ 19 | def __init__(self): 20 | super(SoccerEmptyGoalEnv, self).__init__() 21 | self.old_ball_prox = 0 22 | self.old_kickable = 0 23 | self.old_ball_dist_goal = 0 24 | self.got_kickable_reward = False 25 | self.first_step = True 26 | 27 | def _get_reward(self): 28 | """ 29 | Agent is rewarded for minimizing the distance between itself and 30 | the ball, minimizing the distance between the ball and the goal, 31 | and scoring a goal. 
32 | """ 33 | current_state = self.env.getState() 34 | ball_proximity = current_state[53] 35 | goal_proximity = current_state[15] 36 | ball_dist = 1.0 - ball_proximity 37 | goal_dist = 1.0 - goal_proximity 38 | kickable = current_state[12] 39 | ball_ang_sin_rad = current_state[51] 40 | ball_ang_cos_rad = current_state[52] 41 | ball_ang_rad = math.acos(ball_ang_cos_rad) 42 | if ball_ang_sin_rad < 0: 43 | ball_ang_rad *= -1. 44 | goal_ang_sin_rad = current_state[13] 45 | goal_ang_cos_rad = current_state[14] 46 | goal_ang_rad = math.acos(goal_ang_cos_rad) 47 | if goal_ang_sin_rad < 0: 48 | goal_ang_rad *= -1. 49 | alpha = max(ball_ang_rad, goal_ang_rad) - min(ball_ang_rad, goal_ang_rad) 50 | ball_dist_goal = math.sqrt(ball_dist*ball_dist + goal_dist*goal_dist - 51 | 2.*ball_dist*goal_dist*math.cos(alpha)) 52 | # Compute the difference in ball proximity from the last step 53 | if not self.first_step: 54 | ball_prox_delta = ball_proximity - self.old_ball_prox 55 | kickable_delta = kickable - self.old_kickable 56 | ball_dist_goal_delta = ball_dist_goal - self.old_ball_dist_goal 57 | self.old_ball_prox = ball_proximity 58 | self.old_kickable = kickable 59 | self.old_ball_dist_goal = ball_dist_goal 60 | 61 | reward = 0 62 | if not self.first_step: 63 | # Reward the agent for moving towards the ball 64 | reward += ball_prox_delta 65 | if kickable_delta > 0 and not self.got_kickable_reward: 66 | reward += 1. 
67 | self.got_kickable_reward = True 68 | # Reward the agent for kicking towards the goal 69 | reward += 0.6 * -ball_dist_goal_delta 70 | # Reward the agent for scoring 71 | if self.status == hfo_py.GOAL: 72 | reward += 5.0 73 | self.first_step = False 74 | return reward 75 | 76 | def _reset(self): # clear the shaping bookkeeping so a new episode starts with no stale deltas 77 | self.old_ball_prox = 0 78 | self.old_kickable = 0 79 | self.old_ball_dist_goal = 0 80 | self.got_kickable_reward = False 81 | self.first_step = True 82 | return super(SoccerEmptyGoalEnv, self)._reset() 83 | -------------------------------------------------------------------------------- /gym_soccer/envs/soccer_env.py: -------------------------------------------------------------------------------- 1 | import os, subprocess, time, signal 2 | import gym 3 | from gym import error, spaces 4 | from gym import utils 5 | from gym.utils import seeding 6 | 7 | try: 8 | import hfo_py 9 | except ImportError as e: 10 | raise error.DependencyNotInstalled("{}. (HINT: you can install HFO dependencies with 'pip install gym[soccer].)'".format(e)) 11 | 12 | import logging 13 | logger = logging.getLogger(__name__) 14 | 15 | class SoccerEnv(gym.Env, utils.EzPickle): 16 | metadata = {'render.modes': ['human']} 17 | 18 | def __init__(self): 19 | self.viewer = None 20 | self.server_process = None 21 | self.server_port = None 22 | self.hfo_path = hfo_py.get_hfo_path() 23 | self._configure_environment() # launches the HFO server; subclasses override to change the server config 24 | self.env = hfo_py.HFOEnvironment() 25 | self.env.connectToServer(config_dir=hfo_py.get_config_path()) 26 | self.observation_space = spaces.Box(low=-1, high=1, 27 | shape=(self.env.getStateSize())) # NOTE(review): (x) is an int, not a 1-tuple; the old gym Box tolerates this — revisit if gym is upgraded 28 | # Action space omits the Tackle/Catch actions, which are useful on defense 29 | self.action_space = spaces.Tuple((spaces.Discrete(3), 30 | spaces.Box(low=0, high=100, shape=1), 31 | spaces.Box(low=-180, high=180, shape=1), 32 | spaces.Box(low=-180, high=180, shape=1), 33 | spaces.Box(low=0, high=100, shape=1), 34 | spaces.Box(low=-180, high=180, shape=1))) 35 | self.status =
hfo_py.IN_GAME 36 | 37 | def __del__(self): 38 | self.env.act(hfo_py.QUIT) 39 | self.env.step() 40 | os.kill(self.server_process.pid, signal.SIGINT) 41 | if self.viewer is not None: 42 | os.kill(self.viewer.pid, signal.SIGKILL) 43 | 44 | def _configure_environment(self): 45 | """ 46 | Provides a chance for subclasses to override this method and supply 47 | a different server configuration. By default, we initialize one 48 | offense agent against no defenders. 49 | """ 50 | self._start_hfo_server() 51 | 52 | def _start_hfo_server(self, frames_per_trial=500, 53 | untouched_time=100, offense_agents=1, 54 | defense_agents=0, offense_npcs=0, 55 | defense_npcs=0, sync_mode=True, port=6000, 56 | offense_on_ball=0, fullstate=True, seed=-1, 57 | ball_x_min=0.0, ball_x_max=0.2, 58 | verbose=False, log_game=False, 59 | log_dir="log"): 60 | """ 61 | Starts the Half-Field-Offense server. 62 | frames_per_trial: Episodes end after this many steps. 63 | untouched_time: Episodes end if the ball is untouched for this many steps. 64 | offense_agents: Number of user-controlled offensive players. 65 | defense_agents: Number of user-controlled defenders. 66 | offense_npcs: Number of offensive bots. 67 | defense_npcs: Number of defense bots. 68 | sync_mode: Disabling sync mode runs server in real time (SLOW!). 69 | port: Port to start the server on. 70 | offense_on_ball: Player to give the ball to at beginning of episode. 71 | fullstate: Enable noise-free perception. 72 | seed: Seed the starting positions of the players and ball. 73 | ball_x_[min/max]: Initialize the ball this far downfield: [0,1] 74 | verbose: Verbose server messages. 75 | log_game: Enable game logging. Logs can be used for replay + visualization. 76 | log_dir: Directory to place game logs (*.rcg). 
77 | """ 78 | self.server_port = port 79 | cmd = self.hfo_path + \ 80 | " --headless --frames-per-trial %i --untouched-time %i --offense-agents %i"\ 81 | " --defense-agents %i --offense-npcs %i --defense-npcs %i"\ 82 | " --port %i --offense-on-ball %i --seed %i --ball-x-min %f"\ 83 | " --ball-x-max %f --log-dir %s"\ 84 | % (frames_per_trial, untouched_time, offense_agents, 85 | defense_agents, offense_npcs, defense_npcs, port, 86 | offense_on_ball, seed, ball_x_min, ball_x_max, 87 | log_dir) 88 | if not sync_mode: cmd += " --no-sync" 89 | if fullstate: cmd += " --fullstate" 90 | if verbose: cmd += " --verbose" 91 | if not log_game: cmd += " --no-logging" 92 | print('Starting server with command: %s' % cmd) 93 | self.server_process = subprocess.Popen(cmd.split(' '), shell=False) 94 | time.sleep(10) # Wait for server to startup before connecting a player 95 | 96 | def _start_viewer(self): 97 | """ 98 | Starts the SoccerWindow visualizer. Note the viewer may also be 99 | used with a *.rcg logfile to replay a game. See details at 100 | https://github.com/LARG/HFO/blob/master/doc/manual.pdf. 101 | """ 102 | cmd = hfo_py.get_viewer_path() +\ 103 | " --connect --port %d" % (self.server_port) 104 | self.viewer = subprocess.Popen(cmd.split(' '), shell=False) 105 | 106 | def _step(self, action): 107 | self._take_action(action) 108 | self.status = self.env.step() 109 | reward = self._get_reward() 110 | ob = self.env.getState() 111 | episode_over = self.status != hfo_py.IN_GAME 112 | return ob, reward, episode_over, {} 113 | 114 | def _take_action(self, action): 115 | """ Converts the action space into an HFO action. 
""" 116 | action_type = ACTION_LOOKUP[action[0]] 117 | if action_type == hfo_py.DASH: 118 | self.env.act(action_type, action[1], action[2]) 119 | elif action_type == hfo_py.TURN: 120 | self.env.act(action_type, action[3]) 121 | elif action_type == hfo_py.KICK: 122 | self.env.act(action_type, action[4], action[5]) 123 | else: 124 | print('Unrecognized action %d' % action_type) 125 | self.env.act(hfo_py.NOOP) 126 | 127 | def _get_reward(self): 128 | """ Reward is given for scoring a goal. """ 129 | if self.status == hfo_py.GOAL: 130 | return 1 131 | else: 132 | return 0 133 | 134 | def _reset(self): 135 | """ Repeats NO-OP action until a new episode begins. """ 136 | while self.status == hfo_py.IN_GAME: 137 | self.env.act(hfo_py.NOOP) 138 | self.status = self.env.step() 139 | while self.status != hfo_py.IN_GAME: 140 | self.env.act(hfo_py.NOOP) 141 | self.status = self.env.step() 142 | return self.env.getState() 143 | 144 | def _render(self, mode='human', close=False): 145 | """ Viewer only supports human mode currently. """ 146 | if close: 147 | if self.viewer is not None: 148 | os.kill(self.viewer.pid, signal.SIGKILL) 149 | else: 150 | if self.viewer is None: 151 | self._start_viewer() 152 | 153 | ACTION_LOOKUP = { 154 | 0 : hfo_py.DASH, 155 | 1 : hfo_py.TURN, 156 | 2 : hfo_py.KICK, 157 | 3 : hfo_py.TACKLE, # Used on defense to slide tackle the ball 158 | 4 : hfo_py.CATCH, # Used only by goalie to catch the ball 159 | } 160 | --------------------------------------------------------------------------------